aboutsummaryrefslogtreecommitdiff
path: root/src/unicode.cpp
blob: 83aa8deef219a5b31d1024b4d99a12573a9ec96c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#pragma warning(push)
#pragma warning(disable: 4245)

extern "C" {
#include "utf8proc/utf8proc.c"
}
#pragma warning(pop)


// Reports whether `r` counts as a letter.
// Runes below 0x80 are classified without consulting utf8proc: '_' and the
// ASCII letters A-Z / a-z are accepted. Any other rune is a letter when
// utf8proc places it in one of the categories LU, LL, LT, LM or LO.
bool rune_is_letter(Rune r) {
	if (r >= 0x80) {
		switch (utf8proc_category(r)) {
		case UTF8PROC_CATEGORY_LU:
		case UTF8PROC_CATEGORY_LL:
		case UTF8PROC_CATEGORY_LT:
		case UTF8PROC_CATEGORY_LM:
		case UTF8PROC_CATEGORY_LO:
			return true;
		default:
			return false;
		}
	}

	if (r == '_') {
		return true;
	}
	// Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z'; the unsigned subtraction
	// then wraps for anything below 'a', so one compare covers the range.
	u32 lowered = cast(u32)r | 0x20;
	return (lowered - 0x61) < 26;
}

// Reports whether `r` counts as a digit.
// Runes below 0x80 are accepted only when they are '0'-'9'; any other rune
// is a digit when utf8proc places it in category ND.
bool rune_is_digit(Rune r) {
	if (r >= 0x80) {
		return utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
	}
	// Unsigned subtraction wraps for values below '0', so a single compare
	// checks both ends of the '0'-'9' range.
	u32 offset = cast(u32)r - '0';
	return offset < 10;
}

// Reports whether `r` counts as a letter or a digit.
// Runes below 0x80 are accepted when they are '_', an ASCII letter
// (A-Z / a-z) or '0'-'9'. Any other rune is accepted when utf8proc places it
// in one of the categories LU, LL, LT, LM, LO or ND.
bool rune_is_letter_or_digit(Rune r) {
	if (r >= 0x80) {
		switch (utf8proc_category(r)) {
		case UTF8PROC_CATEGORY_LU:
		case UTF8PROC_CATEGORY_LL:
		case UTF8PROC_CATEGORY_LT:
		case UTF8PROC_CATEGORY_LM:
		case UTF8PROC_CATEGORY_LO:
		case UTF8PROC_CATEGORY_ND:
			return true;
		default:
			return false;
		}
	}

	u32 code = cast(u32)r;
	// Both range checks rely on unsigned wrap-around: values below the lower
	// bound become huge after the subtraction and fail the compare.
	bool ascii_alpha = ((code | 0x20) - 0x61) < 26;
	bool ascii_digit = (code - '0') < 10;
	return r == '_' || ascii_alpha || ascii_digit;
}

// Reports whether `r` is one of the four whitespace runes recognised here:
// space, horizontal tab, line feed or carriage return.
bool rune_is_whitespace(Rune r) {
	return r == ' '  ||
	       r == '\t' ||
	       r == '\n' ||
	       r == '\r';
}