aboutsummaryrefslogtreecommitdiff
path: root/src/unicode.c
blob: 5c9f91f468167984387a8bbf8a1a3c7ad95c8b2b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#pragma warning(push)
#pragma warning(disable: 4245)

// #include "utf8proc/utf8proc.h"
#include "utf8proc/utf8proc.c"

#pragma warning(pop)

// Reports whether `r` counts as a letter for identifier purposes:
// an underscore, an ASCII character accepted by gb_char_is_alpha, or any
// code point whose Unicode general category is Lu, Ll, Lt, Lm or Lo.
bool rune_is_letter(Rune r) {
	if (r == '_') {
		return true;
	}
	// Take the ASCII fast path only for genuine ASCII values. Without the
	// lower bound, a negative rune would pass `r < 0x80`, and narrowing it to
	// char keeps only its low byte, which can alias an ASCII letter
	// (e.g. -191 has low byte 0x41, 'A').
	if (0 <= r && r < 0x80) {
		if (gb_char_is_alpha(cast(char)r)) {
			return true;
		}
	}
	// Everything else (including ASCII non-letters, as before) is decided by
	// the Unicode general category reported by utf8proc.
	switch (utf8proc_category(r)) {
	case UTF8PROC_CATEGORY_LU:
	case UTF8PROC_CATEGORY_LL:
	case UTF8PROC_CATEGORY_LT:
	case UTF8PROC_CATEGORY_LM:
	case UTF8PROC_CATEGORY_LO:
		return true;
	}
	return false;
}

// Reports whether `r` is a digit: either an ASCII '0'..'9', or a code point
// whose Unicode general category is Nd according to utf8proc.
bool rune_is_digit(Rune r) {
	// The ASCII range check comes first; the category lookup only runs when
	// it fails.
	return (r < 0x80 && gb_is_between(r, '0', '9')) ||
	       utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
}

// Reports whether `r` is one of the four whitespace characters recognised
// here: space, horizontal tab, line feed or carriage return.
bool rune_is_whitespace(Rune r) {
	return r == ' '  ||
	       r == '\t' ||
	       r == '\n' ||
	       r == '\r';
}


// Reports whether `s` is a valid identifier: non-empty, with a first rune
// that satisfies rune_is_letter and every later rune satisfying
// rune_is_letter or rune_is_digit.
bool is_string_an_identifier(String s) {
	isize pos = 0;

	// An empty string is never an identifier.
	if (s.len < 1) {
		return false;
	}

	for (; pos < s.len; ) {
		Rune ch = -1;
		isize width = gb_utf8_decode(s.text+pos, s.len-pos, &ch);

		// Letters are accepted anywhere; digits only after the first rune.
		bool valid = rune_is_letter(ch);
		if (!valid && pos != 0) {
			valid = rune_is_digit(ch);
		}
		if (!valid) {
			return false;
		}

		pos += width;
	}

	// The decoded widths must land exactly on the end of the string.
	return pos == s.len;
}