aboutsummaryrefslogtreecommitdiff
path: root/src/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/unicode.c')
-rw-r--r--src/unicode.c66
1 files changed, 66 insertions, 0 deletions
diff --git a/src/unicode.c b/src/unicode.c
new file mode 100644
index 000000000..5c9f91f46
--- /dev/null
+++ b/src/unicode.c
@@ -0,0 +1,66 @@
+#pragma warning(push)
+#pragma warning(disable: 4245)
+
+// #include "utf8proc/utf8proc.h"
+#include "utf8proc/utf8proc.c"
+
+#pragma warning(pop)
+
+// Reports whether `r` counts as a letter for identifier purposes: an ASCII
+// alphabetic character, an underscore, or any code point that utf8proc places
+// in one of the letter categories (LU, LL, LT, LM, LO).
+bool rune_is_letter(Rune r) {
+	// The ASCII fast path has to reject negative runes explicitly. A bare
+	// `r < 0x80` also holds for negative values of a signed Rune (this file
+	// initialises a Rune to -1 elsewhere), and the narrowing cast to `char`
+	// would then keep only the low byte, so a value such as -191
+	// (low byte 0x41, 'A') would wrongly be treated as alphabetic.
+	if (r == '_' ||
+	    (0 <= r && r < 0x80 && gb_char_is_alpha(cast(char)r))) {
+		return true;
+	}
+	switch (utf8proc_category(r)) {
+	case UTF8PROC_CATEGORY_LU:
+	case UTF8PROC_CATEGORY_LL:
+	case UTF8PROC_CATEGORY_LT:
+	case UTF8PROC_CATEGORY_LM:
+	case UTF8PROC_CATEGORY_LO:
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
+// Reports whether `r` is a digit: either ASCII '0'..'9' or a code point that
+// utf8proc places in category ND.
+bool rune_is_digit(Rune r) {
+	// Cheap ASCII test first; the category lookup only runs when it fails.
+	bool ascii_digit = (r < 0x80) && gb_is_between(r, '0', '9');
+	return ascii_digit || utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
+}
+
+// Reports whether `r` is one of the four whitespace characters recognised
+// here: space, horizontal tab, line feed, or carriage return. No other code
+// point is treated as whitespace by this function.
+bool rune_is_whitespace(Rune r) {
+	return r == ' '  ||
+	       r == '\t' ||
+	       r == '\n' ||
+	       r == '\r';
+}
+
+
+// Reports whether `s` is a valid identifier: it must be non-empty, its first
+// rune must satisfy rune_is_letter, and every following rune must satisfy
+// rune_is_letter or rune_is_digit.
+bool is_string_an_identifier(String s) {
+	isize pos = 0;
+
+	// An empty string is never an identifier; without this guard the loop
+	// below would be skipped and the final comparison would succeed.
+	if (s.len < 1) {
+		return false;
+	}
+
+	while (pos < s.len) {
+		Rune r = -1;
+		// The value returned by gb_utf8_decode is used as the number of
+		// bytes to advance past the rune just decoded.
+		isize width = gb_utf8_decode(s.text+pos, s.len-pos, &r);
+
+		if (!rune_is_letter(r)) {
+			// A non-letter is tolerated only when it is a digit that is
+			// not in the leading position.
+			if (pos == 0 || !rune_is_digit(r)) {
+				return false;
+			}
+		}
+		pos += width;
+	}
+
+	// Succeeds only if decoding stopped exactly at the end of the string.
+	return pos == s.len;
+}