diff options
Diffstat (limited to 'src/unicode.c')
| -rw-r--r-- | src/unicode.c | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/src/unicode.c b/src/unicode.c new file mode 100644 index 000000000..5c9f91f46 --- /dev/null +++ b/src/unicode.c @@ -0,0 +1,66 @@ +#pragma warning(push) +#pragma warning(disable: 4245) + +// #include "utf8proc/utf8proc.h" +#include "utf8proc/utf8proc.c" + +#pragma warning(pop) + +bool rune_is_letter(Rune r) { + if ((r < 0x80 && gb_char_is_alpha(cast(char)r)) || + r == '_') { + return true; + } + switch (utf8proc_category(r)) { + case UTF8PROC_CATEGORY_LU: + case UTF8PROC_CATEGORY_LL: + case UTF8PROC_CATEGORY_LT: + case UTF8PROC_CATEGORY_LM: + case UTF8PROC_CATEGORY_LO: + return true; + } + return false; +} + +bool rune_is_digit(Rune r) { + if (r < 0x80 && gb_is_between(r, '0', '9')) { + return true; + } + return utf8proc_category(r) == UTF8PROC_CATEGORY_ND; +} + +bool rune_is_whitespace(Rune r) { + switch (r) { + case ' ': + case '\t': + case '\n': + case '\r': + return true; + } + return false; +} + + +bool is_string_an_identifier(String s) { + if (s.len < 1) { + return false; + } + isize offset = 0; + while (offset < s.len) { + bool ok = false; + Rune r = -1; + isize size = gb_utf8_decode(s.text+offset, s.len-offset, &r); + if (offset == 0) { + ok = rune_is_letter(r); + } else { + ok = rune_is_letter(r) || rune_is_digit(r); + } + + if (!ok) { + return false; + } + offset += size; + } + + return offset == s.len; +} |