aboutsummaryrefslogtreecommitdiff
path: root/src/unicode.cpp
diff options
context:
space:
mode:
authorgingerBill <bill@gingerbill.org>2020-05-27 12:54:11 +0100
committergingerBill <bill@gingerbill.org>2020-05-27 12:54:11 +0100
commit876820789e9dedaa6198c4cd145702485e3bd21c (patch)
treebcfb69df5b3eefdad8587d609e4139bc35b36a3f /src/unicode.cpp
parent4e21a4d46a90c56edd45f5d5c46b375742738a17 (diff)
Add `rune_is_letter_or_digit` for tokenizer
Diffstat (limited to 'src/unicode.cpp')
-rw-r--r--src/unicode.cpp23
1 files changed, 23 insertions, 0 deletions
diff --git a/src/unicode.cpp b/src/unicode.cpp
index b988155f7..83aa8deef 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -32,6 +32,29 @@ bool rune_is_digit(Rune r) {
return utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
}
+// Reports whether `r` is '_', a letter, or a decimal digit.
+bool rune_is_letter_or_digit(Rune r) {
+	if (r < 0x80) {
+		// Values below 0x80 are decided by unsigned range checks, without calling utf8proc_category.
+		u32 c = cast(u32)r;
+		bool is_alpha = ((c | 0x20) - 0x61) < 26; // setting bit 0x20 folds 'A'..'Z' onto 'a'..'z'
+		bool is_digit = (c - '0') < 10;
+		return r == '_' || is_alpha || is_digit;
+	}
+	// Everything else is decided by the category utf8proc reports for the rune.
+	switch (utf8proc_category(r)) {
+	case UTF8PROC_CATEGORY_LU:
+	case UTF8PROC_CATEGORY_LL:
+	case UTF8PROC_CATEGORY_LT:
+	case UTF8PROC_CATEGORY_LM:
+	case UTF8PROC_CATEGORY_LO:
+	case UTF8PROC_CATEGORY_ND:
+		return true;
+	default:
+		return false;
+	}
+}
+
bool rune_is_whitespace(Rune r) {
switch (r) {
case ' ':