From 876820789e9dedaa6198c4cd145702485e3bd21c Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 27 May 2020 12:54:11 +0100 Subject: Add `rune_is_letter_or_digit` for tokenizer --- src/unicode.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'src/unicode.cpp') diff --git a/src/unicode.cpp b/src/unicode.cpp index b988155f7..83aa8deef 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -32,6 +32,29 @@ bool rune_is_digit(Rune r) { return utf8proc_category(r) == UTF8PROC_CATEGORY_ND; } +bool rune_is_letter_or_digit(Rune r) { + if (r < 0x80) { + if (r == '_') { + return true; + } + if (((cast(u32)r | 0x20) - 0x61) < 26) { + return true; + } + return (cast(u32)r - '0') < 10; + } + switch (utf8proc_category(r)) { + case UTF8PROC_CATEGORY_LU: + case UTF8PROC_CATEGORY_LL: + case UTF8PROC_CATEGORY_LT: + case UTF8PROC_CATEGORY_LM: + case UTF8PROC_CATEGORY_LO: + return true; + case UTF8PROC_CATEGORY_ND: + return true; + } + return false; +} + bool rune_is_whitespace(Rune r) { switch (r) { case ' ': -- cgit v1.2.3