aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author gingerBill <gingerBill@users.noreply.github.com> 2024-07-01 11:03:29 +0100
committer GitHub <noreply@github.com> 2024-07-01 11:03:29 +0100
commit fb0b95bcadeb60555920c9f3c8ecd2467c71bf83 (patch)
tree e14eb158c33e7b9d088dc8d3cb797a840df5ec12
parent 1a4edad63e45f91e8ef45836868ba8285a4e8f09 (diff)
parent 8ed5cb283b5039693e96d6f47eea6213194d6cbe (diff)
Merge pull request #3760 from Feoramund/refactor-show-error-on-line
Refactor `show_error_on_line`
-rw-r--r-- src/error.cpp 259
-rw-r--r-- src/parser.cpp 11
-rw-r--r-- src/tokenizer.cpp 4
-rw-r--r-- src/ucg/ucg.c 686
-rw-r--r-- src/ucg/ucg_tables.h 2629
-rw-r--r-- src/unicode.cpp 5
6 files changed, 3536 insertions, 58 deletions
diff --git a/src/error.cpp b/src/error.cpp
index 03d96219b..f95123f15 100644
--- a/src/error.cpp
+++ b/src/error.cpp
@@ -237,6 +237,7 @@ enum TerminalColour {
TerminalColour_Blue,
TerminalColour_Purple,
TerminalColour_Black,
+ TerminalColour_Grey,
};
gb_internal void terminal_set_colours(TerminalStyle style, TerminalColour foreground) {
@@ -256,6 +257,7 @@ gb_internal void terminal_set_colours(TerminalStyle style, TerminalColour foregr
case TerminalColour_Blue: error_out("\x1b[%s;34m", ss); break;
case TerminalColour_Purple: error_out("\x1b[%s;35m", ss); break;
case TerminalColour_Black: error_out("\x1b[%s;30m", ss); break;
+ case TerminalColour_Grey: error_out("\x1b[%s;90m", ss); break;
}
}
}
@@ -272,85 +274,234 @@ gb_internal isize show_error_on_line(TokenPos const &pos, TokenPos end) {
return -1;
}
- i32 offset = 0;
- gbString the_line = get_file_line_as_string(pos, &offset);
+ i32 error_start_index_bytes = 0;
+ gbString the_line = get_file_line_as_string(pos, &error_start_index_bytes);
defer (gb_string_free(the_line));
- if (the_line != nullptr) {
- char const *line_text = the_line;
- isize line_len = gb_string_length(the_line);
+ if (the_line == nullptr || gb_string_length(the_line) == 0) {
+ terminal_set_colours(TerminalStyle_Normal, TerminalColour_Grey);
+ error_out("\t( empty line )\n");
+ terminal_reset_colours();
+
+ if (the_line == nullptr) {
+ return -1;
+ } else {
+ return cast(isize)error_start_index_bytes;
+ }
+ }
- // TODO(bill): This assumes ASCII
+ // These two will be used like an Odin slice later.
+ char const *line_text = the_line;
+ i32 line_length_bytes = cast(i32)gb_string_length(the_line);
- enum {
- MAX_LINE_LENGTH = 80,
- MAX_TAB_WIDTH = 8,
- ELLIPSIS_PADDING = 8, // `... ...`
- MAX_LINE_LENGTH_PADDED = MAX_LINE_LENGTH-MAX_TAB_WIDTH-ELLIPSIS_PADDING,
+ ucg_grapheme* graphemes;
+ i32 line_length_runes = 0;
+ i32 line_length_graphemes = 0;
+ i32 line_width = 0;
+
+ int ucg_result = ucg_decode_grapheme_clusters(
+ permanent_allocator(), (const uint8_t*)line_text, line_length_bytes,
+ &graphemes, &line_length_runes, &line_length_graphemes, &line_width);
+
+ if (ucg_result < 0) {
+ // There was a UTF-8 parsing error.
+ // Insert a dummy grapheme so the start of the invalid rune can be pointed at.
+ graphemes = (ucg_grapheme*)gb_resize(permanent_allocator(),
+ graphemes,
+ sizeof(ucg_grapheme) * (line_length_graphemes),
+ sizeof(ucg_grapheme) * (1 + line_length_graphemes));
+
+ ucg_grapheme append = {
+ error_start_index_bytes,
+ line_length_runes,
+ 1,
};
- i32 error_length = gb_max(end.offset - pos.offset, 1);
+ graphemes[line_length_graphemes] = append;
+ }
+
+ // The units below are counted in visual, monospace cells.
+ enum {
+ MAX_LINE_LENGTH = 80,
+ MAX_TAB_WIDTH = 8,
+ ELLIPSIS_PADDING = 8, // `... ...`
+ MIN_LEFT_VIEW = 8,
+
+ // A rough estimate of how many characters we'll insert, at most:
+ MAX_INSERTED_WIDTH = MAX_TAB_WIDTH + ELLIPSIS_PADDING,
+
+ MAX_LINE_LENGTH_PADDED = MAX_LINE_LENGTH - MAX_INSERTED_WIDTH,
+ };
+
+ i32 error_start_index_graphemes = 0;
+ for (i32 i = 0; i < line_length_graphemes; i += 1) {
+ if (graphemes[i].byte_index == error_start_index_bytes) {
+ error_start_index_graphemes = i;
+ break;
+ }
+ }
+
+ if (error_start_index_graphemes == 0 && error_start_index_bytes != 0 && line_length_graphemes != 0) {
+ // The error index in graphemes was not found, but we did find a valid Unicode string.
+ //
+ // This is an edge case where the error is sitting on a newline or the
+ // end of the line, as that is the only location we could not have checked.
+ error_start_index_graphemes = line_length_graphemes;
+ }
- error_out("\t");
+ error_out("\t");
- terminal_set_colours(TerminalStyle_Bold, TerminalColour_White);
+ bool show_right_ellipsis = false;
+ i32 squiggle_padding = 0;
+ i32 window_open_bytes = 0;
+ i32 window_close_bytes = 0;
+ if (line_width > MAX_LINE_LENGTH_PADDED) {
+ // Now that we know the line is over the length limit, we have to
+ // compose a visual window in which to display the error.
+ i32 window_size_left = 0;
+ i32 window_size_right = 0;
+ i32 window_open_graphemes = 0;
- isize squiggle_extra = 0;
+ for (i32 i = error_start_index_graphemes - 1; i > 0; i -= 1) {
+ window_size_left += graphemes[i].width;
+ if (window_size_left >= MIN_LEFT_VIEW) {
+ window_open_graphemes = i;
+ window_open_bytes = graphemes[i].byte_index;
+ break;
+ }
+ }
- if (line_len > MAX_LINE_LENGTH_PADDED) {
- i32 left = MAX_TAB_WIDTH;
- i32 diff = gb_max(offset-left, 0);
- if (diff > 0) {
- line_text += diff;
- line_len -= diff;
- offset = left + ELLIPSIS_PADDING/2;
+ for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+ window_size_right += graphemes[i].width;
+ if (window_size_right >= MAX_LINE_LENGTH_PADDED - MIN_LEFT_VIEW) {
+ window_close_bytes = graphemes[i].byte_index;
+ break;
}
- if (line_len > MAX_LINE_LENGTH_PADDED) {
- line_len = MAX_LINE_LENGTH_PADDED;
- if (error_length > line_len-left) {
- error_length = cast(i32)line_len - left;
- squiggle_extra = 1;
+ }
+ if (window_close_bytes == 0) {
+ // The window ends at the end of the line.
+ window_close_bytes = line_length_bytes;
+ }
+
+ if (window_size_right < MAX_LINE_LENGTH_PADDED - MIN_LEFT_VIEW) {
+ // Hit the end of the string early on the right side; expand backwards.
+ for (i32 i = window_open_graphemes - 1; i > 0; i -= 1) {
+ window_size_left += graphemes[i].width;
+ if (window_size_left + window_size_right >= MAX_LINE_LENGTH_PADDED) {
+ window_open_graphemes = i;
+ window_open_bytes = graphemes[i].byte_index;
+ break;
}
}
- if (diff > 0) {
- error_out("... %.*s ...", cast(i32)line_len, line_text);
- } else {
- error_out("%.*s ...", cast(i32)line_len, line_text);
- }
- } else {
- error_out("%.*s", cast(i32)line_len, line_text);
}
- error_out("\n\t");
- for (i32 i = 0; i < offset; i++) {
- error_out(" ");
+ GB_ASSERT_MSG(window_close_bytes >= window_open_bytes, "Error line truncation window has wrong byte indices. (open, close: %i, %i)", window_open_bytes, window_close_bytes);
+
+ if (window_close_bytes != line_length_bytes) {
+ show_right_ellipsis = true;
}
- terminal_set_colours(TerminalStyle_Bold, TerminalColour_Green);
+ // Close the window, going left.
+ line_length_bytes = window_close_bytes;
- error_out("^");
- if (end.file_id == pos.file_id) {
- if (end.line > pos.line) {
- for (i32 i = offset; i < line_len; i++) {
- error_out("~");
- }
- } else if (end.line == pos.line && end.column > pos.column) {
- for (i32 i = 1; i < error_length-1+squiggle_extra; i++) {
- error_out("~");
- }
- if (error_length > 1 && squiggle_extra == 0) {
- error_out("^");
+ // Adjust the slice of text. In Odin, this would be:
+ // `line_text = line_text[window_open_bytes:]`
+ line_text += window_open_bytes;
+ line_length_bytes -= window_open_bytes;
+ GB_ASSERT_MSG(line_length_bytes >= 0, "Bounds-checking error: line_length_bytes");
+
+ if (window_open_bytes > 0) {
+ error_out("... ");
+ squiggle_padding += 4;
+ }
+ } else {
+ // No truncation needed.
+ window_open_bytes = 0;
+ window_close_bytes = line_length_bytes;
+ }
+
+ for (i32 i = error_start_index_graphemes; i > 0; i -= 1) {
+ if (graphemes[i].byte_index == window_open_bytes) {
+ break;
+ }
+ squiggle_padding += graphemes[i].width;
+ }
+
+ // Start printing code.
+
+ terminal_set_colours(TerminalStyle_Normal, TerminalColour_White);
+ error_out("%.*s", line_length_bytes, line_text);
+
+ i32 squiggle_length = 0;
+ bool trailing_squiggle = false;
+
+ if (end.file_id == pos.file_id) {
+ // The error has an endpoint.
+
+ if (end.line > pos.line) {
+ // Error goes to next line.
+ // Always show the ellipsis in this case
+ show_right_ellipsis = true;
+
+ for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+ squiggle_length += graphemes[i].width;
+ trailing_squiggle = true;
+ }
+
+ } else if (end.line == pos.line && end.column > pos.column) {
+ // Error terminates before line end.
+ i32 adjusted_end_index = graphemes[error_start_index_graphemes].byte_index + end.column - pos.column;
+
+ for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+ if (graphemes[i].byte_index >= adjusted_end_index) {
+ break;
+ } else if (graphemes[i].byte_index >= window_close_bytes) {
+ trailing_squiggle = true;
+ break;
}
+ squiggle_length += graphemes[i].width;
}
}
+ } else {
+ // The error is at one spot; no range known.
+ squiggle_length = 1;
+ }
- terminal_reset_colours();
+ if (show_right_ellipsis) {
+ error_out(" ...");
+ }
- error_out("\n");
- return offset;
+ error_out("\n\t");
+
+ for (i32 i = squiggle_padding; i > 0; i -= 1) {
+ error_out(" ");
}
- return -1;
+
+ terminal_set_colours(TerminalStyle_Bold, TerminalColour_Green);
+
+ if (squiggle_length > 0) {
+ error_out("^");
+ squiggle_length -= 1;
+ }
+ for (/**/; squiggle_length > 1; squiggle_length -= 1) {
+ error_out("~");
+ }
+ if (squiggle_length > 0) {
+ if (trailing_squiggle) {
+ error_out("~ ...");
+ } else {
+ error_out("^");
+ }
+ }
+
+ // NOTE(Feoramund): Specifically print a newline, then reset colours,
+ // instead of the other way around. Otherwise the printing mechanism
+ // will collapse the newline for reasons currently beyond my ken.
+ error_out("\n");
+ terminal_reset_colours();
+
+ return squiggle_padding;
}
gb_internal void error_out_empty(void) {
diff --git a/src/parser.cpp b/src/parser.cpp
index 583f4a57d..93889d1b2 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -71,6 +71,12 @@ gb_internal gbString get_file_line_as_string(TokenPos const &pos, i32 *offset_)
u8 *line_start = pos_offset;
u8 *line_end = pos_offset;
+
+ if (offset > 0 && *line_start == '\n') {
+ // Prevent an error token that starts at the boundary of a line that
+ // leads to an empty line from advancing off its line.
+ line_start -= 1;
+ }
while (line_start >= start) {
if (*line_start == '\n') {
line_start += 1;
@@ -78,6 +84,11 @@ gb_internal gbString get_file_line_as_string(TokenPos const &pos, i32 *offset_)
}
line_start -= 1;
}
+ if (line_start == start - 1) {
+ // Prevent an error on the first line from stepping behind the boundary
+ // of the text.
+ line_start += 1;
+ }
while (line_end < end) {
if (*line_end == '\n') {
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 2af41b881..4425bee29 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -786,7 +786,6 @@ gb_internal void tokenizer_get_token(Tokenizer *t, Token *token, int repeat=0) {
case '`': // Raw String Literal
case '"': // String Literal
{
- bool has_carriage_return = false;
i32 success;
Rune quote = curr_rune;
token->kind = Token_String;
@@ -816,9 +815,6 @@ gb_internal void tokenizer_get_token(Tokenizer *t, Token *token, int repeat=0) {
if (r == quote) {
break;
}
- if (r == '\r') {
- has_carriage_return = true;
- }
}
}
token->string.len = t->curr - token->string.text;
diff --git a/src/ucg/ucg.c b/src/ucg/ucg.c
new file mode 100644
index 000000000..c3e270e1a
--- /dev/null
+++ b/src/ucg/ucg.c
@@ -0,0 +1,686 @@
+/*
+ * SPDX-FileCopyrightText: (c) 2024 Feoramund
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+
+//
+// NOTE(Feoramund): This is my UCG library, adapted for use within the Odin compiler.
+// Most of the comments have been left as-is and may not strictly apply anymore.
+//
+// 1. The UCG allocator interface was replaced by gbAllocator.
+// 2. The UCG UTF-8 decoder was replaced with the one already in the compiler.
+// 3. Non-essential code was stripped.
+// 4. Some types were changed for compatibility.
+//
+
+
+/* One entry per grapheme cluster. This is the data that is allocated when an
+ * allocator is passed to ucg_decode_grapheme_clusters. */
+typedef struct {
+ i32 byte_index; /* byte offset, within the decoded string, of the rune that opened this grapheme */
+ i32 rune_index; /* number of runes decoded before the rune that opened this grapheme */
+ i32 width; /* width added to the running total when this grapheme was opened (see ucg_normalized_east_asian_width) */
+} ucg_grapheme;
+
+
+/* #include "ucg.h" */
+#include "ucg_tables.h"
+
+#define UCG_TABLE_LEN(t) (sizeof(t) / sizeof(int32_t)) /* number of int32_t values in array t; only meaningful on a real array, not a pointer */
+
+#define ZERO_WIDTH_SPACE 0x200B /* U+200B */
+#define ZERO_WIDTH_NON_JOINER 0x200C /* U+200C */
+#define ZERO_WIDTH_JOINER 0x200D /* U+200D; checked explicitly by the GB9/GB11 handling below */
+#define WORD_JOINER 0x2060 /* U+2060 */
+
+int ucg_binary_search(int32_t value, const int32_t* table, int length, int stride) { // Finds the last entry whose key is <= value.
+ GB_ASSERT(table != NULL); // `table` holds `length` entries of `stride` int32_t values each; the key is the first value of an entry.
+ GB_ASSERT(length > 0); // NOTE(review): assumes entries are sorted ascending by key -- confirm against ucg_tables.h.
+ GB_ASSERT(stride > 0); // Returns an index into `table` (already scaled by stride), or -1 when value is below the first key.
+
+ int n = length; // number of entries still under consideration
+ int t = 0; // table index of the first entry still under consideration
+ for (/**/; n > 1; /**/) {
+ int m = n / 2;
+ int p = t + m * stride; // table index of the middle entry
+ if (value >= table[p]) {
+ t = p; // keep the upper half, starting at the middle entry
+ n = n - m;
+ } else {
+ n = m; // keep the lower half, excluding the middle entry
+ }
+ }
+ if (n != 0 && value >= table[t]) { // the caller must still check the upper bound of the entry at t
+ return t;
+ }
+ return -1;
+}
+
+//
+// The procedures below are accurate as of Unicode 15.1.0.
+//
+
+bool ucg_is_control(int32_t r) { // Reports whether r is <= 0x1F (which includes negative values) or within 0x7F..0x9F.
+ if (r <= 0x1F || (0x7F <= r && r <= 0x9F)) {
+ return true;
+ }
+ return false;
+}
+
+// Emoji_Modifier: reports whether r lies in U+1F3FB..U+1F3FF, inclusive.
+bool ucg_is_emoji_modifier(int32_t r) {
+ return 0x1F3FB <= r && r <= 0x1F3FF;
+}
+
+// Regional_Indicator: reports whether r lies in U+1F1E6..U+1F1FF, inclusive.
+bool ucg_is_regional_indicator(int32_t r) {
+ return 0x1F1E6 <= r && r <= 0x1F1FF;
+}
+
+// General_Category=Enclosing_Mark: reports whether r is one of the hard-coded singletons or ranges below.
+bool ucg_is_enclosing_mark(int32_t r) {
+ switch (r) { // isolated code points
+ case 0x0488:
+ case 0x0489:
+ case 0x1ABE:
+ return true;
+ }
+
+ if (0x20DD <= r && r <= 0x20E0) { return true; } // inclusive ranges
+ if (0x20E2 <= r && r <= 0x20E4) { return true; }
+ if (0xA670 <= r && r <= 0xA672) { return true; }
+
+ return false;
+}
+
+// Prepended_Concatenation_Mark: reports whether r is one of the hard-coded singletons or ranges below.
+bool ucg_is_prepended_concatenation_mark(int32_t r) {
+ switch (r) { // isolated code points
+ case 0x006DD:
+ case 0x0070F:
+ case 0x008E2:
+ case 0x110BD:
+ case 0x110CD:
+ return true;
+ }
+
+ if (0x00600 <= r && r <= 0x00605) { return true; } // inclusive ranges
+ if (0x00890 <= r && r <= 0x00891) { return true; }
+
+ return false;
+}
+
+// General_Category=Spacing_Mark: reports whether r falls inside a (low, high) pair of ucg_spacing_mark_ranges.
+bool ucg_is_spacing_mark(int32_t r) {
+ intptr_t p = ucg_binary_search(r, ucg_spacing_mark_ranges, UCG_TABLE_LEN(ucg_spacing_mark_ranges)/2, 2); // entry count = values / 2; stride 2 = (low, high)
+ if (p >= 0 && ucg_spacing_mark_ranges[p] <= r && r <= ucg_spacing_mark_ranges[p+1]) { // p is -1 when r is below the first range
+ return true;
+ }
+ return false;
+}
+
+// General_Category=Nonspacing_Mark: reports whether r falls inside a (low, high) pair of ucg_nonspacing_mark_ranges.
+bool ucg_is_nonspacing_mark(int32_t r) {
+ intptr_t p = ucg_binary_search(r, ucg_nonspacing_mark_ranges, UCG_TABLE_LEN(ucg_nonspacing_mark_ranges)/2, 2); // entry count = values / 2; stride 2 = (low, high)
+ if (p >= 0 && ucg_nonspacing_mark_ranges[p] <= r && r <= ucg_nonspacing_mark_ranges[p+1]) { // p is -1 when r is below the first range
+ return true;
+ }
+ return false;
+}
+
+// Extended_Pictographic: reports whether r falls inside a (low, high) pair of ucg_emoji_extended_pictographic_ranges.
+bool ucg_is_emoji_extended_pictographic(int32_t r) {
+ intptr_t p = ucg_binary_search(r, ucg_emoji_extended_pictographic_ranges, UCG_TABLE_LEN(ucg_emoji_extended_pictographic_ranges)/2, 2); // entry count = values / 2; stride 2 = (low, high)
+ if (p >= 0 && ucg_emoji_extended_pictographic_ranges[p] <= r && r <= ucg_emoji_extended_pictographic_ranges[p+1]) { // p is -1 when r is below the first range
+ return true;
+ }
+ return false;
+}
+
+// Grapheme_Extend: reports whether r falls inside a (low, high) pair of ucg_grapheme_extend_ranges.
+bool ucg_is_grapheme_extend(int32_t r) {
+ intptr_t p = ucg_binary_search(r, ucg_grapheme_extend_ranges, UCG_TABLE_LEN(ucg_grapheme_extend_ranges)/2, 2); // entry count = values / 2; stride 2 = (low, high)
+ if (p >= 0 && ucg_grapheme_extend_ranges[p] <= r && r <= ucg_grapheme_extend_ranges[p+1]) { // p is -1 when r is below the first range
+ return true;
+ }
+ return false;
+}
+
+
+// Hangul_Syllable_Type=Leading_Jamo: reports whether r lies in U+1100..U+115F or U+A960..U+A97C, inclusive.
+bool ucg_is_hangul_syllable_leading(int32_t r) {
+ return (0x1100 <= r && r <= 0x115F) || (0xA960 <= r && r <= 0xA97C);
+}
+
+// Hangul_Syllable_Type=Vowel_Jamo: reports whether r lies in U+1160..U+11A7 or U+D7B0..U+D7C6, inclusive.
+bool ucg_is_hangul_syllable_vowel(int32_t r) {
+ return (0x1160 <= r && r <= 0x11A7) || (0xD7B0 <= r && r <= 0xD7C6);
+}
+
+// Hangul_Syllable_Type=Trailing_Jamo: reports whether r lies in U+11A8..U+11FF or U+D7CB..U+D7FB, inclusive.
+bool ucg_is_hangul_syllable_trailing(int32_t r) {
+ return (0x11A8 <= r && r <= 0x11FF) || (0xD7CB <= r && r <= 0xD7FB);
+}
+
+// Hangul_Syllable_Type=LV_Syllable: reports whether r is exactly one of the values in ucg_hangul_syllable_lv_singlets.
+bool ucg_is_hangul_syllable_lv(int32_t r) {
+ intptr_t p = ucg_binary_search(r, ucg_hangul_syllable_lv_singlets, UCG_TABLE_LEN(ucg_hangul_syllable_lv_singlets), 1); // stride 1: each entry is a single code point
+ if (p >= 0 && r == ucg_hangul_syllable_lv_singlets[p]) { // the search yields the nearest key <= r, so require an exact match
+ return true;
+ }
+ return false;
+}
+
+// Hangul_Syllable_Type=LVT_Syllable: reports whether r falls inside a (low, high) pair of ucg_hangul_syllable_lvt_ranges.
+bool ucg_is_hangul_syllable_lvt(int32_t r) {
+ intptr_t p = ucg_binary_search(r, ucg_hangul_syllable_lvt_ranges, UCG_TABLE_LEN(ucg_hangul_syllable_lvt_ranges)/2, 2); // entry count = values / 2; stride 2 = (low, high)
+ if (p >= 0 && ucg_hangul_syllable_lvt_ranges[p] <= r && r <= ucg_hangul_syllable_lvt_ranges[p+1]) { // p is -1 when r is below the first range
+ return true;
+ }
+ return false;
+}
+
+
+// Indic_Syllabic_Category=Consonant_Preceding_Repha: reports whether r is one of the four code points listed below.
+bool ucg_is_indic_consonant_preceding_repha(int32_t r) {
+ switch (r) {
+ case 0x00D4E:
+ case 0x11941:
+ case 0x11D46:
+ case 0x11F02:
+ return true;
+ }
+ return false;
+}
+
+// Indic_Syllabic_Category=Consonant_Prefixed: reports whether r is one of the hard-coded singletons or ranges below.
+bool ucg_is_indic_consonant_prefixed(int32_t r) {
+ switch (r) { // isolated code points
+ case 0x1193F:
+ case 0x11A3A:
+ return true;
+ }
+
+ if (0x111C2 <= r && r <= 0x111C3) { return true; } // inclusive ranges
+ if (0x11A84 <= r && r <= 0x11A89) { return true; }
+
+ return false;
+}
+
+// Indic_Conjunct_Break=Linker: reports whether r is one of the six code points listed below.
+bool ucg_is_indic_conjunct_break_linker(int32_t r) {
+ switch (r) {
+ case 0x094D:
+ case 0x09CD:
+ case 0x0ACD:
+ case 0x0B4D:
+ case 0x0C4D:
+ case 0x0D4D:
+ return true;
+ }
+ return false;
+}
+
+// Indic_Conjunct_Break=Consonant: reports whether r falls inside a (low, high) pair of ucg_indic_conjunct_break_consonant_ranges.
+bool ucg_is_indic_conjunct_break_consonant(int32_t r) {
+ intptr_t p = ucg_binary_search(r, ucg_indic_conjunct_break_consonant_ranges, UCG_TABLE_LEN(ucg_indic_conjunct_break_consonant_ranges)/2, 2); // entry count = values / 2; stride 2 = (low, high)
+ if (p >= 0 && ucg_indic_conjunct_break_consonant_ranges[p] <= r && r <= ucg_indic_conjunct_break_consonant_ranges[p+1]) { // p is -1 when r is below the first range
+ return true;
+ }
+ return false;
+}
+
+// Indic_Conjunct_Break=Extend: reports whether r falls inside a (low, high) pair of ucg_indic_conjunct_break_extend_ranges.
+bool ucg_is_indic_conjunct_break_extend(int32_t r) {
+ intptr_t p = ucg_binary_search(r, ucg_indic_conjunct_break_extend_ranges, UCG_TABLE_LEN(ucg_indic_conjunct_break_extend_ranges)/2, 2); // entry count = values / 2; stride 2 = (low, high)
+ if (p >= 0 && ucg_indic_conjunct_break_extend_ranges[p] <= r && r <= ucg_indic_conjunct_break_extend_ranges[p+1]) { // p is -1 when r is below the first range
+ return true;
+ }
+ return false;
+}
+
+
+/*
+Grapheme_Cluster_Break=Prepend, composed from three predicates defined earlier in this file:
+```
+Indic_Syllabic_Category = Consonant_Preceding_Repha, or
+Indic_Syllabic_Category = Consonant_Prefixed, or
+Prepended_Concatenation_Mark = Yes
+```
+*/
+bool ucg_is_gcb_prepend_class(int32_t r) {
+ return ucg_is_indic_consonant_preceding_repha(r) || ucg_is_indic_consonant_prefixed(r) || ucg_is_prepended_concatenation_mark(r);
+}
+
+/*
+Grapheme_Cluster_Break=Extend, composed from two predicates defined earlier in this file:
+```
+Grapheme_Extend = Yes, or
+Emoji_Modifier = Yes
+
+This includes:
+General_Category = Nonspacing_Mark
+General_Category = Enclosing_Mark
+U+200C ZERO WIDTH NON-JOINER
+
+plus a few General_Category = Spacing_Mark needed for canonical equivalence.
+```
+*/
+bool ucg_is_gcb_extend_class(int32_t r) {
+ return ucg_is_grapheme_extend(r) || ucg_is_emoji_modifier(r); // the categories listed above are expected to be covered by the Grapheme_Extend table
+}
+
+// Estimates how many monospace cells the rune r occupies. Return values:
+//
+// - 2 if East_Asian_Width=F or W, or
+// - 0 if non-printable / zero-width, or
+// - 1 in all other cases.
+//
+int ucg_normalized_east_asian_width(int32_t r) {
+ if (ucg_is_control(r)) { // controls (this includes '\t' and '\n') count as zero cells
+ return 0;
+ } else if (r <= 0x10FF) {
+ // Easy early out for low runes: everything up to U+10FF that is not a control is reported as one cell without a table lookup.
+ return 1;
+ }
+
+ switch (r) {
+ // This is a different interpretation of the BOM which occurs in the middle of text.
+ case 0xFEFF: /* ZERO_WIDTH_NO_BREAK_SPACE */
+ case ZERO_WIDTH_SPACE:
+ case ZERO_WIDTH_NON_JOINER:
+ case ZERO_WIDTH_JOINER:
+ case WORD_JOINER:
+ return 0;
+ }
+
+ intptr_t p = ucg_binary_search(r, ucg_normalized_east_asian_width_ranges, UCG_TABLE_LEN(ucg_normalized_east_asian_width_ranges)/3, 3); // stride 3: entries are (low, high, width)
+ if (p >= 0 && ucg_normalized_east_asian_width_ranges[p] <= r && r <= ucg_normalized_east_asian_width_ranges[p+1]) {
+ return (int)ucg_normalized_east_asian_width_ranges[p+2]; // third value of the entry is the width
+ }
+ return 1; // not covered by any table range
+}
+
+//
+// End of Unicode 15.1.0 block.
+//
+
+enum grapheme_cluster_sequence { // which multi-rune sequence, if any, the decoder is currently inside
+ None, // no sequence active; the deferred step resets to this when continue_sequence is false
+ Indic, // entered when an Indic_Conjunct_Break=Consonant rune is seen (GB9c)
+ Emoji, // entered when an Extended_Pictographic rune is seen (GB11)
+ Regional, // entered when a Regional_Indicator rune is seen (GB12/GB13)
+};
+
+typedef struct { // working state of ucg_decode_grapheme_clusters; zero-initialised before decoding starts
+ ucg_grapheme* graphemes; // array grown by the deferred step, one entry per grapheme
+ i32 rune_count; // runes processed so far
+ i32 grapheme_count; // graphemes found so far
+ i32 width; // running total of estimated display width
+
+ int32_t last_rune; // the rune handled by the previous step
+ bool last_rune_breaks_forward; // set after a control rune: the next bypassed rune must start a new grapheme
+
+ i32 last_width; // value of `width` when the latest grapheme entry was appended
+ i32 last_grapheme_count; // value of `grapheme_count` when the latest grapheme entry was appended
+
+ bool bypass_next_rune; // set after a control or Prepend rune: the next non-control rune skips the remaining rule checks
+
+ int regional_indicator_counter; // Regional_Indicator runes seen in the current sequence; parity decides whether to break
+
+ enum grapheme_cluster_sequence current_sequence; // sequence kind currently being tracked
+ bool continue_sequence; // must be set during a step for current_sequence to survive into the next one
+} ucg_decoder_state;
+
+
+void _ucg_decode_grapheme_clusters_deferred_step( // Bookkeeping run once per decoded rune, after the break rules have updated grapheme_count.
+ gbAllocator allocator, // used to grow state->graphemes
+ ucg_decoder_state* state, // decoder state, updated in place
+ i32 byte_index, // byte offset of this_rune within the string being decoded
+ int32_t this_rune // the rune that was just classified
+) {
+ // "Break at the start and end of text, unless the text is empty."
+ //
+ // GB1: sot ÷ Any
+ // GB2: Any ÷ eot
+ if (state->rune_count == 0 && state->grapheme_count == 0) { // the first rune always opens a grapheme, even if no rule counted one
+ state->grapheme_count += 1;
+ }
+
+ if (state->grapheme_count > state->last_grapheme_count) { // a new grapheme was opened by this rune
+ state->width += ucg_normalized_east_asian_width(this_rune); // only the opening rune contributes to the width
+
+ /* if (allocator != NULL) { */
+ state->graphemes = (ucg_grapheme*)gb_resize(allocator,
+ state->graphemes,
+ sizeof(ucg_grapheme) * (state->grapheme_count), // old size passed to gb_resize
+ sizeof(ucg_grapheme) * (1 + state->grapheme_count)); // new size: one element more than the current count
+
+ ucg_grapheme append = {
+ byte_index,
+ state->rune_count,
+ state->width - state->last_width, // width added since the previous grapheme entry
+ };
+
+ state->graphemes[state->grapheme_count - 1] = append;
+ /* } */
+
+ state->last_grapheme_count = state->grapheme_count;
+ state->last_width = state->width;
+ }
+
+ state->last_rune = this_rune;
+ state->rune_count += 1;
+
+ if (!state->continue_sequence) { // the rule handlers did not ask to keep the sequence alive
+ state->current_sequence = None;
+ state->regional_indicator_counter = 0;
+ }
+ state->continue_sequence = false; // must be re-armed by the next rune's rule handler
+}
+
+int ucg_decode_grapheme_clusters( // Splits UTF-8 text into grapheme clusters; returns 0 on success, -1 on a decoding error.
+ gbAllocator allocator, // used to grow the grapheme array
+ const uint8_t* str, // UTF-8 bytes to segment
+ int str_len, // number of bytes in str
+
+ ucg_grapheme** out_graphemes, // optional out: grapheme array (partial on error)
+ i32* out_rune_count, // optional out: runes decoded (partial on error)
+ i32* out_grapheme_count, // optional out: graphemes found (partial on error)
+ i32* out_width // optional out: estimated display width (partial on error)
+) {
+ // The following procedure implements text segmentation by breaking on
+ // Grapheme Cluster Boundaries[1], using the values[2] and rules[3] from
+ // the Unicode® Standard Annex #29, entitled:
+ //
+ // UNICODE TEXT SEGMENTATION
+ //
+ // Version: Unicode 15.1.0
+ // Date: 2023-08-16
+ // Revision: 43
+ //
+ // This procedure is conformant[4] to UAX29-C1-1, otherwise known as the
+ // extended, non-legacy ruleset.
+ //
+ // Please see the references for more information.
+ //
+ //
+ // [1]: https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
+ // [2]: https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table
+ // [3]: https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
+ // [4]: https://www.unicode.org/reports/tr29/#Conformance
+
+ // Additionally, this procedure takes into account Standard Annex #11,
+ // in order to estimate how visually wide the string will appear on a
+ // monospaced display. This can only ever be a rough guess, as this tends
+ // to be an implementation detail relating to which fonts are being used,
+ // how codepoints are interpreted and drawn, if codepoint sequences are
+ // interpreted correctly, et cetera.
+ //
+ // For example, a program may not properly interpret an emoji modifier
+ // sequence and print the component glyphs instead of one whole glyph.
+ //
+ // See here for more information: https://www.unicode.org/reports/tr11/
+ //
+ // NOTE: There is no explicit mention of what to do with zero-width spaces
+ // as far as grapheme cluster segmentation goes, therefore this
+ // implementation may count and return graphemes with a `width` of zero.
+ //
+ // Treat them as any other space.
+
+ ucg_decoder_state state = {0};
+
+#define UCG_DEFERRED_DECODE_STEP() (_ucg_decode_grapheme_clusters_deferred_step(allocator, &state, byte_index, this_rune)) // shorthand: every rule branch ends with this call, then `continue`
+
+ for (i32 byte_index = 0, bytes_advanced = 0; byte_index < str_len; byte_index += bytes_advanced) {
+ int32_t this_rune = GB_RUNE_INVALID;
+ bytes_advanced = (i32)(utf8_decode(str+byte_index, str_len-byte_index, &this_rune));
+ if (this_rune == GB_RUNE_INVALID || bytes_advanced == 0) { // note: text that decodes to the GB_RUNE_INVALID value is treated as an error too
+ // There was a Unicode parsing error; bail out.
+ if (out_graphemes != NULL) { *out_graphemes = state.graphemes; }
+ if (out_rune_count != NULL) { *out_rune_count = state.rune_count; }
+ if (out_grapheme_count != NULL) { *out_grapheme_count = state.grapheme_count; }
+ if (out_width != NULL) { *out_width = state.width; }
+
+ // Return an error code.
+ return -1;
+ }
+
+ // "Do not break between a CR and LF. Otherwise, break before and after controls."
+ //
+ // GB3: CR × LF
+ // GB4: (Control | CR | LF) ÷
+ // GB5: ÷ (Control | CR | LF)
+ if (this_rune == '\n' && state.last_rune == '\r') { // LF joins the grapheme opened by the preceding CR
+ state.last_rune_breaks_forward = false;
+ state.bypass_next_rune = false;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ if (ucg_is_control(this_rune)) { // a control is its own grapheme and forces a break after it
+ state.grapheme_count += 1;
+ state.last_rune_breaks_forward = true;
+ state.bypass_next_rune = true;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ // (This check is for rules that work forwards, instead of backwards.)
+ if (state.bypass_next_rune) {
+ if (state.last_rune_breaks_forward) { // previous rune was a control: start a new grapheme
+ state.grapheme_count += 1;
+ state.last_rune_breaks_forward = false;
+ }
+
+ state.bypass_next_rune = false;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ // (Optimization 1: Prevent low runes from proceeding further.)
+ //
+ // * 0xA9 and 0xAE are in the Extended_Pictographic range,
+ // which is checked later in GB11.
+ if (this_rune != 0xA9 && this_rune != 0xAE && this_rune <= 0x2FF) {
+ state.grapheme_count += 1;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ // (Optimization 2: Check if the rune is in the Hangul space before getting specific.)
+ if (0x1100 <= this_rune && this_rune <= 0xD7FB) {
+ // "Do not break Hangul syllable sequences."
+ //
+ // GB6: L × (L | V | LV | LVT)
+ // GB7: (LV | V) × (V | T)
+ // GB8: (LVT | T) × T
+ if (ucg_is_hangul_syllable_leading(this_rune) ||
+ ucg_is_hangul_syllable_lv(this_rune) ||
+ ucg_is_hangul_syllable_lvt(this_rune))
+ {
+ if (!ucg_is_hangul_syllable_leading(state.last_rune)) { // GB6: no break only when preceded by L
+ state.grapheme_count += 1;
+ }
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ if (ucg_is_hangul_syllable_vowel(this_rune)) {
+ if (ucg_is_hangul_syllable_leading(state.last_rune) ||
+ ucg_is_hangul_syllable_vowel(state.last_rune) ||
+ ucg_is_hangul_syllable_lv(state.last_rune))
+ {
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+ state.grapheme_count += 1;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ if (ucg_is_hangul_syllable_trailing(this_rune)) {
+ if (ucg_is_hangul_syllable_trailing(state.last_rune) ||
+ ucg_is_hangul_syllable_lvt(state.last_rune) ||
+ ucg_is_hangul_syllable_lv(state.last_rune) ||
+ ucg_is_hangul_syllable_vowel(state.last_rune))
+ {
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+ state.grapheme_count += 1;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+ }
+
+ // "Do not break before extending characters or ZWJ."
+ //
+ // GB9: × (Extend | ZWJ)
+ if (this_rune == ZERO_WIDTH_JOINER) { // ZWJ never breaks and keeps any active sequence alive
+ state.continue_sequence = true;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ if (ucg_is_gcb_extend_class(this_rune)) { // Extend never opens a grapheme; only sequence tracking differs below
+ // (Support for GB9c.)
+ if (state.current_sequence == Indic) {
+ if (ucg_is_indic_conjunct_break_extend(this_rune) && (
+ ucg_is_indic_conjunct_break_linker(state.last_rune) ||
+ ucg_is_indic_conjunct_break_consonant(state.last_rune) ))
+ {
+ state.continue_sequence = true;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ if (ucg_is_indic_conjunct_break_linker(this_rune) && (
+ ucg_is_indic_conjunct_break_linker(state.last_rune) ||
+ ucg_is_indic_conjunct_break_extend(state.last_rune) ||
+ ucg_is_indic_conjunct_break_consonant(state.last_rune) ))
+ {
+ state.continue_sequence = true;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ // (Support for GB11.)
+ if (state.current_sequence == Emoji && (
+ ucg_is_gcb_extend_class(state.last_rune) ||
+ ucg_is_emoji_extended_pictographic(state.last_rune) ))
+ {
+ state.continue_sequence = true;
+ }
+
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ // _The GB9a and GB9b rules only apply to extended grapheme clusters:_
+ // "Do not break before SpacingMarks, or after Prepend characters."
+ //
+ // GB9a: × SpacingMark
+ // GB9b: Prepend ×
+ if (ucg_is_spacing_mark(this_rune)) {
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ if (ucg_is_gcb_prepend_class(this_rune)) { // Prepend opens a grapheme and glues the next rune onto it via the bypass flag
+ state.grapheme_count += 1;
+ state.bypass_next_rune = true;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ // _The GB9c rule only applies to extended grapheme clusters:_
+ // "Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker."
+ //
+ // GB9c: \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* × \p{InCB=Consonant}
+ if (ucg_is_indic_conjunct_break_consonant(this_rune)) {
+ if (state.current_sequence == Indic) {
+ if (state.last_rune == ZERO_WIDTH_JOINER ||
+ ucg_is_indic_conjunct_break_linker(state.last_rune))
+ {
+ state.continue_sequence = true;
+ } else {
+ state.grapheme_count += 1;
+ }
+ } else {
+ state.grapheme_count += 1;
+ state.current_sequence = Indic;
+ state.continue_sequence = true;
+ }
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ if (ucg_is_indic_conjunct_break_extend(this_rune)) {
+ if (state.current_sequence == Indic) {
+ if (ucg_is_indic_conjunct_break_consonant(state.last_rune) ||
+ ucg_is_indic_conjunct_break_linker(state.last_rune))
+ {
+ state.continue_sequence = true;
+ } else {
+ state.grapheme_count += 1;
+ }
+ }
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ if (ucg_is_indic_conjunct_break_linker(this_rune)) {
+ if (state.current_sequence == Indic) {
+ if (ucg_is_indic_conjunct_break_extend(state.last_rune) ||
+ ucg_is_indic_conjunct_break_linker(state.last_rune))
+ {
+ state.continue_sequence = true;
+ } else {
+ state.grapheme_count += 1;
+ }
+ }
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ //
+ // (Curiously, there is no GB10.)
+ //
+
+ // "Do not break within emoji modifier sequences or emoji zwj sequences."
+ //
+ // GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
+ if (ucg_is_emoji_extended_pictographic(this_rune)) {
+ if (state.current_sequence != Emoji || state.last_rune != ZERO_WIDTH_JOINER) { // only a ZWJ inside an emoji sequence suppresses the break
+ state.grapheme_count += 1;
+ }
+ state.current_sequence = Emoji;
+ state.continue_sequence = true;
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ // "Do not break within emoji flag sequences.
+ // That is, do not break between regional indicator (RI) symbols
+ // if there is an odd number of RI characters before the break point."
+ //
+ // GB12: sot (RI RI)* RI × RI
+ // GB13: [^RI] (RI RI)* RI × RI
+ if (ucg_is_regional_indicator(this_rune)) {
+ if ((state.regional_indicator_counter & 1) == 0) { // an even count so far means this RI starts a new pair
+ state.grapheme_count += 1;
+ }
+
+ state.current_sequence = Regional;
+ state.continue_sequence = true;
+ state.regional_indicator_counter += 1;
+
+ UCG_DEFERRED_DECODE_STEP(); continue;
+ }
+
+ // "Otherwise, break everywhere."
+ //
+ // GB999: Any ÷ Any
+ state.grapheme_count += 1;
+ UCG_DEFERRED_DECODE_STEP();
+ }
+
+#undef UCG_DEFERRED_DECODE_STEP
+
+ if (out_graphemes != NULL) { *out_graphemes = state.graphemes; }
+ if (out_rune_count != NULL) { *out_rune_count = state.rune_count; }
+ if (out_grapheme_count != NULL) { *out_grapheme_count = state.grapheme_count; }
+ if (out_width != NULL) { *out_width = state.width; }
+
+ return 0;
+}
+
+#undef UCG_TABLE_LEN
+#undef ZERO_WIDTH_SPACE
+#undef ZERO_WIDTH_NON_JOINER
+#undef ZERO_WIDTH_JOINER
+#undef WORD_JOINER
diff --git a/src/ucg/ucg_tables.h b/src/ucg/ucg_tables.h
new file mode 100644
index 000000000..a33f9f898
--- /dev/null
+++ b/src/ucg/ucg_tables.h
@@ -0,0 +1,2629 @@
+/*
+ * SPDX-FileCopyrightText: (c) 2024 Feoramund
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#ifndef _UCG_TABLES_INCLUDED
+#define _UCG_TABLES_INCLUDED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+//
+// The tables below are accurate as of Unicode 15.1.0.
+//
+
+static const int32_t ucg_spacing_mark_ranges[] = {
+ 0x0903, 0x0903,
+ 0x093B, 0x093B,
+ 0x093E, 0x0940,
+ 0x0949, 0x094C,
+ 0x094E, 0x094F,
+ 0x0982, 0x0983,
+ 0x09BE, 0x09C0,
+ 0x09C7, 0x09C8,
+ 0x09CB, 0x09CC,
+ 0x09D7, 0x09D7,
+ 0x0A03, 0x0A03,
+ 0x0A3E, 0x0A40,
+ 0x0A83, 0x0A83,
+ 0x0ABE, 0x0AC0,
+ 0x0AC9, 0x0AC9,
+ 0x0ACB, 0x0ACC,
+ 0x0B02, 0x0B03,
+ 0x0B3E, 0x0B3E,
+ 0x0B40, 0x0B40,
+ 0x0B47, 0x0B48,
+ 0x0B4B, 0x0B4C,
+ 0x0B57, 0x0B57,
+ 0x0BBE, 0x0BBF,
+ 0x0BC1, 0x0BC2,
+ 0x0BC6, 0x0BC8,
+ 0x0BCA, 0x0BCC,
+ 0x0BD7, 0x0BD7,
+ 0x0C01, 0x0C03,
+ 0x0C41, 0x0C44,
+ 0x0C82, 0x0C83,
+ 0x0CBE, 0x0CBE,
+ 0x0CC0, 0x0CC4,
+ 0x0CC7, 0x0CC8,
+ 0x0CCA, 0x0CCB,
+ 0x0CD5, 0x0CD6,
+ 0x0CF3, 0x0CF3,
+ 0x0D02, 0x0D03,
+ 0x0D3E, 0x0D40,
+ 0x0D46, 0x0D48,
+ 0x0D4A, 0x0D4C,
+ 0x0D57, 0x0D57,
+ 0x0D82, 0x0D83,
+ 0x0DCF, 0x0DD1,
+ 0x0DD8, 0x0DDF,
+ 0x0DF2, 0x0DF3,
+ 0x0F3E, 0x0F3F,
+ 0x0F7F, 0x0F7F,
+ 0x102B, 0x102C,
+ 0x1031, 0x1031,
+ 0x1038, 0x1038,
+ 0x103B, 0x103C,
+ 0x1056, 0x1057,
+ 0x1062, 0x1064,
+ 0x1067, 0x106D,
+ 0x1083, 0x1084,
+ 0x1087, 0x108C,
+ 0x108F, 0x108F,
+ 0x109A, 0x109C,
+ 0x1715, 0x1715,
+ 0x1734, 0x1734,
+ 0x17B6, 0x17B6,
+ 0x17BE, 0x17C5,
+ 0x17C7, 0x17C8,
+ 0x1923, 0x1926,
+ 0x1929, 0x192B,
+ 0x1930, 0x1931,
+ 0x1933, 0x1938,
+ 0x1A19, 0x1A1A,
+ 0x1A55, 0x1A55,
+ 0x1A57, 0x1A57,
+ 0x1A61, 0x1A61,
+ 0x1A63, 0x1A64,
+ 0x1A6D, 0x1A72,
+ 0x1B04, 0x1B04,
+ 0x1B35, 0x1B35,
+ 0x1B3B, 0x1B3B,
+ 0x1B3D, 0x1B41,
+ 0x1B43, 0x1B44,
+ 0x1B82, 0x1B82,
+ 0x1BA1, 0x1BA1,
+ 0x1BA6, 0x1BA7,
+ 0x1BAA, 0x1BAA,
+ 0x1BE7, 0x1BE7,
+ 0x1BEA, 0x1BEC,
+ 0x1BEE, 0x1BEE,
+ 0x1BF2, 0x1BF3,
+ 0x1C24, 0x1C2B,
+ 0x1C34, 0x1C35,
+ 0x1CE1, 0x1CE1,
+ 0x1CF7, 0x1CF7,
+ 0x302E, 0x302F,
+ 0xA823, 0xA824,
+ 0xA827, 0xA827,
+ 0xA880, 0xA881,
+ 0xA8B4, 0xA8C3,
+ 0xA952, 0xA953,
+ 0xA983, 0xA983,
+ 0xA9B4, 0xA9B5,
+ 0xA9BA, 0xA9BB,
+ 0xA9BE, 0xA9C0,
+ 0xAA2F, 0xAA30,
+ 0xAA33, 0xAA34,
+ 0xAA4D, 0xAA4D,
+ 0xAA7B, 0xAA7B,
+ 0xAA7D, 0xAA7D,
+ 0xAAEB, 0xAAEB,
+ 0xAAEE, 0xAAEF,
+ 0xAAF5, 0xAAF5,
+ 0xABE3, 0xABE4,
+ 0xABE6, 0xABE7,
+ 0xABE9, 0xABEA,
+ 0xABEC, 0xABEC,
+ 0x11000, 0x11000,
+ 0x11002, 0x11002,
+ 0x11082, 0x11082,
+ 0x110B0, 0x110B2,
+ 0x110B7, 0x110B8,
+ 0x1112C, 0x1112C,
+ 0x11145, 0x11146,
+ 0x11182, 0x11182,
+ 0x111B3, 0x111B5,
+ 0x111BF, 0x111C0,
+ 0x111CE, 0x111CE,
+ 0x1122C, 0x1122E,
+ 0x11232, 0x11233,
+ 0x11235, 0x11235,
+ 0x112E0, 0x112E2,
+ 0x11302, 0x11303,
+ 0x1133E, 0x1133F,
+ 0x11341, 0x11344,
+ 0x11347, 0x11348,
+ 0x1134B, 0x1134D,
+ 0x11357, 0x11357,
+ 0x11362, 0x11363,
+ 0x11435, 0x11437,
+ 0x11440, 0x11441,
+ 0x11445, 0x11445,
+ 0x114B0, 0x114B2,
+ 0x114B9, 0x114B9,
+ 0x114BB, 0x114BE,
+ 0x114C1, 0x114C1,
+ 0x115AF, 0x115B1,
+ 0x115B8, 0x115BB,
+ 0x115BE, 0x115BE,
+ 0x11630, 0x11632,
+ 0x1163B, 0x1163C,
+ 0x1163E, 0x1163E,
+ 0x116AC, 0x116AC,
+ 0x116AE, 0x116AF,
+ 0x116B6, 0x116B6,
+ 0x11720, 0x11721,
+ 0x11726, 0x11726,
+ 0x1182C, 0x1182E,
+ 0x11838, 0x11838,
+ 0x11930, 0x11935,
+ 0x11937, 0x11938,
+ 0x1193D, 0x1193D,
+ 0x11940, 0x11940,
+ 0x11942, 0x11942,
+ 0x119D1, 0x119D3,
+ 0x119DC, 0x119DF,
+ 0x119E4, 0x119E4,
+ 0x11A39, 0x11A39,
+ 0x11A57, 0x11A58,
+ 0x11A97, 0x11A97,
+ 0x11C2F, 0x11C2F,
+ 0x11C3E, 0x11C3E,
+ 0x11CA9, 0x11CA9,
+ 0x11CB1, 0x11CB1,
+ 0x11CB4, 0x11CB4,
+ 0x11D8A, 0x11D8E,
+ 0x11D93, 0x11D94,
+ 0x11D96, 0x11D96,
+ 0x11EF5, 0x11EF6,
+ 0x11F03, 0x11F03,
+ 0x11F34, 0x11F35,
+ 0x11F3E, 0x11F3F,
+ 0x11F41, 0x11F41,
+ 0x16F51, 0x16F87,
+ 0x16FF0, 0x16FF1,
+ 0x1D165, 0x1D166,
+ 0x1D16D, 0x1D172,
+};
+
+static const int32_t ucg_nonspacing_mark_ranges[] = {
+ 0x0300, 0x036F,
+ 0x0483, 0x0487,
+ 0x0591, 0x05BD,
+ 0x05BF, 0x05BF,
+ 0x05C1, 0x05C2,
+ 0x05C4, 0x05C5,
+ 0x05C7, 0x05C7,
+ 0x0610, 0x061A,
+ 0x064B, 0x065F,
+ 0x0670, 0x0670,
+ 0x06D6, 0x06DC,
+ 0x06DF, 0x06E4,
+ 0x06E7, 0x06E8,
+ 0x06EA, 0x06ED,
+ 0x0711, 0x0711,
+ 0x0730, 0x074A,
+ 0x07A6, 0x07B0,
+ 0x07EB, 0x07F3,
+ 0x07FD, 0x07FD,
+ 0x0816, 0x0819,
+ 0x081B, 0x0823,
+ 0x0825, 0x0827,
+ 0x0829, 0x082D,
+ 0x0859, 0x085B,
+ 0x0898, 0x089F,
+ 0x08CA, 0x08E1,
+ 0x08E3, 0x0902,
+ 0x093A, 0x093A,
+ 0x093C, 0x093C,
+ 0x0941, 0x0948,
+ 0x094D, 0x094D,
+ 0x0951, 0x0957,
+ 0x0962, 0x0963,
+ 0x0981, 0x0981,
+ 0x09BC, 0x09BC,
+ 0x09C1, 0x09C4,
+ 0x09CD, 0x09CD,
+ 0x09E2, 0x09E3,
+ 0x09FE, 0x09FE,
+ 0x0A01, 0x0A02,
+ 0x0A3C, 0x0A3C,
+ 0x0A41, 0x0A42,
+ 0x0A47, 0x0A48,
+ 0x0A4B, 0x0A4D,
+ 0x0A51, 0x0A51,
+ 0x0A70, 0x0A71,
+ 0x0A75, 0x0A75,
+ 0x0A81, 0x0A82,
+ 0x0ABC, 0x0ABC,
+ 0x0AC1, 0x0AC5,
+ 0x0AC7, 0x0AC8,
+ 0x0ACD, 0x0ACD,
+ 0x0AE2, 0x0AE3,
+ 0x0AFA, 0x0AFF,
+ 0x0B01, 0x0B01,
+ 0x0B3C, 0x0B3C,
+ 0x0B3F, 0x0B3F,
+ 0x0B41, 0x0B44,
+ 0x0B4D, 0x0B4D,
+ 0x0B55, 0x0B56,
+ 0x0B62, 0x0B63,
+ 0x0B82, 0x0B82,
+ 0x0BC0, 0x0BC0,
+ 0x0BCD, 0x0BCD,
+ 0x0C00, 0x0C00,
+ 0x0C04, 0x0C04,
+ 0x0C3C, 0x0C3C,
+ 0x0C3E, 0x0C40,
+ 0x0C46, 0x0C48,
+ 0x0C4A, 0x0C4D,
+ 0x0C55, 0x0C56,
+ 0x0C62, 0x0C63,
+ 0x0C81, 0x0C81,
+ 0x0CBC, 0x0CBC,
+ 0x0CBF, 0x0CBF,
+ 0x0CC6, 0x0CC6,
+ 0x0CCC, 0x0CCD,
+ 0x0CE2, 0x0CE3,
+ 0x0D00, 0x0D01,
+ 0x0D3B, 0x0D3C,
+ 0x0D41, 0x0D44,
+ 0x0D4D, 0x0D4D,
+ 0x0D62, 0x0D63,
+ 0x0D81, 0x0D81,
+ 0x0DCA, 0x0DCA,
+ 0x0DD2, 0x0DD4,
+ 0x0DD6, 0x0DD6,
+ 0x0E31, 0x0E31,
+ 0x0E34, 0x0E3A,
+ 0x0E47, 0x0E4E,
+ 0x0EB1, 0x0EB1,
+ 0x0EB4, 0x0EBC,
+ 0x0EC8, 0x0ECE,
+ 0x0F18, 0x0F19,
+ 0x0F35, 0x0F35,
+ 0x0F37, 0x0F37,
+ 0x0F39, 0x0F39,
+ 0x0F71, 0x0F7E,
+ 0x0F80, 0x0F84,
+ 0x0F86, 0x0F87,
+ 0x0F8D, 0x0F97,
+ 0x0F99, 0x0FBC,
+ 0x0FC6, 0x0FC6,
+ 0x102D, 0x1030,
+ 0x1032, 0x1037,
+ 0x1039, 0x103A,
+ 0x103D, 0x103E,
+ 0x1058, 0x1059,
+ 0x105E, 0x1060,
+ 0x1071, 0x1074,
+ 0x1082, 0x1082,
+ 0x1085, 0x1086,
+ 0x108D, 0x108D,
+ 0x109D, 0x109D,
+ 0x135D, 0x135F,
+ 0x1712, 0x1714,
+ 0x1732, 0x1733,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17B4, 0x17B5,
+ 0x17B7, 0x17BD,
+ 0x17C6, 0x17C6,
+ 0x17C9, 0x17D3,
+ 0x17DD, 0x17DD,
+ 0x180B, 0x180D,
+ 0x180F, 0x180F,
+ 0x1885, 0x1886,
+ 0x18A9, 0x18A9,
+ 0x1920, 0x1922,
+ 0x1927, 0x1928,
+ 0x1932, 0x1932,
+ 0x1939, 0x193B,
+ 0x1A17, 0x1A18,
+ 0x1A1B, 0x1A1B,
+ 0x1A56, 0x1A56,
+ 0x1A58, 0x1A5E,
+ 0x1A60, 0x1A60,
+ 0x1A62, 0x1A62,
+ 0x1A65, 0x1A6C,
+ 0x1A73, 0x1A7C,
+ 0x1A7F, 0x1A7F,
+ 0x1AB0, 0x1ABD,
+ 0x1ABF, 0x1ACE,
+ 0x1B00, 0x1B03,
+ 0x1B34, 0x1B34,
+ 0x1B36, 0x1B3A,
+ 0x1B3C, 0x1B3C,
+ 0x1B42, 0x1B42,
+ 0x1B6B, 0x1B73,
+ 0x1B80, 0x1B81,
+ 0x1BA2, 0x1BA5,
+ 0x1BA8, 0x1BA9,
+ 0x1BAB, 0x1BAD,
+ 0x1BE6, 0x1BE6,
+ 0x1BE8, 0x1BE9,
+ 0x1BED, 0x1BED,
+ 0x1BEF, 0x1BF1,
+ 0x1C2C, 0x1C33,
+ 0x1C36, 0x1C37,
+ 0x1CD0, 0x1CD2,
+ 0x1CD4, 0x1CE0,
+ 0x1CE2, 0x1CE8,
+ 0x1CED, 0x1CED,
+ 0x1CF4, 0x1CF4,
+ 0x1CF8, 0x1CF9,
+ 0x1DC0, 0x1DFF,
+ 0x20D0, 0x20DC,
+ 0x20E1, 0x20E1,
+ 0x20E5, 0x20F0,
+ 0x2CEF, 0x2CF1,
+ 0x2D7F, 0x2D7F,
+ 0x2DE0, 0x2DFF,
+ 0x302A, 0x302D,
+ 0x3099, 0x309A,
+ 0xA66F, 0xA66F,
+ 0xA674, 0xA67D,
+ 0xA69E, 0xA69F,
+ 0xA6F0, 0xA6F1,
+ 0xA802, 0xA802,
+ 0xA806, 0xA806,
+ 0xA80B, 0xA80B,
+ 0xA825, 0xA826,
+ 0xA82C, 0xA82C,
+ 0xA8C4, 0xA8C5,
+ 0xA8E0, 0xA8F1,
+ 0xA8FF, 0xA8FF,
+ 0xA926, 0xA92D,
+ 0xA947, 0xA951,
+ 0xA980, 0xA982,
+ 0xA9B3, 0xA9B3,
+ 0xA9B6, 0xA9B9,
+ 0xA9BC, 0xA9BD,
+ 0xA9E5, 0xA9E5,
+ 0xAA29, 0xAA2E,
+ 0xAA31, 0xAA32,
+ 0xAA35, 0xAA36,
+ 0xAA43, 0xAA43,
+ 0xAA4C, 0xAA4C,
+ 0xAA7C, 0xAA7C,
+ 0xAAB0, 0xAAB0,
+ 0xAAB2, 0xAAB4,
+ 0xAAB7, 0xAAB8,
+ 0xAABE, 0xAABF,
+ 0xAAC1, 0xAAC1,
+ 0xAAEC, 0xAAED,
+ 0xAAF6, 0xAAF6,
+ 0xABE5, 0xABE5,
+ 0xABE8, 0xABE8,
+ 0xABED, 0xABED,
+ 0xFB1E, 0xFB1E,
+ 0xFE00, 0xFE0F,
+ 0xFE20, 0xFE2F,
+ 0x101FD, 0x101FD,
+ 0x102E0, 0x102E0,
+ 0x10376, 0x1037A,
+ 0x10A01, 0x10A03,
+ 0x10A05, 0x10A06,
+ 0x10A0C, 0x10A0F,
+ 0x10A38, 0x10A3A,
+ 0x10A3F, 0x10A3F,
+ 0x10AE5, 0x10AE6,
+ 0x10D24, 0x10D27,
+ 0x10EAB, 0x10EAC,
+ 0x10EFD, 0x10EFF,
+ 0x10F46, 0x10F50,
+ 0x10F82, 0x10F85,
+ 0x11001, 0x11001,
+ 0x11038, 0x11046,
+ 0x11070, 0x11070,
+ 0x11073, 0x11074,
+ 0x1107F, 0x11081,
+ 0x110B3, 0x110B6,
+ 0x110B9, 0x110BA,
+ 0x110C2, 0x110C2,
+ 0x11100, 0x11102,
+ 0x11127, 0x1112B,
+ 0x1112D, 0x11134,
+ 0x11173, 0x11173,
+ 0x11180, 0x11181,
+ 0x111B6, 0x111BE,
+ 0x111C9, 0x111CC,
+ 0x111CF, 0x111CF,
+ 0x1122F, 0x11231,
+ 0x11234, 0x11234,
+ 0x11236, 0x11237,
+ 0x1123E, 0x1123E,
+ 0x11241, 0x11241,
+ 0x112DF, 0x112DF,
+ 0x112E3, 0x112EA,
+ 0x11300, 0x11301,
+ 0x1133B, 0x1133C,
+ 0x11340, 0x11340,
+ 0x11366, 0x1136C,
+ 0x11370, 0x11374,
+ 0x11438, 0x1143F,
+ 0x11442, 0x11444,
+ 0x11446, 0x11446,
+ 0x1145E, 0x1145E,
+ 0x114B3, 0x114B8,
+ 0x114BA, 0x114BA,
+ 0x114BF, 0x114C0,
+ 0x114C2, 0x114C3,
+ 0x115B2, 0x115B5,
+ 0x115BC, 0x115BD,
+ 0x115BF, 0x115C0,
+ 0x115DC, 0x115DD,
+ 0x11633, 0x1163A,
+ 0x1163D, 0x1163D,
+ 0x1163F, 0x11640,
+ 0x116AB, 0x116AB,
+ 0x116AD, 0x116AD,
+ 0x116B0, 0x116B5,
+ 0x116B7, 0x116B7,
+ 0x1171D, 0x1171F,
+ 0x11722, 0x11725,
+ 0x11727, 0x1172B,
+ 0x1182F, 0x11837,
+ 0x11839, 0x1183A,
+ 0x1193B, 0x1193C,
+ 0x1193E, 0x1193E,
+ 0x11943, 0x11943,
+ 0x119D4, 0x119D7,
+ 0x119DA, 0x119DB,
+ 0x119E0, 0x119E0,
+ 0x11A01, 0x11A0A,
+ 0x11A33, 0x11A38,
+ 0x11A3B, 0x11A3E,
+ 0x11A47, 0x11A47,
+ 0x11A51, 0x11A56,
+ 0x11A59, 0x11A5B,
+ 0x11A8A, 0x11A96,
+ 0x11A98, 0x11A99,
+ 0x11C30, 0x11C36,
+ 0x11C38, 0x11C3D,
+ 0x11C3F, 0x11C3F,
+ 0x11C92, 0x11CA7,
+ 0x11CAA, 0x11CB0,
+ 0x11CB2, 0x11CB3,
+ 0x11CB5, 0x11CB6,
+ 0x11D31, 0x11D36,
+ 0x11D3A, 0x11D3A,
+ 0x11D3C, 0x11D3D,
+ 0x11D3F, 0x11D45,
+ 0x11D47, 0x11D47,
+ 0x11D90, 0x11D91,
+ 0x11D95, 0x11D95,
+ 0x11D97, 0x11D97,
+ 0x11EF3, 0x11EF4,
+ 0x11F00, 0x11F01,
+ 0x11F36, 0x11F3A,
+ 0x11F40, 0x11F40,
+ 0x11F42, 0x11F42,
+ 0x13440, 0x13440,
+ 0x13447, 0x13455,
+ 0x16AF0, 0x16AF4,
+ 0x16B30, 0x16B36,
+ 0x16F4F, 0x16F4F,
+ 0x16F8F, 0x16F92,
+ 0x16FE4, 0x16FE4,
+ 0x1BC9D, 0x1BC9E,
+ 0x1CF00, 0x1CF2D,
+ 0x1CF30, 0x1CF46,
+ 0x1D167, 0x1D169,
+ 0x1D17B, 0x1D182,
+ 0x1D185, 0x1D18B,
+ 0x1D1AA, 0x1D1AD,
+ 0x1D242, 0x1D244,
+ 0x1DA00, 0x1DA36,
+ 0x1DA3B, 0x1DA6C,
+ 0x1DA75, 0x1DA75,
+ 0x1DA84, 0x1DA84,
+ 0x1DA9B, 0x1DA9F,
+ 0x1DAA1, 0x1DAAF,
+ 0x1E000, 0x1E006,
+ 0x1E008, 0x1E018,
+ 0x1E01B, 0x1E021,
+ 0x1E023, 0x1E024,
+ 0x1E026, 0x1E02A,
+ 0x1E08F, 0x1E08F,
+ 0x1E130, 0x1E136,
+ 0x1E2AE, 0x1E2AE,
+ 0x1E2EC, 0x1E2EF,
+ 0x1E4EC, 0x1E4EF,
+ 0x1E8D0, 0x1E8D6,
+ 0x1E944, 0x1E94A,
+ 0xE0100, 0xE01EF,
+};
+
+static const int32_t ucg_emoji_extended_pictographic_ranges[] = {
+ 0x00A9, 0x00A9,
+ 0x00AE, 0x00AE,
+ 0x203C, 0x203C,
+ 0x2049, 0x2049,
+ 0x2122, 0x2122,
+ 0x2139, 0x2139,
+ 0x2194, 0x2199,
+ 0x21A9, 0x21AA,
+ 0x231A, 0x231B,
+ 0x2328, 0x2328,
+ 0x2388, 0x2388,
+ 0x23CF, 0x23CF,
+ 0x23E9, 0x23EC,
+ 0x23ED, 0x23EE,
+ 0x23EF, 0x23EF,
+ 0x23F0, 0x23F0,
+ 0x23F1, 0x23F2,
+ 0x23F3, 0x23F3,
+ 0x23F8, 0x23FA,
+ 0x24C2, 0x24C2,
+ 0x25AA, 0x25AB,
+ 0x25B6, 0x25B6,
+ 0x25C0, 0x25C0,
+ 0x25FB, 0x25FE,
+ 0x2600, 0x2601,
+ 0x2602, 0x2603,
+ 0x2604, 0x2604,
+ 0x2605, 0x2605,
+ 0x2607, 0x260D,
+ 0x260E, 0x260E,
+ 0x260F, 0x2610,
+ 0x2611, 0x2611,
+ 0x2612, 0x2612,
+ 0x2614, 0x2615,
+ 0x2616, 0x2617,
+ 0x2618, 0x2618,
+ 0x2619, 0x261C,
+ 0x261D, 0x261D,
+ 0x261E, 0x261F,
+ 0x2620, 0x2620,
+ 0x2621, 0x2621,
+ 0x2622, 0x2623,
+ 0x2624, 0x2625,
+ 0x2626, 0x2626,
+ 0x2627, 0x2629,
+ 0x262A, 0x262A,
+ 0x262B, 0x262D,
+ 0x262E, 0x262E,
+ 0x262F, 0x262F,
+ 0x2630, 0x2637,
+ 0x2638, 0x2639,
+ 0x263A, 0x263A,
+ 0x263B, 0x263F,
+ 0x2640, 0x2640,
+ 0x2641, 0x2641,
+ 0x2642, 0x2642,
+ 0x2643, 0x2647,
+ 0x2648, 0x2653,
+ 0x2654, 0x265E,
+ 0x265F, 0x265F,
+ 0x2660, 0x2660,
+ 0x2661, 0x2662,
+ 0x2663, 0x2663,
+ 0x2664, 0x2664,
+ 0x2665, 0x2666,
+ 0x2667, 0x2667,
+ 0x2668, 0x2668,
+ 0x2669, 0x267A,
+ 0x267B, 0x267B,
+ 0x267C, 0x267D,
+ 0x267E, 0x267E,
+ 0x267F, 0x267F,
+ 0x2680, 0x2685,
+ 0x2690, 0x2691,
+ 0x2692, 0x2692,
+ 0x2693, 0x2693,
+ 0x2694, 0x2694,
+ 0x2695, 0x2695,
+ 0x2696, 0x2697,
+ 0x2698, 0x2698,
+ 0x2699, 0x2699,
+ 0x269A, 0x269A,
+ 0x269B, 0x269C,
+ 0x269D, 0x269F,
+ 0x26A0, 0x26A1,
+ 0x26A2, 0x26A6,
+ 0x26A7, 0x26A7,
+ 0x26A8, 0x26A9,
+ 0x26AA, 0x26AB,
+ 0x26AC, 0x26AF,
+ 0x26B0, 0x26B1,
+ 0x26B2, 0x26BC,
+ 0x26BD, 0x26BE,
+ 0x26BF, 0x26C3,
+ 0x26C4, 0x26C5,
+ 0x26C6, 0x26C7,
+ 0x26C8, 0x26C8,
+ 0x26C9, 0x26CD,
+ 0x26CE, 0x26CE,
+ 0x26CF, 0x26CF,
+ 0x26D0, 0x26D0,
+ 0x26D1, 0x26D1,
+ 0x26D2, 0x26D2,
+ 0x26D3, 0x26D3,
+ 0x26D4, 0x26D4,
+ 0x26D5, 0x26E8,
+ 0x26E9, 0x26E9,
+ 0x26EA, 0x26EA,
+ 0x26EB, 0x26EF,
+ 0x26F0, 0x26F1,
+ 0x26F2, 0x26F3,
+ 0x26F4, 0x26F4,
+ 0x26F5, 0x26F5,
+ 0x26F6, 0x26F6,
+ 0x26F7, 0x26F9,
+ 0x26FA, 0x26FA,
+ 0x26FB, 0x26FC,
+ 0x26FD, 0x26FD,
+ 0x26FE, 0x2701,
+ 0x2702, 0x2702,
+ 0x2703, 0x2704,
+ 0x2705, 0x2705,
+ 0x2708, 0x270C,
+ 0x270D, 0x270D,
+ 0x270E, 0x270E,
+ 0x270F, 0x270F,
+ 0x2710, 0x2711,
+ 0x2712, 0x2712,
+ 0x2714, 0x2714,
+ 0x2716, 0x2716,
+ 0x271D, 0x271D,
+ 0x2721, 0x2721,
+ 0x2728, 0x2728,
+ 0x2733, 0x2734,
+ 0x2744, 0x2744,
+ 0x2747, 0x2747,
+ 0x274C, 0x274C,
+ 0x274E, 0x274E,
+ 0x2753, 0x2755,
+ 0x2757, 0x2757,
+ 0x2763, 0x2763,
+ 0x2764, 0x2764,
+ 0x2765, 0x2767,
+ 0x2795, 0x2797,
+ 0x27A1, 0x27A1,
+ 0x27B0, 0x27B0,
+ 0x27BF, 0x27BF,
+ 0x2934, 0x2935,
+ 0x2B05, 0x2B07,
+ 0x2B1B, 0x2B1C,
+ 0x2B50, 0x2B50,
+ 0x2B55, 0x2B55,
+ 0x3030, 0x3030,
+ 0x303D, 0x303D,
+ 0x3297, 0x3297,
+ 0x3299, 0x3299,
+ 0x1F000, 0x1F003,
+ 0x1F004, 0x1F004,
+ 0x1F005, 0x1F0CE,
+ 0x1F0CF, 0x1F0CF,
+ 0x1F0D0, 0x1F0FF,
+ 0x1F10D, 0x1F10F,
+ 0x1F12F, 0x1F12F,
+ 0x1F16C, 0x1F16F,
+ 0x1F170, 0x1F171,
+ 0x1F17E, 0x1F17F,
+ 0x1F18E, 0x1F18E,
+ 0x1F191, 0x1F19A,
+ 0x1F1AD, 0x1F1E5,
+ 0x1F201, 0x1F202,
+ 0x1F203, 0x1F20F,
+ 0x1F21A, 0x1F21A,
+ 0x1F22F, 0x1F22F,
+ 0x1F232, 0x1F23A,
+ 0x1F23C, 0x1F23F,
+ 0x1F249, 0x1F24F,
+ 0x1F250, 0x1F251,
+ 0x1F252, 0x1F2FF,
+ 0x1F300, 0x1F30C,
+ 0x1F30D, 0x1F30E,
+ 0x1F30F, 0x1F30F,
+ 0x1F310, 0x1F310,
+ 0x1F311, 0x1F311,
+ 0x1F312, 0x1F312,
+ 0x1F313, 0x1F315,
+ 0x1F316, 0x1F318,
+ 0x1F319, 0x1F319,
+ 0x1F31A, 0x1F31A,
+ 0x1F31B, 0x1F31B,
+ 0x1F31C, 0x1F31C,
+ 0x1F31D, 0x1F31E,
+ 0x1F31F, 0x1F320,
+ 0x1F321, 0x1F321,
+ 0x1F322, 0x1F323,
+ 0x1F324, 0x1F32C,
+ 0x1F32D, 0x1F32F,
+ 0x1F330, 0x1F331,
+ 0x1F332, 0x1F333,
+ 0x1F334, 0x1F335,
+ 0x1F336, 0x1F336,
+ 0x1F337, 0x1F34A,
+ 0x1F34B, 0x1F34B,
+ 0x1F34C, 0x1F34F,
+ 0x1F350, 0x1F350,
+ 0x1F351, 0x1F37B,
+ 0x1F37C, 0x1F37C,
+ 0x1F37D, 0x1F37D,
+ 0x1F37E, 0x1F37F,
+ 0x1F380, 0x1F393,
+ 0x1F394, 0x1F395,
+ 0x1F396, 0x1F397,
+ 0x1F398, 0x1F398,
+ 0x1F399, 0x1F39B,
+ 0x1F39C, 0x1F39D,
+ 0x1F39E, 0x1F39F,
+ 0x1F3A0, 0x1F3C4,
+ 0x1F3C5, 0x1F3C5,
+ 0x1F3C6, 0x1F3C6,
+ 0x1F3C7, 0x1F3C7,
+ 0x1F3C8, 0x1F3C8,
+ 0x1F3C9, 0x1F3C9,
+ 0x1F3CA, 0x1F3CA,
+ 0x1F3CB, 0x1F3CE,
+ 0x1F3CF, 0x1F3D3,
+ 0x1F3D4, 0x1F3DF,
+ 0x1F3E0, 0x1F3E3,
+ 0x1F3E4, 0x1F3E4,
+ 0x1F3E5, 0x1F3F0,
+ 0x1F3F1, 0x1F3F2,
+ 0x1F3F3, 0x1F3F3,
+ 0x1F3F4, 0x1F3F4,
+ 0x1F3F5, 0x1F3F5,
+ 0x1F3F6, 0x1F3F6,
+ 0x1F3F7, 0x1F3F7,
+ 0x1F3F8, 0x1F3FA,
+ 0x1F400, 0x1F407,
+ 0x1F408, 0x1F408,
+ 0x1F409, 0x1F40B,
+ 0x1F40C, 0x1F40E,
+ 0x1F40F, 0x1F410,
+ 0x1F411, 0x1F412,
+ 0x1F413, 0x1F413,
+ 0x1F414, 0x1F414,
+ 0x1F415, 0x1F415,
+ 0x1F416, 0x1F416,
+ 0x1F417, 0x1F429,
+ 0x1F42A, 0x1F42A,
+ 0x1F42B, 0x1F43E,
+ 0x1F43F, 0x1F43F,
+ 0x1F440, 0x1F440,
+ 0x1F441, 0x1F441,
+ 0x1F442, 0x1F464,
+ 0x1F465, 0x1F465,
+ 0x1F466, 0x1F46B,
+ 0x1F46C, 0x1F46D,
+ 0x1F46E, 0x1F4AC,
+ 0x1F4AD, 0x1F4AD,
+ 0x1F4AE, 0x1F4B5,
+ 0x1F4B6, 0x1F4B7,
+ 0x1F4B8, 0x1F4EB,
+ 0x1F4EC, 0x1F4ED,
+ 0x1F4EE, 0x1F4EE,
+ 0x1F4EF, 0x1F4EF,
+ 0x1F4F0, 0x1F4F4,
+ 0x1F4F5, 0x1F4F5,
+ 0x1F4F6, 0x1F4F7,
+ 0x1F4F8, 0x1F4F8,
+ 0x1F4F9, 0x1F4FC,
+ 0x1F4FD, 0x1F4FD,
+ 0x1F4FE, 0x1F4FE,
+ 0x1F4FF, 0x1F502,
+ 0x1F503, 0x1F503,
+ 0x1F504, 0x1F507,
+ 0x1F508, 0x1F508,
+ 0x1F509, 0x1F509,
+ 0x1F50A, 0x1F514,
+ 0x1F515, 0x1F515,
+ 0x1F516, 0x1F52B,
+ 0x1F52C, 0x1F52D,
+ 0x1F52E, 0x1F53D,
+ 0x1F546, 0x1F548,
+ 0x1F549, 0x1F54A,
+ 0x1F54B, 0x1F54E,
+ 0x1F54F, 0x1F54F,
+ 0x1F550, 0x1F55B,
+ 0x1F55C, 0x1F567,
+ 0x1F568, 0x1F56E,
+ 0x1F56F, 0x1F570,
+ 0x1F571, 0x1F572,
+ 0x1F573, 0x1F579,
+ 0x1F57A, 0x1F57A,
+ 0x1F57B, 0x1F586,
+ 0x1F587, 0x1F587,
+ 0x1F588, 0x1F589,
+ 0x1F58A, 0x1F58D,
+ 0x1F58E, 0x1F58F,
+ 0x1F590, 0x1F590,
+ 0x1F591, 0x1F594,
+ 0x1F595, 0x1F596,
+ 0x1F597, 0x1F5A3,
+ 0x1F5A4, 0x1F5A4,
+ 0x1F5A5, 0x1F5A5,
+ 0x1F5A6, 0x1F5A7,
+ 0x1F5A8, 0x1F5A8,
+ 0x1F5A9, 0x1F5B0,
+ 0x1F5B1, 0x1F5B2,
+ 0x1F5B3, 0x1F5BB,
+ 0x1F5BC, 0x1F5BC,
+ 0x1F5BD, 0x1F5C1,
+ 0x1F5C2, 0x1F5C4,
+ 0x1F5C5, 0x1F5D0,
+ 0x1F5D1, 0x1F5D3,
+ 0x1F5D4, 0x1F5DB,
+ 0x1F5DC, 0x1F5DE,
+ 0x1F5DF, 0x1F5E0,
+ 0x1F5E1, 0x1F5E1,
+ 0x1F5E2, 0x1F5E2,
+ 0x1F5E3, 0x1F5E3,
+ 0x1F5E4, 0x1F5E7,
+ 0x1F5E8, 0x1F5E8,
+ 0x1F5E9, 0x1F5EE,
+ 0x1F5EF, 0x1F5EF,
+ 0x1F5F0, 0x1F5F2,
+ 0x1F5F3, 0x1F5F3,
+ 0x1F5F4, 0x1F5F9,
+ 0x1F5FA, 0x1F5FA,
+ 0x1F5FB, 0x1F5FF,
+ 0x1F600, 0x1F600,
+ 0x1F601, 0x1F606,
+ 0x1F607, 0x1F608,
+ 0x1F609, 0x1F60D,
+ 0x1F60E, 0x1F60E,
+ 0x1F60F, 0x1F60F,
+ 0x1F610, 0x1F610,
+ 0x1F611, 0x1F611,
+ 0x1F612, 0x1F614,
+ 0x1F615, 0x1F615,
+ 0x1F616, 0x1F616,
+ 0x1F617, 0x1F617,
+ 0x1F618, 0x1F618,
+ 0x1F619, 0x1F619,
+ 0x1F61A, 0x1F61A,
+ 0x1F61B, 0x1F61B,
+ 0x1F61C, 0x1F61E,
+ 0x1F61F, 0x1F61F,
+ 0x1F620, 0x1F625,
+ 0x1F626, 0x1F627,
+ 0x1F628, 0x1F62B,
+ 0x1F62C, 0x1F62C,
+ 0x1F62D, 0x1F62D,
+ 0x1F62E, 0x1F62F,
+ 0x1F630, 0x1F633,
+ 0x1F634, 0x1F634,
+ 0x1F635, 0x1F635,
+ 0x1F636, 0x1F636,
+ 0x1F637, 0x1F640,
+ 0x1F641, 0x1F644,
+ 0x1F645, 0x1F64F,
+ 0x1F680, 0x1F680,
+ 0x1F681, 0x1F682,
+ 0x1F683, 0x1F685,
+ 0x1F686, 0x1F686,
+ 0x1F687, 0x1F687,
+ 0x1F688, 0x1F688,
+ 0x1F689, 0x1F689,
+ 0x1F68A, 0x1F68B,
+ 0x1F68C, 0x1F68C,
+ 0x1F68D, 0x1F68D,
+ 0x1F68E, 0x1F68E,
+ 0x1F68F, 0x1F68F,
+ 0x1F690, 0x1F690,
+ 0x1F691, 0x1F693,
+ 0x1F694, 0x1F694,
+ 0x1F695, 0x1F695,
+ 0x1F696, 0x1F696,
+ 0x1F697, 0x1F697,
+ 0x1F698, 0x1F698,
+ 0x1F699, 0x1F69A,
+ 0x1F69B, 0x1F6A1,
+ 0x1F6A2, 0x1F6A2,
+ 0x1F6A3, 0x1F6A3,
+ 0x1F6A4, 0x1F6A5,
+ 0x1F6A6, 0x1F6A6,
+ 0x1F6A7, 0x1F6AD,
+ 0x1F6AE, 0x1F6B1,
+ 0x1F6B2, 0x1F6B2,
+ 0x1F6B3, 0x1F6B5,
+ 0x1F6B6, 0x1F6B6,
+ 0x1F6B7, 0x1F6B8,
+ 0x1F6B9, 0x1F6BE,
+ 0x1F6BF, 0x1F6BF,
+ 0x1F6C0, 0x1F6C0,
+ 0x1F6C1, 0x1F6C5,
+ 0x1F6C6, 0x1F6CA,
+ 0x1F6CB, 0x1F6CB,
+ 0x1F6CC, 0x1F6CC,
+ 0x1F6CD, 0x1F6CF,
+ 0x1F6D0, 0x1F6D0,
+ 0x1F6D1, 0x1F6D2,
+ 0x1F6D3, 0x1F6D4,
+ 0x1F6D5, 0x1F6D5,
+ 0x1F6D6, 0x1F6D7,
+ 0x1F6D8, 0x1F6DB,
+ 0x1F6DC, 0x1F6DC,
+ 0x1F6DD, 0x1F6DF,
+ 0x1F6E0, 0x1F6E5,
+ 0x1F6E6, 0x1F6E8,
+ 0x1F6E9, 0x1F6E9,
+ 0x1F6EA, 0x1F6EA,
+ 0x1F6EB, 0x1F6EC,
+ 0x1F6ED, 0x1F6EF,
+ 0x1F6F0, 0x1F6F0,
+ 0x1F6F1, 0x1F6F2,
+ 0x1F6F3, 0x1F6F3,
+ 0x1F6F4, 0x1F6F6,
+ 0x1F6F7, 0x1F6F8,
+ 0x1F6F9, 0x1F6F9,
+ 0x1F6FA, 0x1F6FA,
+ 0x1F6FB, 0x1F6FC,
+ 0x1F6FD, 0x1F6FF,
+ 0x1F774, 0x1F77F,
+ 0x1F7D5, 0x1F7DF,
+ 0x1F7E0, 0x1F7EB,
+ 0x1F7EC, 0x1F7EF,
+ 0x1F7F0, 0x1F7F0,
+ 0x1F7F1, 0x1F7FF,
+ 0x1F80C, 0x1F80F,
+ 0x1F848, 0x1F84F,
+ 0x1F85A, 0x1F85F,
+ 0x1F888, 0x1F88F,
+ 0x1F8AE, 0x1F8FF,
+ 0x1F90C, 0x1F90C,
+ 0x1F90D, 0x1F90F,
+ 0x1F910, 0x1F918,
+ 0x1F919, 0x1F91E,
+ 0x1F91F, 0x1F91F,
+ 0x1F920, 0x1F927,
+ 0x1F928, 0x1F92F,
+ 0x1F930, 0x1F930,
+ 0x1F931, 0x1F932,
+ 0x1F933, 0x1F93A,
+ 0x1F93C, 0x1F93E,
+ 0x1F93F, 0x1F93F,
+ 0x1F940, 0x1F945,
+ 0x1F947, 0x1F94B,
+ 0x1F94C, 0x1F94C,
+ 0x1F94D, 0x1F94F,
+ 0x1F950, 0x1F95E,
+ 0x1F95F, 0x1F96B,
+ 0x1F96C, 0x1F970,
+ 0x1F971, 0x1F971,
+ 0x1F972, 0x1F972,
+ 0x1F973, 0x1F976,
+ 0x1F977, 0x1F978,
+ 0x1F979, 0x1F979,
+ 0x1F97A, 0x1F97A,
+ 0x1F97B, 0x1F97B,
+ 0x1F97C, 0x1F97F,
+ 0x1F980, 0x1F984,
+ 0x1F985, 0x1F991,
+ 0x1F992, 0x1F997,
+ 0x1F998, 0x1F9A2,
+ 0x1F9A3, 0x1F9A4,
+ 0x1F9A5, 0x1F9AA,
+ 0x1F9AB, 0x1F9AD,
+ 0x1F9AE, 0x1F9AF,
+ 0x1F9B0, 0x1F9B9,
+ 0x1F9BA, 0x1F9BF,
+ 0x1F9C0, 0x1F9C0,
+ 0x1F9C1, 0x1F9C2,
+ 0x1F9C3, 0x1F9CA,
+ 0x1F9CB, 0x1F9CB,
+ 0x1F9CC, 0x1F9CC,
+ 0x1F9CD, 0x1F9CF,
+ 0x1F9D0, 0x1F9E6,
+ 0x1F9E7, 0x1F9FF,
+ 0x1FA00, 0x1FA6F,
+ 0x1FA70, 0x1FA73,
+ 0x1FA74, 0x1FA74,
+ 0x1FA75, 0x1FA77,
+ 0x1FA78, 0x1FA7A,
+ 0x1FA7B, 0x1FA7C,
+ 0x1FA7D, 0x1FA7F,
+ 0x1FA80, 0x1FA82,
+ 0x1FA83, 0x1FA86,
+ 0x1FA87, 0x1FA88,
+ 0x1FA89, 0x1FA8F,
+ 0x1FA90, 0x1FA95,
+ 0x1FA96, 0x1FAA8,
+ 0x1FAA9, 0x1FAAC,
+ 0x1FAAD, 0x1FAAF,
+ 0x1FAB0, 0x1FAB6,
+ 0x1FAB7, 0x1FABA,
+ 0x1FABB, 0x1FABD,
+ 0x1FABE, 0x1FABE,
+ 0x1FABF, 0x1FABF,
+ 0x1FAC0, 0x1FAC2,
+ 0x1FAC3, 0x1FAC5,
+ 0x1FAC6, 0x1FACD,
+ 0x1FACE, 0x1FACF,
+ 0x1FAD0, 0x1FAD6,
+ 0x1FAD7, 0x1FAD9,
+ 0x1FADA, 0x1FADB,
+ 0x1FADC, 0x1FADF,
+ 0x1FAE0, 0x1FAE7,
+ 0x1FAE8, 0x1FAE8,
+ 0x1FAE9, 0x1FAEF,
+ 0x1FAF0, 0x1FAF6,
+ 0x1FAF7, 0x1FAF8,
+ 0x1FAF9, 0x1FAFF,
+ 0x1FC00, 0x1FFFD,
+};
+
+static const int32_t ucg_grapheme_extend_ranges[] = {
+ 0x0300, 0x036F,
+ 0x0483, 0x0487,
+ 0x0488, 0x0489,
+ 0x0591, 0x05BD,
+ 0x05BF, 0x05BF,
+ 0x05C1, 0x05C2,
+ 0x05C4, 0x05C5,
+ 0x05C7, 0x05C7,
+ 0x0610, 0x061A,
+ 0x064B, 0x065F,
+ 0x0670, 0x0670,
+ 0x06D6, 0x06DC,
+ 0x06DF, 0x06E4,
+ 0x06E7, 0x06E8,
+ 0x06EA, 0x06ED,
+ 0x0711, 0x0711,
+ 0x0730, 0x074A,
+ 0x07A6, 0x07B0,
+ 0x07EB, 0x07F3,
+ 0x07FD, 0x07FD,
+ 0x0816, 0x0819,
+ 0x081B, 0x0823,
+ 0x0825, 0x0827,
+ 0x0829, 0x082D,
+ 0x0859, 0x085B,
+ 0x0898, 0x089F,
+ 0x08CA, 0x08E1,
+ 0x08E3, 0x0902,
+ 0x093A, 0x093A,
+ 0x093C, 0x093C,
+ 0x0941, 0x0948,
+ 0x094D, 0x094D,
+ 0x0951, 0x0957,
+ 0x0962, 0x0963,
+ 0x0981, 0x0981,
+ 0x09BC, 0x09BC,
+ 0x09BE, 0x09BE,
+ 0x09C1, 0x09C4,
+ 0x09CD, 0x09CD,
+ 0x09D7, 0x09D7,
+ 0x09E2, 0x09E3,
+ 0x09FE, 0x09FE,
+ 0x0A01, 0x0A02,
+ 0x0A3C, 0x0A3C,
+ 0x0A41, 0x0A42,
+ 0x0A47, 0x0A48,
+ 0x0A4B, 0x0A4D,
+ 0x0A51, 0x0A51,
+ 0x0A70, 0x0A71,
+ 0x0A75, 0x0A75,
+ 0x0A81, 0x0A82,
+ 0x0ABC, 0x0ABC,
+ 0x0AC1, 0x0AC5,
+ 0x0AC7, 0x0AC8,
+ 0x0ACD, 0x0ACD,
+ 0x0AE2, 0x0AE3,
+ 0x0AFA, 0x0AFF,
+ 0x0B01, 0x0B01,
+ 0x0B3C, 0x0B3C,
+ 0x0B3E, 0x0B3E,
+ 0x0B3F, 0x0B3F,
+ 0x0B41, 0x0B44,
+ 0x0B4D, 0x0B4D,
+ 0x0B55, 0x0B56,
+ 0x0B57, 0x0B57,
+ 0x0B62, 0x0B63,
+ 0x0B82, 0x0B82,
+ 0x0BBE, 0x0BBE,
+ 0x0BC0, 0x0BC0,
+ 0x0BCD, 0x0BCD,
+ 0x0BD7, 0x0BD7,
+ 0x0C00, 0x0C00,
+ 0x0C04, 0x0C04,
+ 0x0C3C, 0x0C3C,
+ 0x0C3E, 0x0C40,
+ 0x0C46, 0x0C48,
+ 0x0C4A, 0x0C4D,
+ 0x0C55, 0x0C56,
+ 0x0C62, 0x0C63,
+ 0x0C81, 0x0C81,
+ 0x0CBC, 0x0CBC,
+ 0x0CBF, 0x0CBF,
+ 0x0CC2, 0x0CC2,
+ 0x0CC6, 0x0CC6,
+ 0x0CCC, 0x0CCD,
+ 0x0CD5, 0x0CD6,
+ 0x0CE2, 0x0CE3,
+ 0x0D00, 0x0D01,
+ 0x0D3B, 0x0D3C,
+ 0x0D3E, 0x0D3E,
+ 0x0D41, 0x0D44,
+ 0x0D4D, 0x0D4D,
+ 0x0D57, 0x0D57,
+ 0x0D62, 0x0D63,
+ 0x0D81, 0x0D81,
+ 0x0DCA, 0x0DCA,
+ 0x0DCF, 0x0DCF,
+ 0x0DD2, 0x0DD4,
+ 0x0DD6, 0x0DD6,
+ 0x0DDF, 0x0DDF,
+ 0x0E31, 0x0E31,
+ 0x0E34, 0x0E3A,
+ 0x0E47, 0x0E4E,
+ 0x0EB1, 0x0EB1,
+ 0x0EB4, 0x0EBC,
+ 0x0EC8, 0x0ECE,
+ 0x0F18, 0x0F19,
+ 0x0F35, 0x0F35,
+ 0x0F37, 0x0F37,
+ 0x0F39, 0x0F39,
+ 0x0F71, 0x0F7E,
+ 0x0F80, 0x0F84,
+ 0x0F86, 0x0F87,
+ 0x0F8D, 0x0F97,
+ 0x0F99, 0x0FBC,
+ 0x0FC6, 0x0FC6,
+ 0x102D, 0x1030,
+ 0x1032, 0x1037,
+ 0x1039, 0x103A,
+ 0x103D, 0x103E,
+ 0x1058, 0x1059,
+ 0x105E, 0x1060,
+ 0x1071, 0x1074,
+ 0x1082, 0x1082,
+ 0x1085, 0x1086,
+ 0x108D, 0x108D,
+ 0x109D, 0x109D,
+ 0x135D, 0x135F,
+ 0x1712, 0x1714,
+ 0x1732, 0x1733,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17B4, 0x17B5,
+ 0x17B7, 0x17BD,
+ 0x17C6, 0x17C6,
+ 0x17C9, 0x17D3,
+ 0x17DD, 0x17DD,
+ 0x180B, 0x180D,
+ 0x180F, 0x180F,
+ 0x1885, 0x1886,
+ 0x18A9, 0x18A9,
+ 0x1920, 0x1922,
+ 0x1927, 0x1928,
+ 0x1932, 0x1932,
+ 0x1939, 0x193B,
+ 0x1A17, 0x1A18,
+ 0x1A1B, 0x1A1B,
+ 0x1A56, 0x1A56,
+ 0x1A58, 0x1A5E,
+ 0x1A60, 0x1A60,
+ 0x1A62, 0x1A62,
+ 0x1A65, 0x1A6C,
+ 0x1A73, 0x1A7C,
+ 0x1A7F, 0x1A7F,
+ 0x1AB0, 0x1ABD,
+ 0x1ABE, 0x1ABE,
+ 0x1ABF, 0x1ACE,
+ 0x1B00, 0x1B03,
+ 0x1B34, 0x1B34,
+ 0x1B35, 0x1B35,
+ 0x1B36, 0x1B3A,
+ 0x1B3C, 0x1B3C,
+ 0x1B42, 0x1B42,
+ 0x1B6B, 0x1B73,
+ 0x1B80, 0x1B81,
+ 0x1BA2, 0x1BA5,
+ 0x1BA8, 0x1BA9,
+ 0x1BAB, 0x1BAD,
+ 0x1BE6, 0x1BE6,
+ 0x1BE8, 0x1BE9,
+ 0x1BED, 0x1BED,
+ 0x1BEF, 0x1BF1,
+ 0x1C2C, 0x1C33,
+ 0x1C36, 0x1C37,
+ 0x1CD0, 0x1CD2,
+ 0x1CD4, 0x1CE0,
+ 0x1CE2, 0x1CE8,
+ 0x1CED, 0x1CED,
+ 0x1CF4, 0x1CF4,
+ 0x1CF8, 0x1CF9,
+ 0x1DC0, 0x1DFF,
+ 0x200C, 0x200C,
+ 0x20D0, 0x20DC,
+ 0x20DD, 0x20E0,
+ 0x20E1, 0x20E1,
+ 0x20E2, 0x20E4,
+ 0x20E5, 0x20F0,
+ 0x2CEF, 0x2CF1,
+ 0x2D7F, 0x2D7F,
+ 0x2DE0, 0x2DFF,
+ 0x302A, 0x302D,
+ 0x302E, 0x302F,
+ 0x3099, 0x309A,
+ 0xA66F, 0xA66F,
+ 0xA670, 0xA672,
+ 0xA674, 0xA67D,
+ 0xA69E, 0xA69F,
+ 0xA6F0, 0xA6F1,
+ 0xA802, 0xA802,
+ 0xA806, 0xA806,
+ 0xA80B, 0xA80B,
+ 0xA825, 0xA826,
+ 0xA82C, 0xA82C,
+ 0xA8C4, 0xA8C5,
+ 0xA8E0, 0xA8F1,
+ 0xA8FF, 0xA8FF,
+ 0xA926, 0xA92D,
+ 0xA947, 0xA951,
+ 0xA980, 0xA982,
+ 0xA9B3, 0xA9B3,
+ 0xA9B6, 0xA9B9,
+ 0xA9BC, 0xA9BD,
+ 0xA9E5, 0xA9E5,
+ 0xAA29, 0xAA2E,
+ 0xAA31, 0xAA32,
+ 0xAA35, 0xAA36,
+ 0xAA43, 0xAA43,
+ 0xAA4C, 0xAA4C,
+ 0xAA7C, 0xAA7C,
+ 0xAAB0, 0xAAB0,
+ 0xAAB2, 0xAAB4,
+ 0xAAB7, 0xAAB8,
+ 0xAABE, 0xAABF,
+ 0xAAC1, 0xAAC1,
+ 0xAAEC, 0xAAED,
+ 0xAAF6, 0xAAF6,
+ 0xABE5, 0xABE5,
+ 0xABE8, 0xABE8,
+ 0xABED, 0xABED,
+ 0xFB1E, 0xFB1E,
+ 0xFE00, 0xFE0F,
+ 0xFE20, 0xFE2F,
+ 0xFF9E, 0xFF9F,
+ 0x101FD, 0x101FD,
+ 0x102E0, 0x102E0,
+ 0x10376, 0x1037A,
+ 0x10A01, 0x10A03,
+ 0x10A05, 0x10A06,
+ 0x10A0C, 0x10A0F,
+ 0x10A38, 0x10A3A,
+ 0x10A3F, 0x10A3F,
+ 0x10AE5, 0x10AE6,
+ 0x10D24, 0x10D27,
+ 0x10EAB, 0x10EAC,
+ 0x10EFD, 0x10EFF,
+ 0x10F46, 0x10F50,
+ 0x10F82, 0x10F85,
+ 0x11001, 0x11001,
+ 0x11038, 0x11046,
+ 0x11070, 0x11070,
+ 0x11073, 0x11074,
+ 0x1107F, 0x11081,
+ 0x110B3, 0x110B6,
+ 0x110B9, 0x110BA,
+ 0x110C2, 0x110C2,
+ 0x11100, 0x11102,
+ 0x11127, 0x1112B,
+ 0x1112D, 0x11134,
+ 0x11173, 0x11173,
+ 0x11180, 0x11181,
+ 0x111B6, 0x111BE,
+ 0x111C9, 0x111CC,
+ 0x111CF, 0x111CF,
+ 0x1122F, 0x11231,
+ 0x11234, 0x11234,
+ 0x11236, 0x11237,
+ 0x1123E, 0x1123E,
+ 0x11241, 0x11241,
+ 0x112DF, 0x112DF,
+ 0x112E3, 0x112EA,
+ 0x11300, 0x11301,
+ 0x1133B, 0x1133C,
+ 0x1133E, 0x1133E,
+ 0x11340, 0x11340,
+ 0x11357, 0x11357,
+ 0x11366, 0x1136C,
+ 0x11370, 0x11374,
+ 0x11438, 0x1143F,
+ 0x11442, 0x11444,
+ 0x11446, 0x11446,
+ 0x1145E, 0x1145E,
+ 0x114B0, 0x114B0,
+ 0x114B3, 0x114B8,
+ 0x114BA, 0x114BA,
+ 0x114BD, 0x114BD,
+ 0x114BF, 0x114C0,
+ 0x114C2, 0x114C3,
+ 0x115AF, 0x115AF,
+ 0x115B2, 0x115B5,
+ 0x115BC, 0x115BD,
+ 0x115BF, 0x115C0,
+ 0x115DC, 0x115DD,
+ 0x11633, 0x1163A,
+ 0x1163D, 0x1163D,
+ 0x1163F, 0x11640,
+ 0x116AB, 0x116AB,
+ 0x116AD, 0x116AD,
+ 0x116B0, 0x116B5,
+ 0x116B7, 0x116B7,
+ 0x1171D, 0x1171F,
+ 0x11722, 0x11725,
+ 0x11727, 0x1172B,
+ 0x1182F, 0x11837,
+ 0x11839, 0x1183A,
+ 0x11930, 0x11930,
+ 0x1193B, 0x1193C,
+ 0x1193E, 0x1193E,
+ 0x11943, 0x11943,
+ 0x119D4, 0x119D7,
+ 0x119DA, 0x119DB,
+ 0x119E0, 0x119E0,
+ 0x11A01, 0x11A0A,
+ 0x11A33, 0x11A38,
+ 0x11A3B, 0x11A3E,
+ 0x11A47, 0x11A47,
+ 0x11A51, 0x11A56,
+ 0x11A59, 0x11A5B,
+ 0x11A8A, 0x11A96,
+ 0x11A98, 0x11A99,
+ 0x11C30, 0x11C36,
+ 0x11C38, 0x11C3D,
+ 0x11C3F, 0x11C3F,
+ 0x11C92, 0x11CA7,
+ 0x11CAA, 0x11CB0,
+ 0x11CB2, 0x11CB3,
+ 0x11CB5, 0x11CB6,
+ 0x11D31, 0x11D36,
+ 0x11D3A, 0x11D3A,
+ 0x11D3C, 0x11D3D,
+ 0x11D3F, 0x11D45,
+ 0x11D47, 0x11D47,
+ 0x11D90, 0x11D91,
+ 0x11D95, 0x11D95,
+ 0x11D97, 0x11D97,
+ 0x11EF3, 0x11EF4,
+ 0x11F00, 0x11F01,
+ 0x11F36, 0x11F3A,
+ 0x11F40, 0x11F40,
+ 0x11F42, 0x11F42,
+ 0x13440, 0x13440,
+ 0x13447, 0x13455,
+ 0x16AF0, 0x16AF4,
+ 0x16B30, 0x16B36,
+ 0x16F4F, 0x16F4F,
+ 0x16F8F, 0x16F92,
+ 0x16FE4, 0x16FE4,
+ 0x1BC9D, 0x1BC9E,
+ 0x1CF00, 0x1CF2D,
+ 0x1CF30, 0x1CF46,
+ 0x1D165, 0x1D165,
+ 0x1D167, 0x1D169,
+ 0x1D16E, 0x1D172,
+ 0x1D17B, 0x1D182,
+ 0x1D185, 0x1D18B,
+ 0x1D1AA, 0x1D1AD,
+ 0x1D242, 0x1D244,
+ 0x1DA00, 0x1DA36,
+ 0x1DA3B, 0x1DA6C,
+ 0x1DA75, 0x1DA75,
+ 0x1DA84, 0x1DA84,
+ 0x1DA9B, 0x1DA9F,
+ 0x1DAA1, 0x1DAAF,
+ 0x1E000, 0x1E006,
+ 0x1E008, 0x1E018,
+ 0x1E01B, 0x1E021,
+ 0x1E023, 0x1E024,
+ 0x1E026, 0x1E02A,
+ 0x1E08F, 0x1E08F,
+ 0x1E130, 0x1E136,
+ 0x1E2AE, 0x1E2AE,
+ 0x1E2EC, 0x1E2EF,
+ 0x1E4EC, 0x1E4EF,
+ 0x1E8D0, 0x1E8D6,
+ 0x1E944, 0x1E94A,
+ 0xE0020, 0xE007F,
+ 0xE0100, 0xE01EF,
+};
+
+static const int32_t ucg_hangul_syllable_lv_singlets[] = {
+ 0xAC00,
+ 0xAC1C,
+ 0xAC38,
+ 0xAC54,
+ 0xAC70,
+ 0xAC8C,
+ 0xACA8,
+ 0xACC4,
+ 0xACE0,
+ 0xACFC,
+ 0xAD18,
+ 0xAD34,
+ 0xAD50,
+ 0xAD6C,
+ 0xAD88,
+ 0xADA4,
+ 0xADC0,
+ 0xADDC,
+ 0xADF8,
+ 0xAE14,
+ 0xAE30,
+ 0xAE4C,
+ 0xAE68,
+ 0xAE84,
+ 0xAEA0,
+ 0xAEBC,
+ 0xAED8,
+ 0xAEF4,
+ 0xAF10,
+ 0xAF2C,
+ 0xAF48,
+ 0xAF64,
+ 0xAF80,
+ 0xAF9C,
+ 0xAFB8,
+ 0xAFD4,
+ 0xAFF0,
+ 0xB00C,
+ 0xB028,
+ 0xB044,
+ 0xB060,
+ 0xB07C,
+ 0xB098,
+ 0xB0B4,
+ 0xB0D0,
+ 0xB0EC,
+ 0xB108,
+ 0xB124,
+ 0xB140,
+ 0xB15C,
+ 0xB178,
+ 0xB194,
+ 0xB1B0,
+ 0xB1CC,
+ 0xB1E8,
+ 0xB204,
+ 0xB220,
+ 0xB23C,
+ 0xB258,
+ 0xB274,
+ 0xB290,
+ 0xB2AC,
+ 0xB2C8,
+ 0xB2E4,
+ 0xB300,
+ 0xB31C,
+ 0xB338,
+ 0xB354,
+ 0xB370,
+ 0xB38C,
+ 0xB3A8,
+ 0xB3C4,
+ 0xB3E0,
+ 0xB3FC,
+ 0xB418,
+ 0xB434,
+ 0xB450,
+ 0xB46C,
+ 0xB488,
+ 0xB4A4,
+ 0xB4C0,
+ 0xB4DC,
+ 0xB4F8,
+ 0xB514,
+ 0xB530,
+ 0xB54C,
+ 0xB568,
+ 0xB584,
+ 0xB5A0,
+ 0xB5BC,
+ 0xB5D8,
+ 0xB5F4,
+ 0xB610,
+ 0xB62C,
+ 0xB648,
+ 0xB664,
+ 0xB680,
+ 0xB69C,
+ 0xB6B8,
+ 0xB6D4,
+ 0xB6F0,
+ 0xB70C,
+ 0xB728,
+ 0xB744,
+ 0xB760,
+ 0xB77C,
+ 0xB798,
+ 0xB7B4,
+ 0xB7D0,
+ 0xB7EC,
+ 0xB808,
+ 0xB824,
+ 0xB840,
+ 0xB85C,
+ 0xB878,
+ 0xB894,
+ 0xB8B0,
+ 0xB8CC,
+ 0xB8E8,
+ 0xB904,
+ 0xB920,
+ 0xB93C,
+ 0xB958,
+ 0xB974,
+ 0xB990,
+ 0xB9AC,
+ 0xB9C8,
+ 0xB9E4,
+ 0xBA00,
+ 0xBA1C,
+ 0xBA38,
+ 0xBA54,
+ 0xBA70,
+ 0xBA8C,
+ 0xBAA8,
+ 0xBAC4,
+ 0xBAE0,
+ 0xBAFC,
+ 0xBB18,
+ 0xBB34,
+ 0xBB50,
+ 0xBB6C,
+ 0xBB88,
+ 0xBBA4,
+ 0xBBC0,
+ 0xBBDC,
+ 0xBBF8,
+ 0xBC14,
+ 0xBC30,
+ 0xBC4C,
+ 0xBC68,
+ 0xBC84,
+ 0xBCA0,
+ 0xBCBC,
+ 0xBCD8,
+ 0xBCF4,
+ 0xBD10,
+ 0xBD2C,
+ 0xBD48,
+ 0xBD64,
+ 0xBD80,
+ 0xBD9C,
+ 0xBDB8,
+ 0xBDD4,
+ 0xBDF0,
+ 0xBE0C,
+ 0xBE28,
+ 0xBE44,
+ 0xBE60,
+ 0xBE7C,
+ 0xBE98,
+ 0xBEB4,
+ 0xBED0,
+ 0xBEEC,
+ 0xBF08,
+ 0xBF24,
+ 0xBF40,
+ 0xBF5C,
+ 0xBF78,
+ 0xBF94,
+ 0xBFB0,
+ 0xBFCC,
+ 0xBFE8,
+ 0xC004,
+ 0xC020,
+ 0xC03C,
+ 0xC058,
+ 0xC074,
+ 0xC090,
+ 0xC0AC,
+ 0xC0C8,
+ 0xC0E4,
+ 0xC100,
+ 0xC11C,
+ 0xC138,
+ 0xC154,
+ 0xC170,
+ 0xC18C,
+ 0xC1A8,
+ 0xC1C4,
+ 0xC1E0,
+ 0xC1FC,
+ 0xC218,
+ 0xC234,
+ 0xC250,
+ 0xC26C,
+ 0xC288,
+ 0xC2A4,
+ 0xC2C0,
+ 0xC2DC,
+ 0xC2F8,
+ 0xC314,
+ 0xC330,
+ 0xC34C,
+ 0xC368,
+ 0xC384,
+ 0xC3A0,
+ 0xC3BC,
+ 0xC3D8,
+ 0xC3F4,
+ 0xC410,
+ 0xC42C,
+ 0xC448,
+ 0xC464,
+ 0xC480,
+ 0xC49C,
+ 0xC4B8,
+ 0xC4D4,
+ 0xC4F0,
+ 0xC50C,
+ 0xC528,
+ 0xC544,
+ 0xC560,
+ 0xC57C,
+ 0xC598,
+ 0xC5B4,
+ 0xC5D0,
+ 0xC5EC,
+ 0xC608,
+ 0xC624,
+ 0xC640,
+ 0xC65C,
+ 0xC678,
+ 0xC694,
+ 0xC6B0,
+ 0xC6CC,
+ 0xC6E8,
+ 0xC704,
+ 0xC720,
+ 0xC73C,
+ 0xC758,
+ 0xC774,
+ 0xC790,
+ 0xC7AC,
+ 0xC7C8,
+ 0xC7E4,
+ 0xC800,
+ 0xC81C,
+ 0xC838,
+ 0xC854,
+ 0xC870,
+ 0xC88C,
+ 0xC8A8,
+ 0xC8C4,
+ 0xC8E0,
+ 0xC8FC,
+ 0xC918,
+ 0xC934,
+ 0xC950,
+ 0xC96C,
+ 0xC988,
+ 0xC9A4,
+ 0xC9C0,
+ 0xC9DC,
+ 0xC9F8,
+ 0xCA14,
+ 0xCA30,
+ 0xCA4C,
+ 0xCA68,
+ 0xCA84,
+ 0xCAA0,
+ 0xCABC,
+ 0xCAD8,
+ 0xCAF4,
+ 0xCB10,
+ 0xCB2C,
+ 0xCB48,
+ 0xCB64,
+ 0xCB80,
+ 0xCB9C,
+ 0xCBB8,
+ 0xCBD4,
+ 0xCBF0,
+ 0xCC0C,
+ 0xCC28,
+ 0xCC44,
+ 0xCC60,
+ 0xCC7C,
+ 0xCC98,
+ 0xCCB4,
+ 0xCCD0,
+ 0xCCEC,
+ 0xCD08,
+ 0xCD24,
+ 0xCD40,
+ 0xCD5C,
+ 0xCD78,
+ 0xCD94,
+ 0xCDB0,
+ 0xCDCC,
+ 0xCDE8,
+ 0xCE04,
+ 0xCE20,
+ 0xCE3C,
+ 0xCE58,
+ 0xCE74,
+ 0xCE90,
+ 0xCEAC,
+ 0xCEC8,
+ 0xCEE4,
+ 0xCF00,
+ 0xCF1C,
+ 0xCF38,
+ 0xCF54,
+ 0xCF70,
+ 0xCF8C,
+ 0xCFA8,
+ 0xCFC4,
+ 0xCFE0,
+ 0xCFFC,
+ 0xD018,
+ 0xD034,
+ 0xD050,
+ 0xD06C,
+ 0xD088,
+ 0xD0A4,
+ 0xD0C0,
+ 0xD0DC,
+ 0xD0F8,
+ 0xD114,
+ 0xD130,
+ 0xD14C,
+ 0xD168,
+ 0xD184,
+ 0xD1A0,
+ 0xD1BC,
+ 0xD1D8,
+ 0xD1F4,
+ 0xD210,
+ 0xD22C,
+ 0xD248,
+ 0xD264,
+ 0xD280,
+ 0xD29C,
+ 0xD2B8,
+ 0xD2D4,
+ 0xD2F0,
+ 0xD30C,
+ 0xD328,
+ 0xD344,
+ 0xD360,
+ 0xD37C,
+ 0xD398,
+ 0xD3B4,
+ 0xD3D0,
+ 0xD3EC,
+ 0xD408,
+ 0xD424,
+ 0xD440,
+ 0xD45C,
+ 0xD478,
+ 0xD494,
+ 0xD4B0,
+ 0xD4CC,
+ 0xD4E8,
+ 0xD504,
+ 0xD520,
+ 0xD53C,
+ 0xD558,
+ 0xD574,
+ 0xD590,
+ 0xD5AC,
+ 0xD5C8,
+ 0xD5E4,
+ 0xD600,
+ 0xD61C,
+ 0xD638,
+ 0xD654,
+ 0xD670,
+ 0xD68C,
+ 0xD6A8,
+ 0xD6C4,
+ 0xD6E0,
+ 0xD6FC,
+ 0xD718,
+ 0xD734,
+ 0xD750,
+ 0xD76C,
+ 0xD788,
+};
+
+static const int32_t ucg_hangul_syllable_lvt_ranges[] = {
+ 0xAC01, 0xAC1B,
+ 0xAC1D, 0xAC37,
+ 0xAC39, 0xAC53,
+ 0xAC55, 0xAC6F,
+ 0xAC71, 0xAC8B,
+ 0xAC8D, 0xACA7,
+ 0xACA9, 0xACC3,
+ 0xACC5, 0xACDF,
+ 0xACE1, 0xACFB,
+ 0xACFD, 0xAD17,
+ 0xAD19, 0xAD33,
+ 0xAD35, 0xAD4F,
+ 0xAD51, 0xAD6B,
+ 0xAD6D, 0xAD87,
+ 0xAD89, 0xADA3,
+ 0xADA5, 0xADBF,
+ 0xADC1, 0xADDB,
+ 0xADDD, 0xADF7,
+ 0xADF9, 0xAE13,
+ 0xAE15, 0xAE2F,
+ 0xAE31, 0xAE4B,
+ 0xAE4D, 0xAE67,
+ 0xAE69, 0xAE83,
+ 0xAE85, 0xAE9F,
+ 0xAEA1, 0xAEBB,
+ 0xAEBD, 0xAED7,
+ 0xAED9, 0xAEF3,
+ 0xAEF5, 0xAF0F,
+ 0xAF11, 0xAF2B,
+ 0xAF2D, 0xAF47,
+ 0xAF49, 0xAF63,
+ 0xAF65, 0xAF7F,
+ 0xAF81, 0xAF9B,
+ 0xAF9D, 0xAFB7,
+ 0xAFB9, 0xAFD3,
+ 0xAFD5, 0xAFEF,
+ 0xAFF1, 0xB00B,
+ 0xB00D, 0xB027,
+ 0xB029, 0xB043,
+ 0xB045, 0xB05F,
+ 0xB061, 0xB07B,
+ 0xB07D, 0xB097,
+ 0xB099, 0xB0B3,
+ 0xB0B5, 0xB0CF,
+ 0xB0D1, 0xB0EB,
+ 0xB0ED, 0xB107,
+ 0xB109, 0xB123,
+ 0xB125, 0xB13F,
+ 0xB141, 0xB15B,
+ 0xB15D, 0xB177,
+ 0xB179, 0xB193,
+ 0xB195, 0xB1AF,
+ 0xB1B1, 0xB1CB,
+ 0xB1CD, 0xB1E7,
+ 0xB1E9, 0xB203,
+ 0xB205, 0xB21F,
+ 0xB221, 0xB23B,
+ 0xB23D, 0xB257,
+ 0xB259, 0xB273,
+ 0xB275, 0xB28F,
+ 0xB291, 0xB2AB,
+ 0xB2AD, 0xB2C7,
+ 0xB2C9, 0xB2E3,
+ 0xB2E5, 0xB2FF,
+ 0xB301, 0xB31B,
+ 0xB31D, 0xB337,
+ 0xB339, 0xB353,
+ 0xB355, 0xB36F,
+ 0xB371, 0xB38B,
+ 0xB38D, 0xB3A7,
+ 0xB3A9, 0xB3C3,
+ 0xB3C5, 0xB3DF,
+ 0xB3E1, 0xB3FB,
+ 0xB3FD, 0xB417,
+ 0xB419, 0xB433,
+ 0xB435, 0xB44F,
+ 0xB451, 0xB46B,
+ 0xB46D, 0xB487,
+ 0xB489, 0xB4A3,
+ 0xB4A5, 0xB4BF,
+ 0xB4C1, 0xB4DB,
+ 0xB4DD, 0xB4F7,
+ 0xB4F9, 0xB513,
+ 0xB515, 0xB52F,
+ 0xB531, 0xB54B,
+ 0xB54D, 0xB567,
+ 0xB569, 0xB583,
+ 0xB585, 0xB59F,
+ 0xB5A1, 0xB5BB,
+ 0xB5BD, 0xB5D7,
+ 0xB5D9, 0xB5F3,
+ 0xB5F5, 0xB60F,
+ 0xB611, 0xB62B,
+ 0xB62D, 0xB647,
+ 0xB649, 0xB663,
+ 0xB665, 0xB67F,
+ 0xB681, 0xB69B,
+ 0xB69D, 0xB6B7,
+ 0xB6B9, 0xB6D3,
+ 0xB6D5, 0xB6EF,
+ 0xB6F1, 0xB70B,
+ 0xB70D, 0xB727,
+ 0xB729, 0xB743,
+ 0xB745, 0xB75F,
+ 0xB761, 0xB77B,
+ 0xB77D, 0xB797,
+ 0xB799, 0xB7B3,
+ 0xB7B5, 0xB7CF,
+ 0xB7D1, 0xB7EB,
+ 0xB7ED, 0xB807,
+ 0xB809, 0xB823,
+ 0xB825, 0xB83F,
+ 0xB841, 0xB85B,
+ 0xB85D, 0xB877,
+ 0xB879, 0xB893,
+ 0xB895, 0xB8AF,
+ 0xB8B1, 0xB8CB,
+ 0xB8CD, 0xB8E7,
+ 0xB8E9, 0xB903,
+ 0xB905, 0xB91F,
+ 0xB921, 0xB93B,
+ 0xB93D, 0xB957,
+ 0xB959, 0xB973,
+ 0xB975, 0xB98F,
+ 0xB991, 0xB9AB,
+ 0xB9AD, 0xB9C7,
+ 0xB9C9, 0xB9E3,
+ 0xB9E5, 0xB9FF,
+ 0xBA01, 0xBA1B,
+ 0xBA1D, 0xBA37,
+ 0xBA39, 0xBA53,
+ 0xBA55, 0xBA6F,
+ 0xBA71, 0xBA8B,
+ 0xBA8D, 0xBAA7,
+ 0xBAA9, 0xBAC3,
+ 0xBAC5, 0xBADF,
+ 0xBAE1, 0xBAFB,
+ 0xBAFD, 0xBB17,
+ 0xBB19, 0xBB33,
+ 0xBB35, 0xBB4F,
+ 0xBB51, 0xBB6B,
+ 0xBB6D, 0xBB87,
+ 0xBB89, 0xBBA3,
+ 0xBBA5, 0xBBBF,
+ 0xBBC1, 0xBBDB,
+ 0xBBDD, 0xBBF7,
+ 0xBBF9, 0xBC13,
+ 0xBC15, 0xBC2F,
+ 0xBC31, 0xBC4B,
+ 0xBC4D, 0xBC67,
+ 0xBC69, 0xBC83,
+ 0xBC85, 0xBC9F,
+ 0xBCA1, 0xBCBB,
+ 0xBCBD, 0xBCD7,
+ 0xBCD9, 0xBCF3,
+ 0xBCF5, 0xBD0F,
+ 0xBD11, 0xBD2B,
+ 0xBD2D, 0xBD47,
+ 0xBD49, 0xBD63,
+ 0xBD65, 0xBD7F,
+ 0xBD81, 0xBD9B,
+ 0xBD9D, 0xBDB7,
+ 0xBDB9, 0xBDD3,
+ 0xBDD5, 0xBDEF,
+ 0xBDF1, 0xBE0B,
+ 0xBE0D, 0xBE27,
+ 0xBE29, 0xBE43,
+ 0xBE45, 0xBE5F,
+ 0xBE61, 0xBE7B,
+ 0xBE7D, 0xBE97,
+ 0xBE99, 0xBEB3,
+ 0xBEB5, 0xBECF,
+ 0xBED1, 0xBEEB,
+ 0xBEED, 0xBF07,
+ 0xBF09, 0xBF23,
+ 0xBF25, 0xBF3F,
+ 0xBF41, 0xBF5B,
+ 0xBF5D, 0xBF77,
+ 0xBF79, 0xBF93,
+ 0xBF95, 0xBFAF,
+ 0xBFB1, 0xBFCB,
+ 0xBFCD, 0xBFE7,
+ 0xBFE9, 0xC003,
+ 0xC005, 0xC01F,
+ 0xC021, 0xC03B,
+ 0xC03D, 0xC057,
+ 0xC059, 0xC073,
+ 0xC075, 0xC08F,
+ 0xC091, 0xC0AB,
+ 0xC0AD, 0xC0C7,
+ 0xC0C9, 0xC0E3,
+ 0xC0E5, 0xC0FF,
+ 0xC101, 0xC11B,
+ 0xC11D, 0xC137,
+ 0xC139, 0xC153,
+ 0xC155, 0xC16F,
+ 0xC171, 0xC18B,
+ 0xC18D, 0xC1A7,
+ 0xC1A9, 0xC1C3,
+ 0xC1C5, 0xC1DF,
+ 0xC1E1, 0xC1FB,
+ 0xC1FD, 0xC217,
+ 0xC219, 0xC233,
+ 0xC235, 0xC24F,
+ 0xC251, 0xC26B,
+ 0xC26D, 0xC287,
+ 0xC289, 0xC2A3,
+ 0xC2A5, 0xC2BF,
+ 0xC2C1, 0xC2DB,
+ 0xC2DD, 0xC2F7,
+ 0xC2F9, 0xC313,
+ 0xC315, 0xC32F,
+ 0xC331, 0xC34B,
+ 0xC34D, 0xC367,
+ 0xC369, 0xC383,
+ 0xC385, 0xC39F,
+ 0xC3A1, 0xC3BB,
+ 0xC3BD, 0xC3D7,
+ 0xC3D9, 0xC3F3,
+ 0xC3F5, 0xC40F,
+ 0xC411, 0xC42B,
+ 0xC42D, 0xC447,
+ 0xC449, 0xC463,
+ 0xC465, 0xC47F,
+ 0xC481, 0xC49B,
+ 0xC49D, 0xC4B7,
+ 0xC4B9, 0xC4D3,
+ 0xC4D5, 0xC4EF,
+ 0xC4F1, 0xC50B,
+ 0xC50D, 0xC527,
+ 0xC529, 0xC543,
+ 0xC545, 0xC55F,
+ 0xC561, 0xC57B,
+ 0xC57D, 0xC597,
+ 0xC599, 0xC5B3,
+ 0xC5B5, 0xC5CF,
+ 0xC5D1, 0xC5EB,
+ 0xC5ED, 0xC607,
+ 0xC609, 0xC623,
+ 0xC625, 0xC63F,
+ 0xC641, 0xC65B,
+ 0xC65D, 0xC677,
+ 0xC679, 0xC693,
+ 0xC695, 0xC6AF,
+ 0xC6B1, 0xC6CB,
+ 0xC6CD, 0xC6E7,
+ 0xC6E9, 0xC703,
+ 0xC705, 0xC71F,
+ 0xC721, 0xC73B,
+ 0xC73D, 0xC757,
+ 0xC759, 0xC773,
+ 0xC775, 0xC78F,
+ 0xC791, 0xC7AB,
+ 0xC7AD, 0xC7C7,
+ 0xC7C9, 0xC7E3,
+ 0xC7E5, 0xC7FF,
+ 0xC801, 0xC81B,
+ 0xC81D, 0xC837,
+ 0xC839, 0xC853,
+ 0xC855, 0xC86F,
+ 0xC871, 0xC88B,
+ 0xC88D, 0xC8A7,
+ 0xC8A9, 0xC8C3,
+ 0xC8C5, 0xC8DF,
+ 0xC8E1, 0xC8FB,
+ 0xC8FD, 0xC917,
+ 0xC919, 0xC933,
+ 0xC935, 0xC94F,
+ 0xC951, 0xC96B,
+ 0xC96D, 0xC987,
+ 0xC989, 0xC9A3,
+ 0xC9A5, 0xC9BF,
+ 0xC9C1, 0xC9DB,
+ 0xC9DD, 0xC9F7,
+ 0xC9F9, 0xCA13,
+ 0xCA15, 0xCA2F,
+ 0xCA31, 0xCA4B,
+ 0xCA4D, 0xCA67,
+ 0xCA69, 0xCA83,
+ 0xCA85, 0xCA9F,
+ 0xCAA1, 0xCABB,
+ 0xCABD, 0xCAD7,
+ 0xCAD9, 0xCAF3,
+ 0xCAF5, 0xCB0F,
+ 0xCB11, 0xCB2B,
+ 0xCB2D, 0xCB47,
+ 0xCB49, 0xCB63,
+ 0xCB65, 0xCB7F,
+ 0xCB81, 0xCB9B,
+ 0xCB9D, 0xCBB7,
+ 0xCBB9, 0xCBD3,
+ 0xCBD5, 0xCBEF,
+ 0xCBF1, 0xCC0B,
+ 0xCC0D, 0xCC27,
+ 0xCC29, 0xCC43,
+ 0xCC45, 0xCC5F,
+ 0xCC61, 0xCC7B,
+ 0xCC7D, 0xCC97,
+ 0xCC99, 0xCCB3,
+ 0xCCB5, 0xCCCF,
+ 0xCCD1, 0xCCEB,
+ 0xCCED, 0xCD07,
+ 0xCD09, 0xCD23,
+ 0xCD25, 0xCD3F,
+ 0xCD41, 0xCD5B,
+ 0xCD5D, 0xCD77,
+ 0xCD79, 0xCD93,
+ 0xCD95, 0xCDAF,
+ 0xCDB1, 0xCDCB,
+ 0xCDCD, 0xCDE7,
+ 0xCDE9, 0xCE03,
+ 0xCE05, 0xCE1F,
+ 0xCE21, 0xCE3B,
+ 0xCE3D, 0xCE57,
+ 0xCE59, 0xCE73,
+ 0xCE75, 0xCE8F,
+ 0xCE91, 0xCEAB,
+ 0xCEAD, 0xCEC7,
+ 0xCEC9, 0xCEE3,
+ 0xCEE5, 0xCEFF,
+ 0xCF01, 0xCF1B,
+ 0xCF1D, 0xCF37,
+ 0xCF39, 0xCF53,
+ 0xCF55, 0xCF6F,
+ 0xCF71, 0xCF8B,
+ 0xCF8D, 0xCFA7,
+ 0xCFA9, 0xCFC3,
+ 0xCFC5, 0xCFDF,
+ 0xCFE1, 0xCFFB,
+ 0xCFFD, 0xD017,
+ 0xD019, 0xD033,
+ 0xD035, 0xD04F,
+ 0xD051, 0xD06B,
+ 0xD06D, 0xD087,
+ 0xD089, 0xD0A3,
+ 0xD0A5, 0xD0BF,
+ 0xD0C1, 0xD0DB,
+ 0xD0DD, 0xD0F7,
+ 0xD0F9, 0xD113,
+ 0xD115, 0xD12F,
+ 0xD131, 0xD14B,
+ 0xD14D, 0xD167,
+ 0xD169, 0xD183,
+ 0xD185, 0xD19F,
+ 0xD1A1, 0xD1BB,
+ 0xD1BD, 0xD1D7,
+ 0xD1D9, 0xD1F3,
+ 0xD1F5, 0xD20F,
+ 0xD211, 0xD22B,
+ 0xD22D, 0xD247,
+ 0xD249, 0xD263,
+ 0xD265, 0xD27F,
+ 0xD281, 0xD29B,
+ 0xD29D, 0xD2B7,
+ 0xD2B9, 0xD2D3,
+ 0xD2D5, 0xD2EF,
+ 0xD2F1, 0xD30B,
+ 0xD30D, 0xD327,
+ 0xD329, 0xD343,
+ 0xD345, 0xD35F,
+ 0xD361, 0xD37B,
+ 0xD37D, 0xD397,
+ 0xD399, 0xD3B3,
+ 0xD3B5, 0xD3CF,
+ 0xD3D1, 0xD3EB,
+ 0xD3ED, 0xD407,
+ 0xD409, 0xD423,
+ 0xD425, 0xD43F,
+ 0xD441, 0xD45B,
+ 0xD45D, 0xD477,
+ 0xD479, 0xD493,
+ 0xD495, 0xD4AF,
+ 0xD4B1, 0xD4CB,
+ 0xD4CD, 0xD4E7,
+ 0xD4E9, 0xD503,
+ 0xD505, 0xD51F,
+ 0xD521, 0xD53B,
+ 0xD53D, 0xD557,
+ 0xD559, 0xD573,
+ 0xD575, 0xD58F,
+ 0xD591, 0xD5AB,
+ 0xD5AD, 0xD5C7,
+ 0xD5C9, 0xD5E3,
+ 0xD5E5, 0xD5FF,
+ 0xD601, 0xD61B,
+ 0xD61D, 0xD637,
+ 0xD639, 0xD653,
+ 0xD655, 0xD66F,
+ 0xD671, 0xD68B,
+ 0xD68D, 0xD6A7,
+ 0xD6A9, 0xD6C3,
+ 0xD6C5, 0xD6DF,
+ 0xD6E1, 0xD6FB,
+ 0xD6FD, 0xD717,
+ 0xD719, 0xD733,
+ 0xD735, 0xD74F,
+ 0xD751, 0xD76B,
+ 0xD76D, 0xD787,
+ 0xD789, 0xD7A3,
+};
+
+static const int32_t ucg_indic_conjunct_break_consonant_ranges[] = {
+ 0x0915, 0x0939,
+ 0x0958, 0x095F,
+ 0x0978, 0x097F,
+ 0x0995, 0x09A8,
+ 0x09AA, 0x09B0,
+ 0x09B2, 0x09B2,
+ 0x09B6, 0x09B9,
+ 0x09DC, 0x09DD,
+ 0x09DF, 0x09DF,
+ 0x09F0, 0x09F1,
+ 0x0A95, 0x0AA8,
+ 0x0AAA, 0x0AB0,
+ 0x0AB2, 0x0AB3,
+ 0x0AB5, 0x0AB9,
+ 0x0AF9, 0x0AF9,
+ 0x0B15, 0x0B28,
+ 0x0B2A, 0x0B30,
+ 0x0B32, 0x0B33,
+ 0x0B35, 0x0B39,
+ 0x0B5C, 0x0B5D,
+ 0x0B5F, 0x0B5F,
+ 0x0B71, 0x0B71,
+ 0x0C15, 0x0C28,
+ 0x0C2A, 0x0C39,
+ 0x0C58, 0x0C5A,
+ 0x0D15, 0x0D3A,
+};
+
+static const int32_t ucg_indic_conjunct_break_extend_ranges[] = {
+ 0x0300, 0x034E,
+ 0x0350, 0x036F,
+ 0x0483, 0x0487,
+ 0x0591, 0x05BD,
+ 0x05BF, 0x05BF,
+ 0x05C1, 0x05C2,
+ 0x05C4, 0x05C5,
+ 0x05C7, 0x05C7,
+ 0x0610, 0x061A,
+ 0x064B, 0x065F,
+ 0x0670, 0x0670,
+ 0x06D6, 0x06DC,
+ 0x06DF, 0x06E4,
+ 0x06E7, 0x06E8,
+ 0x06EA, 0x06ED,
+ 0x0711, 0x0711,
+ 0x0730, 0x074A,
+ 0x07EB, 0x07F3,
+ 0x07FD, 0x07FD,
+ 0x0816, 0x0819,
+ 0x081B, 0x0823,
+ 0x0825, 0x0827,
+ 0x0829, 0x082D,
+ 0x0859, 0x085B,
+ 0x0898, 0x089F,
+ 0x08CA, 0x08E1,
+ 0x08E3, 0x08FF,
+ 0x093C, 0x093C,
+ 0x0951, 0x0954,
+ 0x09BC, 0x09BC,
+ 0x09FE, 0x09FE,
+ 0x0A3C, 0x0A3C,
+ 0x0ABC, 0x0ABC,
+ 0x0B3C, 0x0B3C,
+ 0x0C3C, 0x0C3C,
+ 0x0C55, 0x0C56,
+ 0x0CBC, 0x0CBC,
+ 0x0D3B, 0x0D3C,
+ 0x0E38, 0x0E3A,
+ 0x0E48, 0x0E4B,
+ 0x0EB8, 0x0EBA,
+ 0x0EC8, 0x0ECB,
+ 0x0F18, 0x0F19,
+ 0x0F35, 0x0F35,
+ 0x0F37, 0x0F37,
+ 0x0F39, 0x0F39,
+ 0x0F71, 0x0F72,
+ 0x0F74, 0x0F74,
+ 0x0F7A, 0x0F7D,
+ 0x0F80, 0x0F80,
+ 0x0F82, 0x0F84,
+ 0x0F86, 0x0F87,
+ 0x0FC6, 0x0FC6,
+ 0x1037, 0x1037,
+ 0x1039, 0x103A,
+ 0x108D, 0x108D,
+ 0x135D, 0x135F,
+ 0x1714, 0x1714,
+ 0x17D2, 0x17D2,
+ 0x17DD, 0x17DD,
+ 0x18A9, 0x18A9,
+ 0x1939, 0x193B,
+ 0x1A17, 0x1A18,
+ 0x1A60, 0x1A60,
+ 0x1A75, 0x1A7C,
+ 0x1A7F, 0x1A7F,
+ 0x1AB0, 0x1ABD,
+ 0x1ABF, 0x1ACE,
+ 0x1B34, 0x1B34,
+ 0x1B6B, 0x1B73,
+ 0x1BAB, 0x1BAB,
+ 0x1BE6, 0x1BE6,
+ 0x1C37, 0x1C37,
+ 0x1CD0, 0x1CD2,
+ 0x1CD4, 0x1CE0,
+ 0x1CE2, 0x1CE8,
+ 0x1CED, 0x1CED,
+ 0x1CF4, 0x1CF4,
+ 0x1CF8, 0x1CF9,
+ 0x1DC0, 0x1DFF,
+ 0x200D, 0x200D,
+ 0x20D0, 0x20DC,
+ 0x20E1, 0x20E1,
+ 0x20E5, 0x20F0,
+ 0x2CEF, 0x2CF1,
+ 0x2D7F, 0x2D7F,
+ 0x2DE0, 0x2DFF,
+ 0x302A, 0x302D,
+ 0x302E, 0x302F,
+ 0x3099, 0x309A,
+ 0xA66F, 0xA66F,
+ 0xA674, 0xA67D,
+ 0xA69E, 0xA69F,
+ 0xA6F0, 0xA6F1,
+ 0xA82C, 0xA82C,
+ 0xA8E0, 0xA8F1,
+ 0xA92B, 0xA92D,
+ 0xA9B3, 0xA9B3,
+ 0xAAB0, 0xAAB0,
+ 0xAAB2, 0xAAB4,
+ 0xAAB7, 0xAAB8,
+ 0xAABE, 0xAABF,
+ 0xAAC1, 0xAAC1,
+ 0xAAF6, 0xAAF6,
+ 0xABED, 0xABED,
+ 0xFB1E, 0xFB1E,
+ 0xFE20, 0xFE2F,
+ 0x101FD, 0x101FD,
+ 0x102E0, 0x102E0,
+ 0x10376, 0x1037A,
+ 0x10A0D, 0x10A0D,
+ 0x10A0F, 0x10A0F,
+ 0x10A38, 0x10A3A,
+ 0x10A3F, 0x10A3F,
+ 0x10AE5, 0x10AE6,
+ 0x10D24, 0x10D27,
+ 0x10EAB, 0x10EAC,
+ 0x10EFD, 0x10EFF,
+ 0x10F46, 0x10F50,
+ 0x10F82, 0x10F85,
+ 0x11070, 0x11070,
+ 0x1107F, 0x1107F,
+ 0x110BA, 0x110BA,
+ 0x11100, 0x11102,
+ 0x11133, 0x11134,
+ 0x11173, 0x11173,
+ 0x111CA, 0x111CA,
+ 0x11236, 0x11236,
+ 0x112E9, 0x112EA,
+ 0x1133B, 0x1133C,
+ 0x11366, 0x1136C,
+ 0x11370, 0x11374,
+ 0x11446, 0x11446,
+ 0x1145E, 0x1145E,
+ 0x114C3, 0x114C3,
+ 0x115C0, 0x115C0,
+ 0x116B7, 0x116B7,
+ 0x1172B, 0x1172B,
+ 0x1183A, 0x1183A,
+ 0x1193E, 0x1193E,
+ 0x11943, 0x11943,
+ 0x11A34, 0x11A34,
+ 0x11A47, 0x11A47,
+ 0x11A99, 0x11A99,
+ 0x11D42, 0x11D42,
+ 0x11D44, 0x11D45,
+ 0x11D97, 0x11D97,
+ 0x11F42, 0x11F42,
+ 0x16AF0, 0x16AF4,
+ 0x16B30, 0x16B36,
+ 0x1BC9E, 0x1BC9E,
+ 0x1D165, 0x1D165,
+ 0x1D167, 0x1D169,
+ 0x1D16E, 0x1D172,
+ 0x1D17B, 0x1D182,
+ 0x1D185, 0x1D18B,
+ 0x1D1AA, 0x1D1AD,
+ 0x1D242, 0x1D244,
+ 0x1E000, 0x1E006,
+ 0x1E008, 0x1E018,
+ 0x1E01B, 0x1E021,
+ 0x1E023, 0x1E024,
+ 0x1E026, 0x1E02A,
+ 0x1E08F, 0x1E08F,
+ 0x1E130, 0x1E136,
+ 0x1E2AE, 0x1E2AE,
+ 0x1E2EC, 0x1E2EF,
+ 0x1E4EC, 0x1E4EF,
+ 0x1E8D0, 0x1E8D6,
+ 0x1E944, 0x1E94A,
+};
+
+// Fullwidth (F) and Wide (W) are counted as 2.
+// Everything else is 1.
+//
+// Derived from: https://unicode.org/Public/15.1.0/ucd/EastAsianWidth.txt
+static const int32_t ucg_normalized_east_asian_width_ranges[] = {
+ 0x0000, 0x10FF, 1,
+ 0x1100, 0x115F, 2,
+ 0x1160, 0x2319, 1,
+ 0x231A, 0x231B, 2,
+ 0x231C, 0x2328, 1,
+ 0x2329, 0x232A, 2,
+ 0x232B, 0x23E8, 1,
+ 0x23E9, 0x23EC, 2,
+ 0x23ED, 0x23EF, 1,
+ 0x23F0, 0x23F0, 2,
+ 0x23F1, 0x23F2, 1,
+ 0x23F3, 0x23F3, 2,
+ 0x23F4, 0x25FC, 1,
+ 0x25FD, 0x25FE, 2,
+ 0x25FF, 0x2613, 1,
+ 0x2614, 0x2615, 2,
+ 0x2616, 0x2647, 1,
+ 0x2648, 0x2653, 2,
+ 0x2654, 0x267E, 1,
+ 0x267F, 0x267F, 2,
+ 0x2680, 0x2692, 1,
+ 0x2693, 0x2693, 2,
+ 0x2694, 0x26A0, 1,
+ 0x26A1, 0x26A1, 2,
+ 0x26A2, 0x26A9, 1,
+ 0x26AA, 0x26AB, 2,
+ 0x26AC, 0x26BC, 1,
+ 0x26BD, 0x26BE, 2,
+ 0x26BF, 0x26C3, 1,
+ 0x26C4, 0x26C5, 2,
+ 0x26C6, 0x26CD, 1,
+ 0x26CE, 0x26CE, 2,
+ 0x26CF, 0x26D3, 1,
+ 0x26D4, 0x26D4, 2,
+ 0x26D5, 0x26E9, 1,
+ 0x26EA, 0x26EA, 2,
+ 0x26EB, 0x26F1, 1,
+ 0x26F2, 0x26F3, 2,
+ 0x26F4, 0x26F4, 1,
+ 0x26F5, 0x26F5, 2,
+ 0x26F6, 0x26F9, 1,
+ 0x26FA, 0x26FA, 2,
+ 0x26FB, 0x26FC, 1,
+ 0x26FD, 0x26FD, 2,
+ 0x26FE, 0x2704, 1,
+ 0x2705, 0x2705, 2,
+ 0x2706, 0x2709, 1,
+ 0x270A, 0x270B, 2,
+ 0x270C, 0x2727, 1,
+ 0x2728, 0x2728, 2,
+ 0x2729, 0x274B, 1,
+ 0x274C, 0x274C, 2,
+ 0x274D, 0x274D, 1,
+ 0x274E, 0x274E, 2,
+ 0x274F, 0x2752, 1,
+ 0x2753, 0x2755, 2,
+ 0x2756, 0x2756, 1,
+ 0x2757, 0x2757, 2,
+ 0x2758, 0x2794, 1,
+ 0x2795, 0x2797, 2,
+ 0x2798, 0x27AF, 1,
+ 0x27B0, 0x27B0, 2,
+ 0x27B1, 0x27BE, 1,
+ 0x27BF, 0x27BF, 2,
+ 0x27C0, 0x2B1A, 1,
+ 0x2B1B, 0x2B1C, 2,
+ 0x2B1D, 0x2B4F, 1,
+ 0x2B50, 0x2B50, 2,
+ 0x2B51, 0x2B54, 1,
+ 0x2B55, 0x2B55, 2,
+ 0x2B56, 0x2E5D, 1,
+ 0x2E80, 0x303E, 2,
+ 0x303F, 0x303F, 1,
+ 0x3041, 0x3247, 2,
+ 0x3248, 0x324F, 1,
+ 0x3250, 0x4DBF, 2,
+ 0x4DC0, 0x4DFF, 1,
+ 0x4E00, 0xA4C6, 2,
+ 0xA4D0, 0xA95F, 1,
+ 0xA960, 0xA97C, 2,
+ 0xA980, 0xABF9, 1,
+ 0xAC00, 0xD7A3, 2,
+ 0xD7B0, 0xF8FF, 1,
+ 0xF900, 0xFAFF, 2,
+ 0xFB00, 0xFE0F, 1,
+ 0xFE10, 0xFE19, 2,
+ 0xFE20, 0xFE2F, 1,
+ 0xFE30, 0xFE6B, 2,
+ 0xFE70, 0xFEFF, 1,
+ 0xFF01, 0xFF60, 2,
+ 0xFF61, 0xFFDC, 1,
+ 0xFFE0, 0xFFE6, 2,
+ 0xFFE8, 0x16F9F, 1,
+ 0x16FE0, 0x1B2FB, 2,
+ 0x1BC00, 0x1F003, 1,
+ 0x1F004, 0x1F004, 2,
+ 0x1F005, 0x1F0CE, 1,
+ 0x1F0CF, 0x1F0CF, 2,
+ 0x1F0D1, 0x1F18D, 1,
+ 0x1F18E, 0x1F18E, 2,
+ 0x1F18F, 0x1F190, 1,
+ 0x1F191, 0x1F19A, 2,
+ 0x1F19B, 0x1F1FF, 1,
+ 0x1F200, 0x1F320, 2,
+ 0x1F321, 0x1F32C, 1,
+ 0x1F32D, 0x1F335, 2,
+ 0x1F336, 0x1F336, 1,
+ 0x1F337, 0x1F37C, 2,
+ 0x1F37D, 0x1F37D, 1,
+ 0x1F37E, 0x1F393, 2,
+ 0x1F394, 0x1F39F, 1,
+ 0x1F3A0, 0x1F3CA, 2,
+ 0x1F3CB, 0x1F3CE, 1,
+ 0x1F3CF, 0x1F3D3, 2,
+ 0x1F3D4, 0x1F3DF, 1,
+ 0x1F3E0, 0x1F3F0, 2,
+ 0x1F3F1, 0x1F3F3, 1,
+ 0x1F3F4, 0x1F3F4, 2,
+ 0x1F3F5, 0x1F3F7, 1,
+ 0x1F3F8, 0x1F43E, 2,
+ 0x1F43F, 0x1F43F, 1,
+ 0x1F440, 0x1F440, 2,
+ 0x1F441, 0x1F441, 1,
+ 0x1F442, 0x1F4FC, 2,
+ 0x1F4FD, 0x1F4FE, 1,
+ 0x1F4FF, 0x1F53D, 2,
+ 0x1F53E, 0x1F54A, 1,
+ 0x1F54B, 0x1F54E, 2,
+ 0x1F54F, 0x1F54F, 1,
+ 0x1F550, 0x1F567, 2,
+ 0x1F568, 0x1F579, 1,
+ 0x1F57A, 0x1F57A, 2,
+ 0x1F57B, 0x1F594, 1,
+ 0x1F595, 0x1F596, 2,
+ 0x1F597, 0x1F5A3, 1,
+ 0x1F5A4, 0x1F5A4, 2,
+ 0x1F5A5, 0x1F5FA, 1,
+ 0x1F5FB, 0x1F64F, 2,
+ 0x1F650, 0x1F67F, 1,
+ 0x1F680, 0x1F6C5, 2,
+ 0x1F6C6, 0x1F6CB, 1,
+ 0x1F6CC, 0x1F6CC, 2,
+ 0x1F6CD, 0x1F6CF, 1,
+ 0x1F6D0, 0x1F6D2, 2,
+ 0x1F6D3, 0x1F6D4, 1,
+ 0x1F6D5, 0x1F6DF, 2,
+ 0x1F6E0, 0x1F6EA, 1,
+ 0x1F6EB, 0x1F6EC, 2,
+ 0x1F6F0, 0x1F6F3, 1,
+ 0x1F6F4, 0x1F6FC, 2,
+ 0x1F700, 0x1F7D9, 1,
+ 0x1F7E0, 0x1F7F0, 2,
+ 0x1F800, 0x1F90B, 1,
+ 0x1F90C, 0x1F93A, 2,
+ 0x1F93B, 0x1F93B, 1,
+ 0x1F93C, 0x1F945, 2,
+ 0x1F946, 0x1F946, 1,
+ 0x1F947, 0x1F9FF, 2,
+ 0x1FA00, 0x1FA6D, 1,
+ 0x1FA70, 0x1FAF8, 2,
+ 0x1FB00, 0x1FBF9, 1,
+ 0x20000, 0x3FFFD, 2,
+ 0xE0001, 0x10FFFD, 1,
+};
+
+//
+// End of Unicode 15.1.0 block.
+//
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UCG_TABLES_INCLUDED */
diff --git a/src/unicode.cpp b/src/unicode.cpp
index c244a323c..665d5b182 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -162,3 +162,8 @@ end:
if (codepoint_out) *codepoint_out = codepoint;
return width;
}
+
+// NOTE(Feoramund): It's down here because I made UCG use the utf8_decode above to avoid duplicating code.
+extern "C" {
+#include "ucg/ucg.c"
+}