aboutsummaryrefslogtreecommitdiff
path: root/core/unicode
diff options
context:
space:
mode:
authorFeoramund <161657516+Feoramund@users.noreply.github.com>2024-06-17 21:21:06 -0400
committerFeoramund <161657516+Feoramund@users.noreply.github.com>2024-06-17 21:57:32 -0400
commit1620a69398234cb3a58c531bb57416a6ae801136 (patch)
treed97969ade106f167c9285e1bfc6d09e101256cda /core/unicode
parent1a93dfd28f8e256c164d67985866f0aab3659d44 (diff)
Add `decode_grapheme_clusters` to `core:unicode/utf8`
Diffstat (limited to 'core/unicode')
-rw-r--r--core/unicode/letter.odin222
-rw-r--r--core/unicode/tables.odin2449
-rw-r--r--core/unicode/utf8/grapheme.odin387
3 files changed, 3058 insertions, 0 deletions
diff --git a/core/unicode/letter.odin b/core/unicode/letter.odin
index ff167334c..46b84849f 100644
--- a/core/unicode/letter.odin
+++ b/core/unicode/letter.odin
@@ -5,6 +5,9 @@ REPLACEMENT_CHAR :: '\ufffd' // Represented an invalid code point
MAX_ASCII :: '\u007f' // Maximum ASCII value
MAX_LATIN1 :: '\u00ff' // Maximum Latin-1 value
+ZERO_WIDTH_NON_JOINER :: '\u200C' // U+200C ZERO WIDTH NON-JOINER
+ZERO_WIDTH_JOINER :: '\u200D' // U+200D ZERO WIDTH JOINER
+
binary_search :: proc(c: i32, table: []i32, length, stride: int) -> int {
n := length
t := 0
@@ -193,3 +196,222 @@ is_symbol :: proc(r: rune) -> bool {
}
return false
}
+
+//
+// The procedures below are accurate as of Unicode 15.1.0.
+//
+
+// Emoji_Modifier
+//
+// Reports whether `r` lies in the inclusive range U+1F3FB ..= U+1F3FF.
+is_emoji_modifier :: proc(r: rune) -> bool {
+	switch r {
+	case 0x1F3FB ..= 0x1F3FF:
+		return true
+	}
+	return false
+}
+
+// Regional_Indicator
+//
+// Reports whether `r` lies in the inclusive range U+1F1E6 ..= U+1F1FF.
+is_regional_indicator :: proc(r: rune) -> bool {
+	switch r {
+	case 0x1F1E6 ..= 0x1F1FF:
+		return true
+	}
+	return false
+}
+
+// General_Category=Enclosing_Mark
+//
+// Reports whether `r` is one of the code points listed below; the set is
+// small enough to be matched directly instead of through a lookup table.
+is_enclosing_mark :: proc(r: rune) -> bool {
+	switch r {
+	case 0x0488 ..= 0x0489,
+	     0x1ABE,
+	     0x20DD ..= 0x20E0,
+	     0x20E2 ..= 0x20E4,
+	     0xA670 ..= 0xA672:
+		return true
+	case:
+		return false
+	}
+}
+
+// Prepended_Concatenation_Mark
+//
+// Reports whether `r` is one of the code points listed below.
+is_prepended_concatenation_mark :: proc(r: rune) -> bool {
+	switch r {
+	case 0x00600 ..= 0x00605,
+	     0x006DD,
+	     0x0070F,
+	     0x00890 ..= 0x00891,
+	     0x008E2,
+	     0x110BD,
+	     0x110CD:
+		return true
+	}
+	// Anything not matched above does not carry the property.
+	return false
+}
+
+// General_Category=Spacing_Mark
+//
+// Looks `r` up in `spacing_mark_ranges`, which is searched as
+// (first, last) pairs with a stride of 2.
+is_spacing_mark :: proc(r: rune) -> bool {
+	code := i32(r)
+	pair_count := len(spacing_mark_ranges)/2
+	idx := binary_search(code, spacing_mark_ranges[:], pair_count, 2)
+	if idx < 0 {
+		// A negative index means the search produced no candidate pair.
+		return false
+	}
+	// The candidate pair must actually enclose the code point.
+	return spacing_mark_ranges[idx] <= code && code <= spacing_mark_ranges[idx+1]
+}
+
+// General_Category=Nonspacing_Mark
+//
+// Looks `r` up in `nonspacing_mark_ranges`, which is searched as
+// (first, last) pairs with a stride of 2.
+is_nonspacing_mark :: proc(r: rune) -> bool {
+	code := i32(r)
+	pair_count := len(nonspacing_mark_ranges)/2
+	idx := binary_search(code, nonspacing_mark_ranges[:], pair_count, 2)
+	if idx < 0 {
+		// A negative index means the search produced no candidate pair.
+		return false
+	}
+	// The candidate pair must actually enclose the code point.
+	return nonspacing_mark_ranges[idx] <= code && code <= nonspacing_mark_ranges[idx+1]
+}
+
+// Extended_Pictographic
+//
+// Looks `r` up in `emoji_extended_pictographic_ranges`, which is searched as
+// (first, last) pairs with a stride of 2.
+is_emoji_extended_pictographic :: proc(r: rune) -> bool {
+	code := i32(r)
+	pair_count := len(emoji_extended_pictographic_ranges)/2
+	idx := binary_search(code, emoji_extended_pictographic_ranges[:], pair_count, 2)
+	if idx < 0 {
+		// A negative index means the search produced no candidate pair.
+		return false
+	}
+	// The candidate pair must actually enclose the code point.
+	return emoji_extended_pictographic_ranges[idx] <= code && code <= emoji_extended_pictographic_ranges[idx+1]
+}
+
+// Grapheme_Extend
+//
+// Looks `r` up in `grapheme_extend_ranges`, which is searched as
+// (first, last) pairs with a stride of 2.
+is_grapheme_extend :: proc(r: rune) -> bool {
+	code := i32(r)
+	pair_count := len(grapheme_extend_ranges)/2
+	idx := binary_search(code, grapheme_extend_ranges[:], pair_count, 2)
+	if idx < 0 {
+		// A negative index means the search produced no candidate pair.
+		return false
+	}
+	// The candidate pair must actually enclose the code point.
+	return grapheme_extend_ranges[idx] <= code && code <= grapheme_extend_ranges[idx+1]
+}
+
+
+// Hangul_Syllable_Type=Leading_Jamo
+//
+// True for U+1100 ..= U+115F and U+A960 ..= U+A97C.
+is_hangul_syllable_leading :: proc(r: rune) -> bool {
+	switch r {
+	case 0x1100 ..= 0x115F, 0xA960 ..= 0xA97C:
+		return true
+	}
+	return false
+}
+
+// Hangul_Syllable_Type=Vowel_Jamo
+//
+// True for U+1160 ..= U+11A7 and U+D7B0 ..= U+D7C6.
+is_hangul_syllable_vowel :: proc(r: rune) -> bool {
+	switch r {
+	case 0x1160 ..= 0x11A7, 0xD7B0 ..= 0xD7C6:
+		return true
+	}
+	return false
+}
+
+// Hangul_Syllable_Type=Trailing_Jamo
+//
+// True for U+11A8 ..= U+11FF and U+D7CB ..= U+D7FB.
+is_hangul_syllable_trailing :: proc(r: rune) -> bool {
+	switch r {
+	case 0x11A8 ..= 0x11FF, 0xD7CB ..= 0xD7FB:
+		return true
+	}
+	return false
+}
+
+// Hangul_Syllable_Type=LV_Syllable
+//
+// Precomposed Hangul syllables occupy U+AC00 ..= U+D7A3 and are laid out
+// arithmetically: every leading/vowel combination owns a run of 28 consecutive
+// code points, and the first code point of each run is the syllable without a
+// trailing consonant (LV). The remaining 27 are LVT syllables.
+//
+// The property is therefore computed directly instead of being looked up in a
+// table; the result is `true` exactly for U+AC00, U+AC1C, U+AC38, ... U+D788.
+is_hangul_syllable_lv :: proc(r: rune) -> bool {
+	SYLLABLE_FIRST  :: 0xAC00 // first precomposed Hangul syllable
+	SYLLABLE_LAST   :: 0xD7A3 // last precomposed Hangul syllable
+	TRAILING_COUNT  :: 28     // code points per leading/vowel combination
+
+	c := i32(r)
+	if c < SYLLABLE_FIRST || c > SYLLABLE_LAST {
+		return false
+	}
+	// Offset 0 within a run of 28 means "no trailing consonant".
+	return (c - SYLLABLE_FIRST) % TRAILING_COUNT == 0
+}
+
+// Hangul_Syllable_Type=LVT_Syllable
+//
+// Precomposed Hangul syllables occupy U+AC00 ..= U+D7A3 and are laid out
+// arithmetically: every leading/vowel combination owns a run of 28 consecutive
+// code points. The first code point of each run has no trailing consonant
+// (LV); the other 27 do (LVT).
+//
+// The property is therefore computed directly instead of being looked up in a
+// table.
+is_hangul_syllable_lvt :: proc(r: rune) -> bool {
+	SYLLABLE_FIRST  :: 0xAC00 // first precomposed Hangul syllable
+	SYLLABLE_LAST   :: 0xD7A3 // last precomposed Hangul syllable
+	TRAILING_COUNT  :: 28     // code points per leading/vowel combination
+
+	c := i32(r)
+	if c < SYLLABLE_FIRST || c > SYLLABLE_LAST {
+		return false
+	}
+	// Any non-zero offset within a run of 28 carries a trailing consonant.
+	return (c - SYLLABLE_FIRST) % TRAILING_COUNT != 0
+}
+
+
+// Indic_Syllabic_Category=Consonant_Preceding_Repha
+//
+// Reports whether `r` is one of the four code points listed below.
+is_indic_consonant_preceding_repha :: proc(r: rune) -> bool {
+	switch r {
+	case 0x00D4E, 0x11941, 0x11D46, 0x11F02:
+		return true
+	}
+	return false
+}
+
+// Indic_Syllabic_Category=Consonant_Prefixed
+//
+// Reports whether `r` is one of the code points listed below.
+is_indic_consonant_prefixed :: proc(r: rune) -> bool {
+	switch r {
+	case 0x111C2 ..= 0x111C3, 0x1193F, 0x11A3A, 0x11A84 ..= 0x11A89:
+		return true
+	}
+	return false
+}
+
+// Indic_Conjunct_Break=Linker
+//
+// Reports whether `r` is one of the six code points listed below.
+is_indic_conjunct_break_linker :: proc(r: rune) -> bool {
+	switch r {
+	case 0x094D, 0x09CD, 0x0ACD, 0x0B4D, 0x0C4D, 0x0D4D:
+		return true
+	}
+	return false
+}
+
+// Indic_Conjunct_Break=Consonant
+//
+// Looks `r` up in `indic_conjunct_break_consonant_ranges`, which is searched
+// as (first, last) pairs with a stride of 2.
+is_indic_conjunct_break_consonant :: proc(r: rune) -> bool {
+	code := i32(r)
+	pair_count := len(indic_conjunct_break_consonant_ranges)/2
+	idx := binary_search(code, indic_conjunct_break_consonant_ranges[:], pair_count, 2)
+	if idx < 0 {
+		// A negative index means the search produced no candidate pair.
+		return false
+	}
+	// The candidate pair must actually enclose the code point.
+	return indic_conjunct_break_consonant_ranges[idx] <= code && code <= indic_conjunct_break_consonant_ranges[idx+1]
+}
+
+// Indic_Conjunct_Break=Extend
+//
+// Looks `r` up in `indic_conjunct_break_extend_ranges`, which is searched
+// as (first, last) pairs with a stride of 2.
+is_indic_conjunct_break_extend :: proc(r: rune) -> bool {
+	code := i32(r)
+	pair_count := len(indic_conjunct_break_extend_ranges)/2
+	idx := binary_search(code, indic_conjunct_break_extend_ranges[:], pair_count, 2)
+	if idx < 0 {
+		// A negative index means the search produced no candidate pair.
+		return false
+	}
+	// The candidate pair must actually enclose the code point.
+	return indic_conjunct_break_extend_ranges[idx] <= code && code <= indic_conjunct_break_extend_ranges[idx+1]
+}
+
+
+/*
+Grapheme_Cluster_Break=Prepend, as used for grapheme text segmentation.
+
+Unicode TR 29 Rev 43 defines the class as:
+
+```
+Indic_Syllabic_Category = Consonant_Preceding_Repha, or
+Indic_Syllabic_Category = Consonant_Prefixed, or
+Prepended_Concatenation_Mark = Yes
+```
+
+The three conditions are tested in that order and the first match wins.
+*/
+is_gcb_prepend_class :: proc(r: rune) -> bool {
+	if is_indic_consonant_preceding_repha(r) {
+		return true
+	}
+	if is_indic_consonant_prefixed(r) {
+		return true
+	}
+	return is_prepended_concatenation_mark(r)
+}
+
+/*
+Grapheme_Cluster_Break=Extend, as used for grapheme text segmentation.
+
+Unicode TR 29 Rev 43 defines the class as:
+
+```
+Grapheme_Extend = Yes, or
+Emoji_Modifier = Yes
+
+This includes:
+General_Category = Nonspacing_Mark
+General_Category = Enclosing_Mark
+U+200C ZERO WIDTH NON-JOINER
+
+plus a few General_Category = Spacing_Mark needed for canonical equivalence.
+```
+
+The Grapheme_Extend test runs first; Emoji_Modifier is only consulted when it
+fails.
+*/
+is_gcb_extend_class :: proc(r: rune) -> bool {
+	if is_grapheme_extend(r) {
+		return true
+	}
+	return is_emoji_modifier(r)
+}
+
+//
+// End of Unicode 15.1.0 block.
+//
diff --git a/core/unicode/tables.odin b/core/unicode/tables.odin
index dfa5caaa2..5812e3132 100644
--- a/core/unicode/tables.odin
+++ b/core/unicode/tables.odin
@@ -1270,3 +1270,2452 @@ to_title_singlets := [?]i32{
0x01f1, 501,
0x01f3, 499,
}
+
+//
+// The tables below are accurate as of Unicode 15.1.0.
+//
+
+// Code point ranges for General_Category=Spacing_Mark.
+//
+// Each row is an inclusive (first, last) pair and rows are listed in ascending
+// order; `is_spacing_mark` searches the table with a stride of 2.
+@(rodata)
+spacing_mark_ranges := [?]i32 {
+0x0903, 0x0903,
+0x093B, 0x093B,
+0x093E, 0x0940,
+0x0949, 0x094C,
+0x094E, 0x094F,
+0x0982, 0x0983,
+0x09BE, 0x09C0,
+0x09C7, 0x09C8,
+0x09CB, 0x09CC,
+0x09D7, 0x09D7,
+0x0A03, 0x0A03,
+0x0A3E, 0x0A40,
+0x0A83, 0x0A83,
+0x0ABE, 0x0AC0,
+0x0AC9, 0x0AC9,
+0x0ACB, 0x0ACC,
+0x0B02, 0x0B03,
+0x0B3E, 0x0B3E,
+0x0B40, 0x0B40,
+0x0B47, 0x0B48,
+0x0B4B, 0x0B4C,
+0x0B57, 0x0B57,
+0x0BBE, 0x0BBF,
+0x0BC1, 0x0BC2,
+0x0BC6, 0x0BC8,
+0x0BCA, 0x0BCC,
+0x0BD7, 0x0BD7,
+0x0C01, 0x0C03,
+0x0C41, 0x0C44,
+0x0C82, 0x0C83,
+0x0CBE, 0x0CBE,
+0x0CC0, 0x0CC4,
+0x0CC7, 0x0CC8,
+0x0CCA, 0x0CCB,
+0x0CD5, 0x0CD6,
+0x0CF3, 0x0CF3,
+0x0D02, 0x0D03,
+0x0D3E, 0x0D40,
+0x0D46, 0x0D48,
+0x0D4A, 0x0D4C,
+0x0D57, 0x0D57,
+0x0D82, 0x0D83,
+0x0DCF, 0x0DD1,
+0x0DD8, 0x0DDF,
+0x0DF2, 0x0DF3,
+0x0F3E, 0x0F3F,
+0x0F7F, 0x0F7F,
+0x102B, 0x102C,
+0x1031, 0x1031,
+0x1038, 0x1038,
+0x103B, 0x103C,
+0x1056, 0x1057,
+0x1062, 0x1064,
+0x1067, 0x106D,
+0x1083, 0x1084,
+0x1087, 0x108C,
+0x108F, 0x108F,
+0x109A, 0x109C,
+0x1715, 0x1715,
+0x1734, 0x1734,
+0x17B6, 0x17B6,
+0x17BE, 0x17C5,
+0x17C7, 0x17C8,
+0x1923, 0x1926,
+0x1929, 0x192B,
+0x1930, 0x1931,
+0x1933, 0x1938,
+0x1A19, 0x1A1A,
+0x1A55, 0x1A55,
+0x1A57, 0x1A57,
+0x1A61, 0x1A61,
+0x1A63, 0x1A64,
+0x1A6D, 0x1A72,
+0x1B04, 0x1B04,
+0x1B35, 0x1B35,
+0x1B3B, 0x1B3B,
+0x1B3D, 0x1B41,
+0x1B43, 0x1B44,
+0x1B82, 0x1B82,
+0x1BA1, 0x1BA1,
+0x1BA6, 0x1BA7,
+0x1BAA, 0x1BAA,
+0x1BE7, 0x1BE7,
+0x1BEA, 0x1BEC,
+0x1BEE, 0x1BEE,
+0x1BF2, 0x1BF3,
+0x1C24, 0x1C2B,
+0x1C34, 0x1C35,
+0x1CE1, 0x1CE1,
+0x1CF7, 0x1CF7,
+0x302E, 0x302F,
+0xA823, 0xA824,
+0xA827, 0xA827,
+0xA880, 0xA881,
+0xA8B4, 0xA8C3,
+0xA952, 0xA953,
+0xA983, 0xA983,
+0xA9B4, 0xA9B5,
+0xA9BA, 0xA9BB,
+0xA9BE, 0xA9C0,
+0xAA2F, 0xAA30,
+0xAA33, 0xAA34,
+0xAA4D, 0xAA4D,
+0xAA7B, 0xAA7B,
+0xAA7D, 0xAA7D,
+0xAAEB, 0xAAEB,
+0xAAEE, 0xAAEF,
+0xAAF5, 0xAAF5,
+0xABE3, 0xABE4,
+0xABE6, 0xABE7,
+0xABE9, 0xABEA,
+0xABEC, 0xABEC,
+0x11000, 0x11000,
+0x11002, 0x11002,
+0x11082, 0x11082,
+0x110B0, 0x110B2,
+0x110B7, 0x110B8,
+0x1112C, 0x1112C,
+0x11145, 0x11146,
+0x11182, 0x11182,
+0x111B3, 0x111B5,
+0x111BF, 0x111C0,
+0x111CE, 0x111CE,
+0x1122C, 0x1122E,
+0x11232, 0x11233,
+0x11235, 0x11235,
+0x112E0, 0x112E2,
+0x11302, 0x11303,
+0x1133E, 0x1133F,
+0x11341, 0x11344,
+0x11347, 0x11348,
+0x1134B, 0x1134D,
+0x11357, 0x11357,
+0x11362, 0x11363,
+0x11435, 0x11437,
+0x11440, 0x11441,
+0x11445, 0x11445,
+0x114B0, 0x114B2,
+0x114B9, 0x114B9,
+0x114BB, 0x114BE,
+0x114C1, 0x114C1,
+0x115AF, 0x115B1,
+0x115B8, 0x115BB,
+0x115BE, 0x115BE,
+0x11630, 0x11632,
+0x1163B, 0x1163C,
+0x1163E, 0x1163E,
+0x116AC, 0x116AC,
+0x116AE, 0x116AF,
+0x116B6, 0x116B6,
+0x11720, 0x11721,
+0x11726, 0x11726,
+0x1182C, 0x1182E,
+0x11838, 0x11838,
+0x11930, 0x11935,
+0x11937, 0x11938,
+0x1193D, 0x1193D,
+0x11940, 0x11940,
+0x11942, 0x11942,
+0x119D1, 0x119D3,
+0x119DC, 0x119DF,
+0x119E4, 0x119E4,
+0x11A39, 0x11A39,
+0x11A57, 0x11A58,
+0x11A97, 0x11A97,
+0x11C2F, 0x11C2F,
+0x11C3E, 0x11C3E,
+0x11CA9, 0x11CA9,
+0x11CB1, 0x11CB1,
+0x11CB4, 0x11CB4,
+0x11D8A, 0x11D8E,
+0x11D93, 0x11D94,
+0x11D96, 0x11D96,
+0x11EF5, 0x11EF6,
+0x11F03, 0x11F03,
+0x11F34, 0x11F35,
+0x11F3E, 0x11F3F,
+0x11F41, 0x11F41,
+0x16F51, 0x16F87,
+0x16FF0, 0x16FF1,
+0x1D165, 0x1D166,
+0x1D16D, 0x1D172,
+}
+
+// Code point ranges for General_Category=Nonspacing_Mark.
+//
+// Each row is an inclusive (first, last) pair and rows are listed in ascending
+// order; `is_nonspacing_mark` searches the table with a stride of 2.
+@(rodata)
+nonspacing_mark_ranges := [?]i32 {
+0x0300, 0x036F,
+0x0483, 0x0487,
+0x0591, 0x05BD,
+0x05BF, 0x05BF,
+0x05C1, 0x05C2,
+0x05C4, 0x05C5,
+0x05C7, 0x05C7,
+0x0610, 0x061A,
+0x064B, 0x065F,
+0x0670, 0x0670,
+0x06D6, 0x06DC,
+0x06DF, 0x06E4,
+0x06E7, 0x06E8,
+0x06EA, 0x06ED,
+0x0711, 0x0711,
+0x0730, 0x074A,
+0x07A6, 0x07B0,
+0x07EB, 0x07F3,
+0x07FD, 0x07FD,
+0x0816, 0x0819,
+0x081B, 0x0823,
+0x0825, 0x0827,
+0x0829, 0x082D,
+0x0859, 0x085B,
+0x0898, 0x089F,
+0x08CA, 0x08E1,
+0x08E3, 0x0902,
+0x093A, 0x093A,
+0x093C, 0x093C,
+0x0941, 0x0948,
+0x094D, 0x094D,
+0x0951, 0x0957,
+0x0962, 0x0963,
+0x0981, 0x0981,
+0x09BC, 0x09BC,
+0x09C1, 0x09C4,
+0x09CD, 0x09CD,
+0x09E2, 0x09E3,
+0x09FE, 0x09FE,
+0x0A01, 0x0A02,
+0x0A3C, 0x0A3C,
+0x0A41, 0x0A42,
+0x0A47, 0x0A48,
+0x0A4B, 0x0A4D,
+0x0A51, 0x0A51,
+0x0A70, 0x0A71,
+0x0A75, 0x0A75,
+0x0A81, 0x0A82,
+0x0ABC, 0x0ABC,
+0x0AC1, 0x0AC5,
+0x0AC7, 0x0AC8,
+0x0ACD, 0x0ACD,
+0x0AE2, 0x0AE3,
+0x0AFA, 0x0AFF,
+0x0B01, 0x0B01,
+0x0B3C, 0x0B3C,
+0x0B3F, 0x0B3F,
+0x0B41, 0x0B44,
+0x0B4D, 0x0B4D,
+0x0B55, 0x0B56,
+0x0B62, 0x0B63,
+0x0B82, 0x0B82,
+0x0BC0, 0x0BC0,
+0x0BCD, 0x0BCD,
+0x0C00, 0x0C00,
+0x0C04, 0x0C04,
+0x0C3C, 0x0C3C,
+0x0C3E, 0x0C40,
+0x0C46, 0x0C48,
+0x0C4A, 0x0C4D,
+0x0C55, 0x0C56,
+0x0C62, 0x0C63,
+0x0C81, 0x0C81,
+0x0CBC, 0x0CBC,
+0x0CBF, 0x0CBF,
+0x0CC6, 0x0CC6,
+0x0CCC, 0x0CCD,
+0x0CE2, 0x0CE3,
+0x0D00, 0x0D01,
+0x0D3B, 0x0D3C,
+0x0D41, 0x0D44,
+0x0D4D, 0x0D4D,
+0x0D62, 0x0D63,
+0x0D81, 0x0D81,
+0x0DCA, 0x0DCA,
+0x0DD2, 0x0DD4,
+0x0DD6, 0x0DD6,
+0x0E31, 0x0E31,
+0x0E34, 0x0E3A,
+0x0E47, 0x0E4E,
+0x0EB1, 0x0EB1,
+0x0EB4, 0x0EBC,
+0x0EC8, 0x0ECE,
+0x0F18, 0x0F19,
+0x0F35, 0x0F35,
+0x0F37, 0x0F37,
+0x0F39, 0x0F39,
+0x0F71, 0x0F7E,
+0x0F80, 0x0F84,
+0x0F86, 0x0F87,
+0x0F8D, 0x0F97,
+0x0F99, 0x0FBC,
+0x0FC6, 0x0FC6,
+0x102D, 0x1030,
+0x1032, 0x1037,
+0x1039, 0x103A,
+0x103D, 0x103E,
+0x1058, 0x1059,
+0x105E, 0x1060,
+0x1071, 0x1074,
+0x1082, 0x1082,
+0x1085, 0x1086,
+0x108D, 0x108D,
+0x109D, 0x109D,
+0x135D, 0x135F,
+0x1712, 0x1714,
+0x1732, 0x1733,
+0x1752, 0x1753,
+0x1772, 0x1773,
+0x17B4, 0x17B5,
+0x17B7, 0x17BD,
+0x17C6, 0x17C6,
+0x17C9, 0x17D3,
+0x17DD, 0x17DD,
+0x180B, 0x180D,
+0x180F, 0x180F,
+0x1885, 0x1886,
+0x18A9, 0x18A9,
+0x1920, 0x1922,
+0x1927, 0x1928,
+0x1932, 0x1932,
+0x1939, 0x193B,
+0x1A17, 0x1A18,
+0x1A1B, 0x1A1B,
+0x1A56, 0x1A56,
+0x1A58, 0x1A5E,
+0x1A60, 0x1A60,
+0x1A62, 0x1A62,
+0x1A65, 0x1A6C,
+0x1A73, 0x1A7C,
+0x1A7F, 0x1A7F,
+0x1AB0, 0x1ABD,
+0x1ABF, 0x1ACE,
+0x1B00, 0x1B03,
+0x1B34, 0x1B34,
+0x1B36, 0x1B3A,
+0x1B3C, 0x1B3C,
+0x1B42, 0x1B42,
+0x1B6B, 0x1B73,
+0x1B80, 0x1B81,
+0x1BA2, 0x1BA5,
+0x1BA8, 0x1BA9,
+0x1BAB, 0x1BAD,
+0x1BE6, 0x1BE6,
+0x1BE8, 0x1BE9,
+0x1BED, 0x1BED,
+0x1BEF, 0x1BF1,
+0x1C2C, 0x1C33,
+0x1C36, 0x1C37,
+0x1CD0, 0x1CD2,
+0x1CD4, 0x1CE0,
+0x1CE2, 0x1CE8,
+0x1CED, 0x1CED,
+0x1CF4, 0x1CF4,
+0x1CF8, 0x1CF9,
+0x1DC0, 0x1DFF,
+0x20D0, 0x20DC,
+0x20E1, 0x20E1,
+0x20E5, 0x20F0,
+0x2CEF, 0x2CF1,
+0x2D7F, 0x2D7F,
+0x2DE0, 0x2DFF,
+0x302A, 0x302D,
+0x3099, 0x309A,
+0xA66F, 0xA66F,
+0xA674, 0xA67D,
+0xA69E, 0xA69F,
+0xA6F0, 0xA6F1,
+0xA802, 0xA802,
+0xA806, 0xA806,
+0xA80B, 0xA80B,
+0xA825, 0xA826,
+0xA82C, 0xA82C,
+0xA8C4, 0xA8C5,
+0xA8E0, 0xA8F1,
+0xA8FF, 0xA8FF,
+0xA926, 0xA92D,
+0xA947, 0xA951,
+0xA980, 0xA982,
+0xA9B3, 0xA9B3,
+0xA9B6, 0xA9B9,
+0xA9BC, 0xA9BD,
+0xA9E5, 0xA9E5,
+0xAA29, 0xAA2E,
+0xAA31, 0xAA32,
+0xAA35, 0xAA36,
+0xAA43, 0xAA43,
+0xAA4C, 0xAA4C,
+0xAA7C, 0xAA7C,
+0xAAB0, 0xAAB0,
+0xAAB2, 0xAAB4,
+0xAAB7, 0xAAB8,
+0xAABE, 0xAABF,
+0xAAC1, 0xAAC1,
+0xAAEC, 0xAAED,
+0xAAF6, 0xAAF6,
+0xABE5, 0xABE5,
+0xABE8, 0xABE8,
+0xABED, 0xABED,
+0xFB1E, 0xFB1E,
+0xFE00, 0xFE0F,
+0xFE20, 0xFE2F,
+0x101FD, 0x101FD,
+0x102E0, 0x102E0,
+0x10376, 0x1037A,
+0x10A01, 0x10A03,
+0x10A05, 0x10A06,
+0x10A0C, 0x10A0F,
+0x10A38, 0x10A3A,
+0x10A3F, 0x10A3F,
+0x10AE5, 0x10AE6,
+0x10D24, 0x10D27,
+0x10EAB, 0x10EAC,
+0x10EFD, 0x10EFF,
+0x10F46, 0x10F50,
+0x10F82, 0x10F85,
+0x11001, 0x11001,
+0x11038, 0x11046,
+0x11070, 0x11070,
+0x11073, 0x11074,
+0x1107F, 0x11081,
+0x110B3, 0x110B6,
+0x110B9, 0x110BA,
+0x110C2, 0x110C2,
+0x11100, 0x11102,
+0x11127, 0x1112B,
+0x1112D, 0x11134,
+0x11173, 0x11173,
+0x11180, 0x11181,
+0x111B6, 0x111BE,
+0x111C9, 0x111CC,
+0x111CF, 0x111CF,
+0x1122F, 0x11231,
+0x11234, 0x11234,
+0x11236, 0x11237,
+0x1123E, 0x1123E,
+0x11241, 0x11241,
+0x112DF, 0x112DF,
+0x112E3, 0x112EA,
+0x11300, 0x11301,
+0x1133B, 0x1133C,
+0x11340, 0x11340,
+0x11366, 0x1136C,
+0x11370, 0x11374,
+0x11438, 0x1143F,
+0x11442, 0x11444,
+0x11446, 0x11446,
+0x1145E, 0x1145E,
+0x114B3, 0x114B8,
+0x114BA, 0x114BA,
+0x114BF, 0x114C0,
+0x114C2, 0x114C3,
+0x115B2, 0x115B5,
+0x115BC, 0x115BD,
+0x115BF, 0x115C0,
+0x115DC, 0x115DD,
+0x11633, 0x1163A,
+0x1163D, 0x1163D,
+0x1163F, 0x11640,
+0x116AB, 0x116AB,
+0x116AD, 0x116AD,
+0x116B0, 0x116B5,
+0x116B7, 0x116B7,
+0x1171D, 0x1171F,
+0x11722, 0x11725,
+0x11727, 0x1172B,
+0x1182F, 0x11837,
+0x11839, 0x1183A,
+0x1193B, 0x1193C,
+0x1193E, 0x1193E,
+0x11943, 0x11943,
+0x119D4, 0x119D7,
+0x119DA, 0x119DB,
+0x119E0, 0x119E0,
+0x11A01, 0x11A0A,
+0x11A33, 0x11A38,
+0x11A3B, 0x11A3E,
+0x11A47, 0x11A47,
+0x11A51, 0x11A56,
+0x11A59, 0x11A5B,
+0x11A8A, 0x11A96,
+0x11A98, 0x11A99,
+0x11C30, 0x11C36,
+0x11C38, 0x11C3D,
+0x11C3F, 0x11C3F,
+0x11C92, 0x11CA7,
+0x11CAA, 0x11CB0,
+0x11CB2, 0x11CB3,
+0x11CB5, 0x11CB6,
+0x11D31, 0x11D36,
+0x11D3A, 0x11D3A,
+0x11D3C, 0x11D3D,
+0x11D3F, 0x11D45,
+0x11D47, 0x11D47,
+0x11D90, 0x11D91,
+0x11D95, 0x11D95,
+0x11D97, 0x11D97,
+0x11EF3, 0x11EF4,
+0x11F00, 0x11F01,
+0x11F36, 0x11F3A,
+0x11F40, 0x11F40,
+0x11F42, 0x11F42,
+0x13440, 0x13440,
+0x13447, 0x13455,
+0x16AF0, 0x16AF4,
+0x16B30, 0x16B36,
+0x16F4F, 0x16F4F,
+0x16F8F, 0x16F92,
+0x16FE4, 0x16FE4,
+0x1BC9D, 0x1BC9E,
+0x1CF00, 0x1CF2D,
+0x1CF30, 0x1CF46,
+0x1D167, 0x1D169,
+0x1D17B, 0x1D182,
+0x1D185, 0x1D18B,
+0x1D1AA, 0x1D1AD,
+0x1D242, 0x1D244,
+0x1DA00, 0x1DA36,
+0x1DA3B, 0x1DA6C,
+0x1DA75, 0x1DA75,
+0x1DA84, 0x1DA84,
+0x1DA9B, 0x1DA9F,
+0x1DAA1, 0x1DAAF,
+0x1E000, 0x1E006,
+0x1E008, 0x1E018,
+0x1E01B, 0x1E021,
+0x1E023, 0x1E024,
+0x1E026, 0x1E02A,
+0x1E08F, 0x1E08F,
+0x1E130, 0x1E136,
+0x1E2AE, 0x1E2AE,
+0x1E2EC, 0x1E2EF,
+0x1E4EC, 0x1E4EF,
+0x1E8D0, 0x1E8D6,
+0x1E944, 0x1E94A,
+0xE0100, 0xE01EF,
+}
+
+// Code point ranges for the Extended_Pictographic property.
+//
+// Each row is an inclusive (first, last) pair and rows are listed in ascending
+// order; `is_emoji_extended_pictographic` searches the table with a stride
+// of 2.
+//
+// Directly adjacent ranges have been coalesced (for example
+// 0x23E9..0x23EC, 0x23ED..0x23EE, 0x23EF, 0x23F0, 0x23F1..0x23F2 and 0x23F3
+// are stored as the single pair 0x23E9..0x23F3). The set of code points covered
+// is unchanged; the table is simply shorter, so it occupies less read-only
+// data and needs fewer search steps.
+@(rodata)
+emoji_extended_pictographic_ranges := [?]i32 {
+	0x00A9, 0x00A9,
+	0x00AE, 0x00AE,
+	0x203C, 0x203C,
+	0x2049, 0x2049,
+	0x2122, 0x2122,
+	0x2139, 0x2139,
+	0x2194, 0x2199,
+	0x21A9, 0x21AA,
+	0x231A, 0x231B,
+	0x2328, 0x2328,
+	0x2388, 0x2388,
+	0x23CF, 0x23CF,
+	0x23E9, 0x23F3,
+	0x23F8, 0x23FA,
+	0x24C2, 0x24C2,
+	0x25AA, 0x25AB,
+	0x25B6, 0x25B6,
+	0x25C0, 0x25C0,
+	0x25FB, 0x25FE,
+	0x2600, 0x2605,
+	0x2607, 0x2612,
+	0x2614, 0x2685,
+	0x2690, 0x2705,
+	0x2708, 0x2712,
+	0x2714, 0x2714,
+	0x2716, 0x2716,
+	0x271D, 0x271D,
+	0x2721, 0x2721,
+	0x2728, 0x2728,
+	0x2733, 0x2734,
+	0x2744, 0x2744,
+	0x2747, 0x2747,
+	0x274C, 0x274C,
+	0x274E, 0x274E,
+	0x2753, 0x2755,
+	0x2757, 0x2757,
+	0x2763, 0x2767,
+	0x2795, 0x2797,
+	0x27A1, 0x27A1,
+	0x27B0, 0x27B0,
+	0x27BF, 0x27BF,
+	0x2934, 0x2935,
+	0x2B05, 0x2B07,
+	0x2B1B, 0x2B1C,
+	0x2B50, 0x2B50,
+	0x2B55, 0x2B55,
+	0x3030, 0x3030,
+	0x303D, 0x303D,
+	0x3297, 0x3297,
+	0x3299, 0x3299,
+	0x1F000, 0x1F0FF,
+	0x1F10D, 0x1F10F,
+	0x1F12F, 0x1F12F,
+	0x1F16C, 0x1F171,
+	0x1F17E, 0x1F17F,
+	0x1F18E, 0x1F18E,
+	0x1F191, 0x1F19A,
+	0x1F1AD, 0x1F1E5,
+	0x1F201, 0x1F20F,
+	0x1F21A, 0x1F21A,
+	0x1F22F, 0x1F22F,
+	0x1F232, 0x1F23A,
+	0x1F23C, 0x1F23F,
+	0x1F249, 0x1F3FA,
+	0x1F400, 0x1F53D,
+	0x1F546, 0x1F64F,
+	0x1F680, 0x1F6FF,
+	0x1F774, 0x1F77F,
+	0x1F7D5, 0x1F7FF,
+	0x1F80C, 0x1F80F,
+	0x1F848, 0x1F84F,
+	0x1F85A, 0x1F85F,
+	0x1F888, 0x1F88F,
+	0x1F8AE, 0x1F8FF,
+	0x1F90C, 0x1F93A,
+	0x1F93C, 0x1F945,
+	0x1F947, 0x1FAFF,
+	0x1FC00, 0x1FFFD,
+}
+
+// Code point ranges for the Grapheme_Extend property.
+//
+// Each row is an inclusive (first, last) pair and rows are listed in ascending
+// order; `is_grapheme_extend` searches the table with a stride of 2.
+//
+// Directly adjacent ranges have been coalesced (for example 0x20D0..0x20DC,
+// 0x20DD..0x20E0, 0x20E1, 0x20E2..0x20E4 and 0x20E5..0x20F0 are stored as the
+// single pair 0x20D0..0x20F0). The set of code points covered is unchanged.
+@(rodata)
+grapheme_extend_ranges := [?]i32 {
+	0x0300, 0x036F,
+	0x0483, 0x0489,
+	0x0591, 0x05BD,
+	0x05BF, 0x05BF,
+	0x05C1, 0x05C2,
+	0x05C4, 0x05C5,
+	0x05C7, 0x05C7,
+	0x0610, 0x061A,
+	0x064B, 0x065F,
+	0x0670, 0x0670,
+	0x06D6, 0x06DC,
+	0x06DF, 0x06E4,
+	0x06E7, 0x06E8,
+	0x06EA, 0x06ED,
+	0x0711, 0x0711,
+	0x0730, 0x074A,
+	0x07A6, 0x07B0,
+	0x07EB, 0x07F3,
+	0x07FD, 0x07FD,
+	0x0816, 0x0819,
+	0x081B, 0x0823,
+	0x0825, 0x0827,
+	0x0829, 0x082D,
+	0x0859, 0x085B,
+	0x0898, 0x089F,
+	0x08CA, 0x08E1,
+	0x08E3, 0x0902,
+	0x093A, 0x093A,
+	0x093C, 0x093C,
+	0x0941, 0x0948,
+	0x094D, 0x094D,
+	0x0951, 0x0957,
+	0x0962, 0x0963,
+	0x0981, 0x0981,
+	0x09BC, 0x09BC,
+	0x09BE, 0x09BE,
+	0x09C1, 0x09C4,
+	0x09CD, 0x09CD,
+	0x09D7, 0x09D7,
+	0x09E2, 0x09E3,
+	0x09FE, 0x09FE,
+	0x0A01, 0x0A02,
+	0x0A3C, 0x0A3C,
+	0x0A41, 0x0A42,
+	0x0A47, 0x0A48,
+	0x0A4B, 0x0A4D,
+	0x0A51, 0x0A51,
+	0x0A70, 0x0A71,
+	0x0A75, 0x0A75,
+	0x0A81, 0x0A82,
+	0x0ABC, 0x0ABC,
+	0x0AC1, 0x0AC5,
+	0x0AC7, 0x0AC8,
+	0x0ACD, 0x0ACD,
+	0x0AE2, 0x0AE3,
+	0x0AFA, 0x0AFF,
+	0x0B01, 0x0B01,
+	0x0B3C, 0x0B3C,
+	0x0B3E, 0x0B3F,
+	0x0B41, 0x0B44,
+	0x0B4D, 0x0B4D,
+	0x0B55, 0x0B57,
+	0x0B62, 0x0B63,
+	0x0B82, 0x0B82,
+	0x0BBE, 0x0BBE,
+	0x0BC0, 0x0BC0,
+	0x0BCD, 0x0BCD,
+	0x0BD7, 0x0BD7,
+	0x0C00, 0x0C00,
+	0x0C04, 0x0C04,
+	0x0C3C, 0x0C3C,
+	0x0C3E, 0x0C40,
+	0x0C46, 0x0C48,
+	0x0C4A, 0x0C4D,
+	0x0C55, 0x0C56,
+	0x0C62, 0x0C63,
+	0x0C81, 0x0C81,
+	0x0CBC, 0x0CBC,
+	0x0CBF, 0x0CBF,
+	0x0CC2, 0x0CC2,
+	0x0CC6, 0x0CC6,
+	0x0CCC, 0x0CCD,
+	0x0CD5, 0x0CD6,
+	0x0CE2, 0x0CE3,
+	0x0D00, 0x0D01,
+	0x0D3B, 0x0D3C,
+	0x0D3E, 0x0D3E,
+	0x0D41, 0x0D44,
+	0x0D4D, 0x0D4D,
+	0x0D57, 0x0D57,
+	0x0D62, 0x0D63,
+	0x0D81, 0x0D81,
+	0x0DCA, 0x0DCA,
+	0x0DCF, 0x0DCF,
+	0x0DD2, 0x0DD4,
+	0x0DD6, 0x0DD6,
+	0x0DDF, 0x0DDF,
+	0x0E31, 0x0E31,
+	0x0E34, 0x0E3A,
+	0x0E47, 0x0E4E,
+	0x0EB1, 0x0EB1,
+	0x0EB4, 0x0EBC,
+	0x0EC8, 0x0ECE,
+	0x0F18, 0x0F19,
+	0x0F35, 0x0F35,
+	0x0F37, 0x0F37,
+	0x0F39, 0x0F39,
+	0x0F71, 0x0F7E,
+	0x0F80, 0x0F84,
+	0x0F86, 0x0F87,
+	0x0F8D, 0x0F97,
+	0x0F99, 0x0FBC,
+	0x0FC6, 0x0FC6,
+	0x102D, 0x1030,
+	0x1032, 0x1037,
+	0x1039, 0x103A,
+	0x103D, 0x103E,
+	0x1058, 0x1059,
+	0x105E, 0x1060,
+	0x1071, 0x1074,
+	0x1082, 0x1082,
+	0x1085, 0x1086,
+	0x108D, 0x108D,
+	0x109D, 0x109D,
+	0x135D, 0x135F,
+	0x1712, 0x1714,
+	0x1732, 0x1733,
+	0x1752, 0x1753,
+	0x1772, 0x1773,
+	0x17B4, 0x17B5,
+	0x17B7, 0x17BD,
+	0x17C6, 0x17C6,
+	0x17C9, 0x17D3,
+	0x17DD, 0x17DD,
+	0x180B, 0x180D,
+	0x180F, 0x180F,
+	0x1885, 0x1886,
+	0x18A9, 0x18A9,
+	0x1920, 0x1922,
+	0x1927, 0x1928,
+	0x1932, 0x1932,
+	0x1939, 0x193B,
+	0x1A17, 0x1A18,
+	0x1A1B, 0x1A1B,
+	0x1A56, 0x1A56,
+	0x1A58, 0x1A5E,
+	0x1A60, 0x1A60,
+	0x1A62, 0x1A62,
+	0x1A65, 0x1A6C,
+	0x1A73, 0x1A7C,
+	0x1A7F, 0x1A7F,
+	0x1AB0, 0x1ACE,
+	0x1B00, 0x1B03,
+	0x1B34, 0x1B3A,
+	0x1B3C, 0x1B3C,
+	0x1B42, 0x1B42,
+	0x1B6B, 0x1B73,
+	0x1B80, 0x1B81,
+	0x1BA2, 0x1BA5,
+	0x1BA8, 0x1BA9,
+	0x1BAB, 0x1BAD,
+	0x1BE6, 0x1BE6,
+	0x1BE8, 0x1BE9,
+	0x1BED, 0x1BED,
+	0x1BEF, 0x1BF1,
+	0x1C2C, 0x1C33,
+	0x1C36, 0x1C37,
+	0x1CD0, 0x1CD2,
+	0x1CD4, 0x1CE0,
+	0x1CE2, 0x1CE8,
+	0x1CED, 0x1CED,
+	0x1CF4, 0x1CF4,
+	0x1CF8, 0x1CF9,
+	0x1DC0, 0x1DFF,
+	0x200C, 0x200C,
+	0x20D0, 0x20F0,
+	0x2CEF, 0x2CF1,
+	0x2D7F, 0x2D7F,
+	0x2DE0, 0x2DFF,
+	0x302A, 0x302F,
+	0x3099, 0x309A,
+	0xA66F, 0xA672,
+	0xA674, 0xA67D,
+	0xA69E, 0xA69F,
+	0xA6F0, 0xA6F1,
+	0xA802, 0xA802,
+	0xA806, 0xA806,
+	0xA80B, 0xA80B,
+	0xA825, 0xA826,
+	0xA82C, 0xA82C,
+	0xA8C4, 0xA8C5,
+	0xA8E0, 0xA8F1,
+	0xA8FF, 0xA8FF,
+	0xA926, 0xA92D,
+	0xA947, 0xA951,
+	0xA980, 0xA982,
+	0xA9B3, 0xA9B3,
+	0xA9B6, 0xA9B9,
+	0xA9BC, 0xA9BD,
+	0xA9E5, 0xA9E5,
+	0xAA29, 0xAA2E,
+	0xAA31, 0xAA32,
+	0xAA35, 0xAA36,
+	0xAA43, 0xAA43,
+	0xAA4C, 0xAA4C,
+	0xAA7C, 0xAA7C,
+	0xAAB0, 0xAAB0,
+	0xAAB2, 0xAAB4,
+	0xAAB7, 0xAAB8,
+	0xAABE, 0xAABF,
+	0xAAC1, 0xAAC1,
+	0xAAEC, 0xAAED,
+	0xAAF6, 0xAAF6,
+	0xABE5, 0xABE5,
+	0xABE8, 0xABE8,
+	0xABED, 0xABED,
+	0xFB1E, 0xFB1E,
+	0xFE00, 0xFE0F,
+	0xFE20, 0xFE2F,
+	0xFF9E, 0xFF9F,
+	0x101FD, 0x101FD,
+	0x102E0, 0x102E0,
+	0x10376, 0x1037A,
+	0x10A01, 0x10A03,
+	0x10A05, 0x10A06,
+	0x10A0C, 0x10A0F,
+	0x10A38, 0x10A3A,
+	0x10A3F, 0x10A3F,
+	0x10AE5, 0x10AE6,
+	0x10D24, 0x10D27,
+	0x10EAB, 0x10EAC,
+	0x10EFD, 0x10EFF,
+	0x10F46, 0x10F50,
+	0x10F82, 0x10F85,
+	0x11001, 0x11001,
+	0x11038, 0x11046,
+	0x11070, 0x11070,
+	0x11073, 0x11074,
+	0x1107F, 0x11081,
+	0x110B3, 0x110B6,
+	0x110B9, 0x110BA,
+	0x110C2, 0x110C2,
+	0x11100, 0x11102,
+	0x11127, 0x1112B,
+	0x1112D, 0x11134,
+	0x11173, 0x11173,
+	0x11180, 0x11181,
+	0x111B6, 0x111BE,
+	0x111C9, 0x111CC,
+	0x111CF, 0x111CF,
+	0x1122F, 0x11231,
+	0x11234, 0x11234,
+	0x11236, 0x11237,
+	0x1123E, 0x1123E,
+	0x11241, 0x11241,
+	0x112DF, 0x112DF,
+	0x112E3, 0x112EA,
+	0x11300, 0x11301,
+	0x1133B, 0x1133C,
+	0x1133E, 0x1133E,
+	0x11340, 0x11340,
+	0x11357, 0x11357,
+	0x11366, 0x1136C,
+	0x11370, 0x11374,
+	0x11438, 0x1143F,
+	0x11442, 0x11444,
+	0x11446, 0x11446,
+	0x1145E, 0x1145E,
+	0x114B0, 0x114B0,
+	0x114B3, 0x114B8,
+	0x114BA, 0x114BA,
+	0x114BD, 0x114BD,
+	0x114BF, 0x114C0,
+	0x114C2, 0x114C3,
+	0x115AF, 0x115AF,
+	0x115B2, 0x115B5,
+	0x115BC, 0x115BD,
+	0x115BF, 0x115C0,
+	0x115DC, 0x115DD,
+	0x11633, 0x1163A,
+	0x1163D, 0x1163D,
+	0x1163F, 0x11640,
+	0x116AB, 0x116AB,
+	0x116AD, 0x116AD,
+	0x116B0, 0x116B5,
+	0x116B7, 0x116B7,
+	0x1171D, 0x1171F,
+	0x11722, 0x11725,
+	0x11727, 0x1172B,
+	0x1182F, 0x11837,
+	0x11839, 0x1183A,
+	0x11930, 0x11930,
+	0x1193B, 0x1193C,
+	0x1193E, 0x1193E,
+	0x11943, 0x11943,
+	0x119D4, 0x119D7,
+	0x119DA, 0x119DB,
+	0x119E0, 0x119E0,
+	0x11A01, 0x11A0A,
+	0x11A33, 0x11A38,
+	0x11A3B, 0x11A3E,
+	0x11A47, 0x11A47,
+	0x11A51, 0x11A56,
+	0x11A59, 0x11A5B,
+	0x11A8A, 0x11A96,
+	0x11A98, 0x11A99,
+	0x11C30, 0x11C36,
+	0x11C38, 0x11C3D,
+	0x11C3F, 0x11C3F,
+	0x11C92, 0x11CA7,
+	0x11CAA, 0x11CB0,
+	0x11CB2, 0x11CB3,
+	0x11CB5, 0x11CB6,
+	0x11D31, 0x11D36,
+	0x11D3A, 0x11D3A,
+	0x11D3C, 0x11D3D,
+	0x11D3F, 0x11D45,
+	0x11D47, 0x11D47,
+	0x11D90, 0x11D91,
+	0x11D95, 0x11D95,
+	0x11D97, 0x11D97,
+	0x11EF3, 0x11EF4,
+	0x11F00, 0x11F01,
+	0x11F36, 0x11F3A,
+	0x11F40, 0x11F40,
+	0x11F42, 0x11F42,
+	0x13440, 0x13440,
+	0x13447, 0x13455,
+	0x16AF0, 0x16AF4,
+	0x16B30, 0x16B36,
+	0x16F4F, 0x16F4F,
+	0x16F8F, 0x16F92,
+	0x16FE4, 0x16FE4,
+	0x1BC9D, 0x1BC9E,
+	0x1CF00, 0x1CF2D,
+	0x1CF30, 0x1CF46,
+	0x1D165, 0x1D165,
+	0x1D167, 0x1D169,
+	0x1D16E, 0x1D172,
+	0x1D17B, 0x1D182,
+	0x1D185, 0x1D18B,
+	0x1D1AA, 0x1D1AD,
+	0x1D242, 0x1D244,
+	0x1DA00, 0x1DA36,
+	0x1DA3B, 0x1DA6C,
+	0x1DA75, 0x1DA75,
+	0x1DA84, 0x1DA84,
+	0x1DA9B, 0x1DA9F,
+	0x1DAA1, 0x1DAAF,
+	0x1E000, 0x1E006,
+	0x1E008, 0x1E018,
+	0x1E01B, 0x1E021,
+	0x1E023, 0x1E024,
+	0x1E026, 0x1E02A,
+	0x1E08F, 0x1E08F,
+	0x1E130, 0x1E136,
+	0x1E2AE, 0x1E2AE,
+	0x1E2EC, 0x1E2EF,
+	0x1E4EC, 0x1E4EF,
+	0x1E8D0, 0x1E8D6,
+	0x1E944, 0x1E94A,
+	0xE0020, 0xE007F,
+	0xE0100, 0xE01EF,
+}
+
+@(rodata)
+hangul_syllable_lv_singlets := [?]i32 { // Single code points (one per entry), ascending from 0xAC00 to 0xD788 in steps of 0x1C (28). Presumably the Hangul_Syllable_Type=LV set behind `is_hangul_syllable_lv`; the lookup procedure is not visible here - confirm.
+0xAC00,
+0xAC1C,
+0xAC38,
+0xAC54,
+0xAC70,
+0xAC8C,
+0xACA8,
+0xACC4,
+0xACE0,
+0xACFC,
+0xAD18,
+0xAD34,
+0xAD50,
+0xAD6C,
+0xAD88,
+0xADA4,
+0xADC0,
+0xADDC,
+0xADF8,
+0xAE14,
+0xAE30,
+0xAE4C,
+0xAE68,
+0xAE84,
+0xAEA0,
+0xAEBC,
+0xAED8,
+0xAEF4,
+0xAF10,
+0xAF2C,
+0xAF48,
+0xAF64,
+0xAF80,
+0xAF9C,
+0xAFB8,
+0xAFD4,
+0xAFF0,
+0xB00C,
+0xB028,
+0xB044,
+0xB060,
+0xB07C,
+0xB098,
+0xB0B4,
+0xB0D0,
+0xB0EC,
+0xB108,
+0xB124,
+0xB140,
+0xB15C,
+0xB178,
+0xB194,
+0xB1B0,
+0xB1CC,
+0xB1E8,
+0xB204,
+0xB220,
+0xB23C,
+0xB258,
+0xB274,
+0xB290,
+0xB2AC,
+0xB2C8,
+0xB2E4,
+0xB300,
+0xB31C,
+0xB338,
+0xB354,
+0xB370,
+0xB38C,
+0xB3A8,
+0xB3C4,
+0xB3E0,
+0xB3FC,
+0xB418,
+0xB434,
+0xB450,
+0xB46C,
+0xB488,
+0xB4A4,
+0xB4C0,
+0xB4DC,
+0xB4F8,
+0xB514,
+0xB530,
+0xB54C,
+0xB568,
+0xB584,
+0xB5A0,
+0xB5BC,
+0xB5D8,
+0xB5F4,
+0xB610,
+0xB62C,
+0xB648,
+0xB664,
+0xB680,
+0xB69C,
+0xB6B8,
+0xB6D4,
+0xB6F0,
+0xB70C,
+0xB728,
+0xB744,
+0xB760,
+0xB77C,
+0xB798,
+0xB7B4,
+0xB7D0,
+0xB7EC,
+0xB808,
+0xB824,
+0xB840,
+0xB85C,
+0xB878,
+0xB894,
+0xB8B0,
+0xB8CC,
+0xB8E8,
+0xB904,
+0xB920,
+0xB93C,
+0xB958,
+0xB974,
+0xB990,
+0xB9AC,
+0xB9C8,
+0xB9E4,
+0xBA00,
+0xBA1C,
+0xBA38,
+0xBA54,
+0xBA70,
+0xBA8C,
+0xBAA8,
+0xBAC4,
+0xBAE0,
+0xBAFC,
+0xBB18,
+0xBB34,
+0xBB50,
+0xBB6C,
+0xBB88,
+0xBBA4,
+0xBBC0,
+0xBBDC,
+0xBBF8,
+0xBC14,
+0xBC30,
+0xBC4C,
+0xBC68,
+0xBC84,
+0xBCA0,
+0xBCBC,
+0xBCD8,
+0xBCF4,
+0xBD10,
+0xBD2C,
+0xBD48,
+0xBD64,
+0xBD80,
+0xBD9C,
+0xBDB8,
+0xBDD4,
+0xBDF0,
+0xBE0C,
+0xBE28,
+0xBE44,
+0xBE60,
+0xBE7C,
+0xBE98,
+0xBEB4,
+0xBED0,
+0xBEEC,
+0xBF08,
+0xBF24,
+0xBF40,
+0xBF5C,
+0xBF78,
+0xBF94,
+0xBFB0,
+0xBFCC,
+0xBFE8,
+0xC004,
+0xC020,
+0xC03C,
+0xC058,
+0xC074,
+0xC090,
+0xC0AC,
+0xC0C8,
+0xC0E4,
+0xC100,
+0xC11C,
+0xC138,
+0xC154,
+0xC170,
+0xC18C,
+0xC1A8,
+0xC1C4,
+0xC1E0,
+0xC1FC,
+0xC218,
+0xC234,
+0xC250,
+0xC26C,
+0xC288,
+0xC2A4,
+0xC2C0,
+0xC2DC,
+0xC2F8,
+0xC314,
+0xC330,
+0xC34C,
+0xC368,
+0xC384,
+0xC3A0,
+0xC3BC,
+0xC3D8,
+0xC3F4,
+0xC410,
+0xC42C,
+0xC448,
+0xC464,
+0xC480,
+0xC49C,
+0xC4B8,
+0xC4D4,
+0xC4F0,
+0xC50C,
+0xC528,
+0xC544,
+0xC560,
+0xC57C,
+0xC598,
+0xC5B4,
+0xC5D0,
+0xC5EC,
+0xC608,
+0xC624,
+0xC640,
+0xC65C,
+0xC678,
+0xC694,
+0xC6B0,
+0xC6CC,
+0xC6E8,
+0xC704,
+0xC720,
+0xC73C,
+0xC758,
+0xC774,
+0xC790,
+0xC7AC,
+0xC7C8,
+0xC7E4,
+0xC800,
+0xC81C,
+0xC838,
+0xC854,
+0xC870,
+0xC88C,
+0xC8A8,
+0xC8C4,
+0xC8E0,
+0xC8FC,
+0xC918,
+0xC934,
+0xC950,
+0xC96C,
+0xC988,
+0xC9A4,
+0xC9C0,
+0xC9DC,
+0xC9F8,
+0xCA14,
+0xCA30,
+0xCA4C,
+0xCA68,
+0xCA84,
+0xCAA0,
+0xCABC,
+0xCAD8,
+0xCAF4,
+0xCB10,
+0xCB2C,
+0xCB48,
+0xCB64,
+0xCB80,
+0xCB9C,
+0xCBB8,
+0xCBD4,
+0xCBF0,
+0xCC0C,
+0xCC28,
+0xCC44,
+0xCC60,
+0xCC7C,
+0xCC98,
+0xCCB4,
+0xCCD0,
+0xCCEC,
+0xCD08,
+0xCD24,
+0xCD40,
+0xCD5C,
+0xCD78,
+0xCD94,
+0xCDB0,
+0xCDCC,
+0xCDE8,
+0xCE04,
+0xCE20,
+0xCE3C,
+0xCE58,
+0xCE74,
+0xCE90,
+0xCEAC,
+0xCEC8,
+0xCEE4,
+0xCF00,
+0xCF1C,
+0xCF38,
+0xCF54,
+0xCF70,
+0xCF8C,
+0xCFA8,
+0xCFC4,
+0xCFE0,
+0xCFFC,
+0xD018,
+0xD034,
+0xD050,
+0xD06C,
+0xD088,
+0xD0A4,
+0xD0C0,
+0xD0DC,
+0xD0F8,
+0xD114,
+0xD130,
+0xD14C,
+0xD168,
+0xD184,
+0xD1A0,
+0xD1BC,
+0xD1D8,
+0xD1F4,
+0xD210,
+0xD22C,
+0xD248,
+0xD264,
+0xD280,
+0xD29C,
+0xD2B8,
+0xD2D4,
+0xD2F0,
+0xD30C,
+0xD328,
+0xD344,
+0xD360,
+0xD37C,
+0xD398,
+0xD3B4,
+0xD3D0,
+0xD3EC,
+0xD408,
+0xD424,
+0xD440,
+0xD45C,
+0xD478,
+0xD494,
+0xD4B0,
+0xD4CC,
+0xD4E8,
+0xD504,
+0xD520,
+0xD53C,
+0xD558,
+0xD574,
+0xD590,
+0xD5AC,
+0xD5C8,
+0xD5E4,
+0xD600,
+0xD61C,
+0xD638,
+0xD654,
+0xD670,
+0xD68C,
+0xD6A8,
+0xD6C4,
+0xD6E0,
+0xD6FC,
+0xD718,
+0xD734,
+0xD750,
+0xD76C,
+0xD788,
+}
+
+@(rodata)
+hangul_syllable_lvt_ranges := [?]i32 { // Flat list of (first, last) code point pairs, one pair per row, ascending; each pair spans 0x1B (27) code points and the pairs run from 0xAC01 to 0xD7A3. Presumably the Hangul_Syllable_Type=LVT set behind `is_hangul_syllable_lvt`, with inclusive bounds - confirm against the lookup code.
+0xAC01, 0xAC1B,
+0xAC1D, 0xAC37,
+0xAC39, 0xAC53,
+0xAC55, 0xAC6F,
+0xAC71, 0xAC8B,
+0xAC8D, 0xACA7,
+0xACA9, 0xACC3,
+0xACC5, 0xACDF,
+0xACE1, 0xACFB,
+0xACFD, 0xAD17,
+0xAD19, 0xAD33,
+0xAD35, 0xAD4F,
+0xAD51, 0xAD6B,
+0xAD6D, 0xAD87,
+0xAD89, 0xADA3,
+0xADA5, 0xADBF,
+0xADC1, 0xADDB,
+0xADDD, 0xADF7,
+0xADF9, 0xAE13,
+0xAE15, 0xAE2F,
+0xAE31, 0xAE4B,
+0xAE4D, 0xAE67,
+0xAE69, 0xAE83,
+0xAE85, 0xAE9F,
+0xAEA1, 0xAEBB,
+0xAEBD, 0xAED7,
+0xAED9, 0xAEF3,
+0xAEF5, 0xAF0F,
+0xAF11, 0xAF2B,
+0xAF2D, 0xAF47,
+0xAF49, 0xAF63,
+0xAF65, 0xAF7F,
+0xAF81, 0xAF9B,
+0xAF9D, 0xAFB7,
+0xAFB9, 0xAFD3,
+0xAFD5, 0xAFEF,
+0xAFF1, 0xB00B,
+0xB00D, 0xB027,
+0xB029, 0xB043,
+0xB045, 0xB05F,
+0xB061, 0xB07B,
+0xB07D, 0xB097,
+0xB099, 0xB0B3,
+0xB0B5, 0xB0CF,
+0xB0D1, 0xB0EB,
+0xB0ED, 0xB107,
+0xB109, 0xB123,
+0xB125, 0xB13F,
+0xB141, 0xB15B,
+0xB15D, 0xB177,
+0xB179, 0xB193,
+0xB195, 0xB1AF,
+0xB1B1, 0xB1CB,
+0xB1CD, 0xB1E7,
+0xB1E9, 0xB203,
+0xB205, 0xB21F,
+0xB221, 0xB23B,
+0xB23D, 0xB257,
+0xB259, 0xB273,
+0xB275, 0xB28F,
+0xB291, 0xB2AB,
+0xB2AD, 0xB2C7,
+0xB2C9, 0xB2E3,
+0xB2E5, 0xB2FF,
+0xB301, 0xB31B,
+0xB31D, 0xB337,
+0xB339, 0xB353,
+0xB355, 0xB36F,
+0xB371, 0xB38B,
+0xB38D, 0xB3A7,
+0xB3A9, 0xB3C3,
+0xB3C5, 0xB3DF,
+0xB3E1, 0xB3FB,
+0xB3FD, 0xB417,
+0xB419, 0xB433,
+0xB435, 0xB44F,
+0xB451, 0xB46B,
+0xB46D, 0xB487,
+0xB489, 0xB4A3,
+0xB4A5, 0xB4BF,
+0xB4C1, 0xB4DB,
+0xB4DD, 0xB4F7,
+0xB4F9, 0xB513,
+0xB515, 0xB52F,
+0xB531, 0xB54B,
+0xB54D, 0xB567,
+0xB569, 0xB583,
+0xB585, 0xB59F,
+0xB5A1, 0xB5BB,
+0xB5BD, 0xB5D7,
+0xB5D9, 0xB5F3,
+0xB5F5, 0xB60F,
+0xB611, 0xB62B,
+0xB62D, 0xB647,
+0xB649, 0xB663,
+0xB665, 0xB67F,
+0xB681, 0xB69B,
+0xB69D, 0xB6B7,
+0xB6B9, 0xB6D3,
+0xB6D5, 0xB6EF,
+0xB6F1, 0xB70B,
+0xB70D, 0xB727,
+0xB729, 0xB743,
+0xB745, 0xB75F,
+0xB761, 0xB77B,
+0xB77D, 0xB797,
+0xB799, 0xB7B3,
+0xB7B5, 0xB7CF,
+0xB7D1, 0xB7EB,
+0xB7ED, 0xB807,
+0xB809, 0xB823,
+0xB825, 0xB83F,
+0xB841, 0xB85B,
+0xB85D, 0xB877,
+0xB879, 0xB893,
+0xB895, 0xB8AF,
+0xB8B1, 0xB8CB,
+0xB8CD, 0xB8E7,
+0xB8E9, 0xB903,
+0xB905, 0xB91F,
+0xB921, 0xB93B,
+0xB93D, 0xB957,
+0xB959, 0xB973,
+0xB975, 0xB98F,
+0xB991, 0xB9AB,
+0xB9AD, 0xB9C7,
+0xB9C9, 0xB9E3,
+0xB9E5, 0xB9FF,
+0xBA01, 0xBA1B,
+0xBA1D, 0xBA37,
+0xBA39, 0xBA53,
+0xBA55, 0xBA6F,
+0xBA71, 0xBA8B,
+0xBA8D, 0xBAA7,
+0xBAA9, 0xBAC3,
+0xBAC5, 0xBADF,
+0xBAE1, 0xBAFB,
+0xBAFD, 0xBB17,
+0xBB19, 0xBB33,
+0xBB35, 0xBB4F,
+0xBB51, 0xBB6B,
+0xBB6D, 0xBB87,
+0xBB89, 0xBBA3,
+0xBBA5, 0xBBBF,
+0xBBC1, 0xBBDB,
+0xBBDD, 0xBBF7,
+0xBBF9, 0xBC13,
+0xBC15, 0xBC2F,
+0xBC31, 0xBC4B,
+0xBC4D, 0xBC67,
+0xBC69, 0xBC83,
+0xBC85, 0xBC9F,
+0xBCA1, 0xBCBB,
+0xBCBD, 0xBCD7,
+0xBCD9, 0xBCF3,
+0xBCF5, 0xBD0F,
+0xBD11, 0xBD2B,
+0xBD2D, 0xBD47,
+0xBD49, 0xBD63,
+0xBD65, 0xBD7F,
+0xBD81, 0xBD9B,
+0xBD9D, 0xBDB7,
+0xBDB9, 0xBDD3,
+0xBDD5, 0xBDEF,
+0xBDF1, 0xBE0B,
+0xBE0D, 0xBE27,
+0xBE29, 0xBE43,
+0xBE45, 0xBE5F,
+0xBE61, 0xBE7B,
+0xBE7D, 0xBE97,
+0xBE99, 0xBEB3,
+0xBEB5, 0xBECF,
+0xBED1, 0xBEEB,
+0xBEED, 0xBF07,
+0xBF09, 0xBF23,
+0xBF25, 0xBF3F,
+0xBF41, 0xBF5B,
+0xBF5D, 0xBF77,
+0xBF79, 0xBF93,
+0xBF95, 0xBFAF,
+0xBFB1, 0xBFCB,
+0xBFCD, 0xBFE7,
+0xBFE9, 0xC003,
+0xC005, 0xC01F,
+0xC021, 0xC03B,
+0xC03D, 0xC057,
+0xC059, 0xC073,
+0xC075, 0xC08F,
+0xC091, 0xC0AB,
+0xC0AD, 0xC0C7,
+0xC0C9, 0xC0E3,
+0xC0E5, 0xC0FF,
+0xC101, 0xC11B,
+0xC11D, 0xC137,
+0xC139, 0xC153,
+0xC155, 0xC16F,
+0xC171, 0xC18B,
+0xC18D, 0xC1A7,
+0xC1A9, 0xC1C3,
+0xC1C5, 0xC1DF,
+0xC1E1, 0xC1FB,
+0xC1FD, 0xC217,
+0xC219, 0xC233,
+0xC235, 0xC24F,
+0xC251, 0xC26B,
+0xC26D, 0xC287,
+0xC289, 0xC2A3,
+0xC2A5, 0xC2BF,
+0xC2C1, 0xC2DB,
+0xC2DD, 0xC2F7,
+0xC2F9, 0xC313,
+0xC315, 0xC32F,
+0xC331, 0xC34B,
+0xC34D, 0xC367,
+0xC369, 0xC383,
+0xC385, 0xC39F,
+0xC3A1, 0xC3BB,
+0xC3BD, 0xC3D7,
+0xC3D9, 0xC3F3,
+0xC3F5, 0xC40F,
+0xC411, 0xC42B,
+0xC42D, 0xC447,
+0xC449, 0xC463,
+0xC465, 0xC47F,
+0xC481, 0xC49B,
+0xC49D, 0xC4B7,
+0xC4B9, 0xC4D3,
+0xC4D5, 0xC4EF,
+0xC4F1, 0xC50B,
+0xC50D, 0xC527,
+0xC529, 0xC543,
+0xC545, 0xC55F,
+0xC561, 0xC57B,
+0xC57D, 0xC597,
+0xC599, 0xC5B3,
+0xC5B5, 0xC5CF,
+0xC5D1, 0xC5EB,
+0xC5ED, 0xC607,
+0xC609, 0xC623,
+0xC625, 0xC63F,
+0xC641, 0xC65B,
+0xC65D, 0xC677,
+0xC679, 0xC693,
+0xC695, 0xC6AF,
+0xC6B1, 0xC6CB,
+0xC6CD, 0xC6E7,
+0xC6E9, 0xC703,
+0xC705, 0xC71F,
+0xC721, 0xC73B,
+0xC73D, 0xC757,
+0xC759, 0xC773,
+0xC775, 0xC78F,
+0xC791, 0xC7AB,
+0xC7AD, 0xC7C7,
+0xC7C9, 0xC7E3,
+0xC7E5, 0xC7FF,
+0xC801, 0xC81B,
+0xC81D, 0xC837,
+0xC839, 0xC853,
+0xC855, 0xC86F,
+0xC871, 0xC88B,
+0xC88D, 0xC8A7,
+0xC8A9, 0xC8C3,
+0xC8C5, 0xC8DF,
+0xC8E1, 0xC8FB,
+0xC8FD, 0xC917,
+0xC919, 0xC933,
+0xC935, 0xC94F,
+0xC951, 0xC96B,
+0xC96D, 0xC987,
+0xC989, 0xC9A3,
+0xC9A5, 0xC9BF,
+0xC9C1, 0xC9DB,
+0xC9DD, 0xC9F7,
+0xC9F9, 0xCA13,
+0xCA15, 0xCA2F,
+0xCA31, 0xCA4B,
+0xCA4D, 0xCA67,
+0xCA69, 0xCA83,
+0xCA85, 0xCA9F,
+0xCAA1, 0xCABB,
+0xCABD, 0xCAD7,
+0xCAD9, 0xCAF3,
+0xCAF5, 0xCB0F,
+0xCB11, 0xCB2B,
+0xCB2D, 0xCB47,
+0xCB49, 0xCB63,
+0xCB65, 0xCB7F,
+0xCB81, 0xCB9B,
+0xCB9D, 0xCBB7,
+0xCBB9, 0xCBD3,
+0xCBD5, 0xCBEF,
+0xCBF1, 0xCC0B,
+0xCC0D, 0xCC27,
+0xCC29, 0xCC43,
+0xCC45, 0xCC5F,
+0xCC61, 0xCC7B,
+0xCC7D, 0xCC97,
+0xCC99, 0xCCB3,
+0xCCB5, 0xCCCF,
+0xCCD1, 0xCCEB,
+0xCCED, 0xCD07,
+0xCD09, 0xCD23,
+0xCD25, 0xCD3F,
+0xCD41, 0xCD5B,
+0xCD5D, 0xCD77,
+0xCD79, 0xCD93,
+0xCD95, 0xCDAF,
+0xCDB1, 0xCDCB,
+0xCDCD, 0xCDE7,
+0xCDE9, 0xCE03,
+0xCE05, 0xCE1F,
+0xCE21, 0xCE3B,
+0xCE3D, 0xCE57,
+0xCE59, 0xCE73,
+0xCE75, 0xCE8F,
+0xCE91, 0xCEAB,
+0xCEAD, 0xCEC7,
+0xCEC9, 0xCEE3,
+0xCEE5, 0xCEFF,
+0xCF01, 0xCF1B,
+0xCF1D, 0xCF37,
+0xCF39, 0xCF53,
+0xCF55, 0xCF6F,
+0xCF71, 0xCF8B,
+0xCF8D, 0xCFA7,
+0xCFA9, 0xCFC3,
+0xCFC5, 0xCFDF,
+0xCFE1, 0xCFFB,
+0xCFFD, 0xD017,
+0xD019, 0xD033,
+0xD035, 0xD04F,
+0xD051, 0xD06B,
+0xD06D, 0xD087,
+0xD089, 0xD0A3,
+0xD0A5, 0xD0BF,
+0xD0C1, 0xD0DB,
+0xD0DD, 0xD0F7,
+0xD0F9, 0xD113,
+0xD115, 0xD12F,
+0xD131, 0xD14B,
+0xD14D, 0xD167,
+0xD169, 0xD183,
+0xD185, 0xD19F,
+0xD1A1, 0xD1BB,
+0xD1BD, 0xD1D7,
+0xD1D9, 0xD1F3,
+0xD1F5, 0xD20F,
+0xD211, 0xD22B,
+0xD22D, 0xD247,
+0xD249, 0xD263,
+0xD265, 0xD27F,
+0xD281, 0xD29B,
+0xD29D, 0xD2B7,
+0xD2B9, 0xD2D3,
+0xD2D5, 0xD2EF,
+0xD2F1, 0xD30B,
+0xD30D, 0xD327,
+0xD329, 0xD343,
+0xD345, 0xD35F,
+0xD361, 0xD37B,
+0xD37D, 0xD397,
+0xD399, 0xD3B3,
+0xD3B5, 0xD3CF,
+0xD3D1, 0xD3EB,
+0xD3ED, 0xD407,
+0xD409, 0xD423,
+0xD425, 0xD43F,
+0xD441, 0xD45B,
+0xD45D, 0xD477,
+0xD479, 0xD493,
+0xD495, 0xD4AF,
+0xD4B1, 0xD4CB,
+0xD4CD, 0xD4E7,
+0xD4E9, 0xD503,
+0xD505, 0xD51F,
+0xD521, 0xD53B,
+0xD53D, 0xD557,
+0xD559, 0xD573,
+0xD575, 0xD58F,
+0xD591, 0xD5AB,
+0xD5AD, 0xD5C7,
+0xD5C9, 0xD5E3,
+0xD5E5, 0xD5FF,
+0xD601, 0xD61B,
+0xD61D, 0xD637,
+0xD639, 0xD653,
+0xD655, 0xD66F,
+0xD671, 0xD68B,
+0xD68D, 0xD6A7,
+0xD6A9, 0xD6C3,
+0xD6C5, 0xD6DF,
+0xD6E1, 0xD6FB,
+0xD6FD, 0xD717,
+0xD719, 0xD733,
+0xD735, 0xD74F,
+0xD751, 0xD76B,
+0xD76D, 0xD787,
+0xD789, 0xD7A3,
+}
+
+@(rodata)
+indic_conjunct_break_consonant_ranges := [?]i32 { // Flat list of (first, last) code point pairs, one pair per row, in ascending order. Presumably the Indic_Conjunct_Break=Consonant set consulted for rule GB9c via `is_indic_conjunct_break_consonant`, with inclusive bounds - confirm against the lookup code.
+0x0915, 0x0939,
+0x0958, 0x095F,
+0x0978, 0x097F,
+0x0995, 0x09A8,
+0x09AA, 0x09B0,
+0x09B2, 0x09B2,
+0x09B6, 0x09B9,
+0x09DC, 0x09DD,
+0x09DF, 0x09DF,
+0x09F0, 0x09F1,
+0x0A95, 0x0AA8,
+0x0AAA, 0x0AB0,
+0x0AB2, 0x0AB3,
+0x0AB5, 0x0AB9,
+0x0AF9, 0x0AF9,
+0x0B15, 0x0B28,
+0x0B2A, 0x0B30,
+0x0B32, 0x0B33,
+0x0B35, 0x0B39,
+0x0B5C, 0x0B5D,
+0x0B5F, 0x0B5F,
+0x0B71, 0x0B71,
+0x0C15, 0x0C28,
+0x0C2A, 0x0C39,
+0x0C58, 0x0C5A,
+0x0D15, 0x0D3A,
+}
+
+@(rodata)
+indic_conjunct_break_extend_ranges := [?]i32 { // Flat list of (first, last) code point pairs, one pair per row, in ascending order (a single code point appears as first == last). Presumably the Indic_Conjunct_Break=Extend set consulted for rule GB9c via `is_indic_conjunct_break_extend`, with inclusive bounds - confirm against the lookup code.
+0x0300, 0x034E,
+0x0350, 0x036F,
+0x0483, 0x0487,
+0x0591, 0x05BD,
+0x05BF, 0x05BF,
+0x05C1, 0x05C2,
+0x05C4, 0x05C5,
+0x05C7, 0x05C7,
+0x0610, 0x061A,
+0x064B, 0x065F,
+0x0670, 0x0670,
+0x06D6, 0x06DC,
+0x06DF, 0x06E4,
+0x06E7, 0x06E8,
+0x06EA, 0x06ED,
+0x0711, 0x0711,
+0x0730, 0x074A,
+0x07EB, 0x07F3,
+0x07FD, 0x07FD,
+0x0816, 0x0819,
+0x081B, 0x0823,
+0x0825, 0x0827,
+0x0829, 0x082D,
+0x0859, 0x085B,
+0x0898, 0x089F,
+0x08CA, 0x08E1,
+0x08E3, 0x08FF,
+0x093C, 0x093C,
+0x0951, 0x0954,
+0x09BC, 0x09BC,
+0x09FE, 0x09FE,
+0x0A3C, 0x0A3C,
+0x0ABC, 0x0ABC,
+0x0B3C, 0x0B3C,
+0x0C3C, 0x0C3C,
+0x0C55, 0x0C56,
+0x0CBC, 0x0CBC,
+0x0D3B, 0x0D3C,
+0x0E38, 0x0E3A,
+0x0E48, 0x0E4B,
+0x0EB8, 0x0EBA,
+0x0EC8, 0x0ECB,
+0x0F18, 0x0F19,
+0x0F35, 0x0F35,
+0x0F37, 0x0F37,
+0x0F39, 0x0F39,
+0x0F71, 0x0F72,
+0x0F74, 0x0F74,
+0x0F7A, 0x0F7D,
+0x0F80, 0x0F80,
+0x0F82, 0x0F84,
+0x0F86, 0x0F87,
+0x0FC6, 0x0FC6,
+0x1037, 0x1037,
+0x1039, 0x103A,
+0x108D, 0x108D,
+0x135D, 0x135F,
+0x1714, 0x1714,
+0x17D2, 0x17D2,
+0x17DD, 0x17DD,
+0x18A9, 0x18A9,
+0x1939, 0x193B,
+0x1A17, 0x1A18,
+0x1A60, 0x1A60,
+0x1A75, 0x1A7C,
+0x1A7F, 0x1A7F,
+0x1AB0, 0x1ABD,
+0x1ABF, 0x1ACE,
+0x1B34, 0x1B34,
+0x1B6B, 0x1B73,
+0x1BAB, 0x1BAB,
+0x1BE6, 0x1BE6,
+0x1C37, 0x1C37,
+0x1CD0, 0x1CD2,
+0x1CD4, 0x1CE0,
+0x1CE2, 0x1CE8,
+0x1CED, 0x1CED,
+0x1CF4, 0x1CF4,
+0x1CF8, 0x1CF9,
+0x1DC0, 0x1DFF,
+0x200D, 0x200D,
+0x20D0, 0x20DC,
+0x20E1, 0x20E1,
+0x20E5, 0x20F0,
+0x2CEF, 0x2CF1,
+0x2D7F, 0x2D7F,
+0x2DE0, 0x2DFF,
+0x302A, 0x302D,
+0x302E, 0x302F,
+0x3099, 0x309A,
+0xA66F, 0xA66F,
+0xA674, 0xA67D,
+0xA69E, 0xA69F,
+0xA6F0, 0xA6F1,
+0xA82C, 0xA82C,
+0xA8E0, 0xA8F1,
+0xA92B, 0xA92D,
+0xA9B3, 0xA9B3,
+0xAAB0, 0xAAB0,
+0xAAB2, 0xAAB4,
+0xAAB7, 0xAAB8,
+0xAABE, 0xAABF,
+0xAAC1, 0xAAC1,
+0xAAF6, 0xAAF6,
+0xABED, 0xABED,
+0xFB1E, 0xFB1E,
+0xFE20, 0xFE2F,
+0x101FD, 0x101FD,
+0x102E0, 0x102E0,
+0x10376, 0x1037A,
+0x10A0D, 0x10A0D,
+0x10A0F, 0x10A0F,
+0x10A38, 0x10A3A,
+0x10A3F, 0x10A3F,
+0x10AE5, 0x10AE6,
+0x10D24, 0x10D27,
+0x10EAB, 0x10EAC,
+0x10EFD, 0x10EFF,
+0x10F46, 0x10F50,
+0x10F82, 0x10F85,
+0x11070, 0x11070,
+0x1107F, 0x1107F,
+0x110BA, 0x110BA,
+0x11100, 0x11102,
+0x11133, 0x11134,
+0x11173, 0x11173,
+0x111CA, 0x111CA,
+0x11236, 0x11236,
+0x112E9, 0x112EA,
+0x1133B, 0x1133C,
+0x11366, 0x1136C,
+0x11370, 0x11374,
+0x11446, 0x11446,
+0x1145E, 0x1145E,
+0x114C3, 0x114C3,
+0x115C0, 0x115C0,
+0x116B7, 0x116B7,
+0x1172B, 0x1172B,
+0x1183A, 0x1183A,
+0x1193E, 0x1193E,
+0x11943, 0x11943,
+0x11A34, 0x11A34,
+0x11A47, 0x11A47,
+0x11A99, 0x11A99,
+0x11D42, 0x11D42,
+0x11D44, 0x11D45,
+0x11D97, 0x11D97,
+0x11F42, 0x11F42,
+0x16AF0, 0x16AF4,
+0x16B30, 0x16B36,
+0x1BC9E, 0x1BC9E,
+0x1D165, 0x1D165,
+0x1D167, 0x1D169,
+0x1D16E, 0x1D172,
+0x1D17B, 0x1D182,
+0x1D185, 0x1D18B,
+0x1D1AA, 0x1D1AD,
+0x1D242, 0x1D244,
+0x1E000, 0x1E006,
+0x1E008, 0x1E018,
+0x1E01B, 0x1E021,
+0x1E023, 0x1E024,
+0x1E026, 0x1E02A,
+0x1E08F, 0x1E08F,
+0x1E130, 0x1E136,
+0x1E2AE, 0x1E2AE,
+0x1E2EC, 0x1E2EF,
+0x1E4EC, 0x1E4EF,
+0x1E8D0, 0x1E8D6,
+0x1E944, 0x1E94A,
+}
+
+//
+// End of Unicode 15.1.0 block.
+//
diff --git a/core/unicode/utf8/grapheme.odin b/core/unicode/utf8/grapheme.odin
new file mode 100644
index 000000000..c0851c6ea
--- /dev/null
+++ b/core/unicode/utf8/grapheme.odin
@@ -0,0 +1,387 @@
+package utf8
+
+import "core:unicode"
+
+ZERO_WIDTH_JOINER :: unicode.ZERO_WIDTH_JOINER // U+200D, aliased from core:unicode; compared against in the GB9, GB9c and GB11 checks of decode_grapheme_clusters.
+is_control :: unicode.is_control // This and the following lines alias core:unicode classification procedures so this file can call them unqualified. Used for GB4/GB5.
+is_hangul_syllable_leading :: unicode.is_hangul_syllable_leading // Used for GB6-GB8 (Hangul "L").
+is_hangul_syllable_vowel :: unicode.is_hangul_syllable_vowel // Used for GB6-GB8 (Hangul "V").
+is_hangul_syllable_trailing :: unicode.is_hangul_syllable_trailing // Used for GB6-GB8 (Hangul "T").
+is_hangul_syllable_lv :: unicode.is_hangul_syllable_lv // Used for GB6-GB8 (Hangul "LV").
+is_hangul_syllable_lvt :: unicode.is_hangul_syllable_lvt // Used for GB6-GB8 (Hangul "LVT").
+is_indic_conjunct_break_extend :: unicode.is_indic_conjunct_break_extend // Used for GB9c (InCB=Extend).
+is_indic_conjunct_break_linker :: unicode.is_indic_conjunct_break_linker // Used for GB9c (InCB=Linker).
+is_indic_conjunct_break_consonant :: unicode.is_indic_conjunct_break_consonant // Used for GB9c (InCB=Consonant).
+is_gcb_extend_class :: unicode.is_gcb_extend_class // Used for GB9 (Extend), and as GB9c/GB11 support.
+is_spacing_mark :: unicode.is_spacing_mark // Used for GB9a.
+is_gcb_prepend_class :: unicode.is_gcb_prepend_class // Used for GB9b.
+is_emoji_extended_pictographic :: unicode.is_emoji_extended_pictographic // Used for GB11.
+is_regional_indicator :: unicode.is_regional_indicator // Used for GB12/GB13.
+
+
+Grapheme :: struct { // Start position of one grapheme cluster; decode_grapheme_clusters appends one of these per cluster when `track_graphemes` is true.
+ byte_index: int, // Byte offset into the input string of the cluster's first rune.
+ rune_index: int, // Zero-based index of that rune, i.e. the number of runes that precede it in the string.
+}
+
+/*
+Report how many grapheme clusters and how many runes a UTF-8 string holds.
+
+This is a thin wrapper over `decode_grapheme_clusters` that switches position
+tracking off, so no per-grapheme records are appended while counting.
+
+Inputs:
+- str: The input string.
+
+Returns:
+- graphemes: The number of graphemes in the string.
+- runes: The number of runes in the string.
+*/
+@(require_results)
+grapheme_count :: proc(str: string) -> (graphemes, runes: int) {
+	// The first result (the position list) is never filled in this mode; discard it.
+	_, cluster_total, rune_total := decode_grapheme_clusters(str, track_graphemes = false)
+	return cluster_total, rune_total
+}
+
+/*
+Decode the individual graphemes in a UTF-8 string.
+
+*Allocates Using Provided Allocator*
+
+Inputs:
+- str: The input string.
+- track_graphemes: Whether or not to allocate and return `graphemes` with extra data about each grapheme.
+- allocator: (default: context.allocator)
+
+Returns:
+- graphemes: Extra data about each grapheme: one entry per cluster, recording the byte offset and rune index at which it starts. Nothing is appended when `track_graphemes` is false.
+- grapheme_count: The number of graphemes in the string.
+- rune_count: The number of runes in the string.
+*/
+@(require_results)
+decode_grapheme_clusters :: proc(
+	str: string,
+	track_graphemes := true,
+	allocator := context.allocator,
+) -> (
+	graphemes: [dynamic]Grapheme,
+	grapheme_count: int,
+	rune_count: int,
+) {
+	// The following procedure implements text segmentation by breaking on
+	// Grapheme Cluster Boundaries[1], using the values[2] and rules[3] from
+	// the Unicode® Standard Annex #29, entitled:
+	//
+	//     UNICODE TEXT SEGMENTATION
+	//
+	// Version: Unicode 15.1.0
+	// Date:    2023-08-16
+	// Revision: 43
+	//
+	// This procedure is conformant[4] to UAX29-C1-1, otherwise known as the
+	// extended, non-legacy ruleset.
+	//
+	// Please see the references below for more information.
+	//
+	//
+	// NOTE(Feoramund): This procedure has not been highly optimized.
+	// A couple opportunities were taken to bypass repeated checking when a
+	// rune is outside of certain codepoint ranges, but little else has been
+	// done. Standard switches, conditionals, and binary search are used to
+	// see if a rune fits into a certain category.
+	//
+	// I did find that only one prior rune of state was necessary to build an
+	// algorithm that successfully passes all 4,835 test cases provided with
+	// this implementation from the Unicode organization's website.
+	//
+	// My initial implementation tracked explicit breaks and counted them once
+	// the string iteration had terminated. I've found this current
+	// implementation to be far simpler and need no allocations (unless the
+	// caller wants position data).
+	//
+	// Most rules work backwards instead of forwards which has helped keep this
+	// simple, despite its length and verbosity.
+	//
+	// The two rules that work forwards (GB4: break after controls, and
+	// GB9b: never break after Prepend) are recorded by the rune that triggers
+	// them and applied to the *next* rune as an override of whatever the
+	// backwards rules decided. The next rune is still evaluated normally, so
+	// it can open an emoji, regional indicator or Indic sequence, or be a
+	// Prepend itself, directly after a control or Prepend character.
+	//
+	// NOTE(review): the forward-rule handling was revised after the test run
+	// mentioned above; re-run the conformance suite when touching it.
+	//
+	//
+	// The implementation has been left verbose and in the order described by
+	// the specification, to enable better readability and future upkeep.
+	//
+	// Some possible optimizations might include:
+	//
+	// - saving the type of `last_rune` instead of the exact rune.
+	// - reordering rules.
+	// - combining tables.
+	//
+	//
+	// [1]: https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
+	// [2]: https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table
+	// [3]: https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
+	// [4]: https://www.unicode.org/reports/tr29/#Conformance
+
+	Grapheme_Cluster_Sequence :: enum {
+		None,
+		Indic,
+		Emoji,
+		Regional,
+	}
+
+	// `graphemes` starts out without an allocator of its own, so `append`
+	// below picks up the one installed here.
+	context.allocator = allocator
+
+	last_rune: rune
+
+	// Forward-working rules, set by one rune and consumed by the next.
+	break_before_next: bool // GB4:  (Control | CR | LF) ÷
+	join_before_next:  bool // GB9b: Prepend ×
+
+	last_grapheme_count: int
+
+	regional_indicator_counter: int
+
+	current_sequence: Grapheme_Cluster_Sequence
+	continue_sequence: bool
+
+	for this_rune, byte_index in str {
+		// Snapshot used to override the backwards rules' verdict for this rune.
+		count_before := grapheme_count
+		force_break := false
+		force_join  := false
+
+		defer {
+			// Apply the forward-working rule inherited from the previous rune.
+			// Every rule below adds at most one to `grapheme_count`, so the
+			// boundary decision for this rune can simply be rewritten here.
+			if force_break {
+				grapheme_count = count_before + 1
+			} else if force_join {
+				grapheme_count = count_before
+			}
+
+			// "Break at the start and end of text, unless the text is empty."
+			//
+			// GB1: sot ÷ Any
+			// GB2: Any ÷ eot
+			if rune_count == 0 && grapheme_count == 0 {
+				grapheme_count += 1
+			}
+			// A rise in the count means this rune starts a new cluster.
+			if track_graphemes && grapheme_count > last_grapheme_count {
+				append(&graphemes, Grapheme{ byte_index, rune_count })
+			}
+			last_grapheme_count = grapheme_count
+
+			last_rune = this_rune
+			rune_count += 1
+
+			// A sequence survives only if this rune explicitly extended it.
+			if !continue_sequence {
+				current_sequence = .None
+				regional_indicator_counter = 0
+			}
+			continue_sequence = false
+		}
+
+		// "Do not break between a CR and LF. Otherwise, break before and after controls."
+		//
+		// GB3:                 CR × LF
+		// GB4: (Control | CR | LF) ÷
+		// GB5:                     ÷ (Control | CR | LF)
+		if this_rune == '\n' && last_rune == '\r' {
+			// The LF joins the CR, but GB4 still breaks after the LF.
+			break_before_next = true
+			join_before_next = false
+			continue
+		}
+
+		if is_control(this_rune) {
+			// GB5 outranks GB9b: a control breaks even right after a Prepend,
+			// which is why `force_join` is not consulted on this path.
+			grapheme_count += 1
+			break_before_next = true
+			join_before_next = false
+			continue
+		}
+
+		// (Pick up the rules that work forwards, instead of backwards.)
+		// GB4 is listed before GB9b, so a pending break wins over a pending join.
+		force_break = break_before_next
+		force_join = join_before_next && !break_before_next
+		break_before_next = false
+		join_before_next = false
+
+		// (Optimization 1: Prevent low runes from proceeding further.)
+		//
+		// * 0xA9 and 0xAE are in the Extended_Pictographic range,
+		//   which is checked later in GB11.
+		if this_rune != 0xA9 && this_rune != 0xAE && this_rune <= 0x2FF {
+			grapheme_count += 1
+			continue
+		}
+
+		// (Optimization 2: Check if the rune is in the Hangul space before getting specific.)
+		if 0x1100 <= this_rune && this_rune <= 0xD7FB {
+			// "Do not break Hangul syllable sequences."
+			//
+			// GB6:        L × (L | V | LV | LVT)
+			// GB7: (LV | V) × (V | T)
+			// GB8: (LVT | T) × T
+			if is_hangul_syllable_leading(this_rune) ||
+			   is_hangul_syllable_lv(this_rune) ||
+			   is_hangul_syllable_lvt(this_rune)
+			{
+				if !is_hangul_syllable_leading(last_rune) {
+					grapheme_count += 1
+				}
+				continue
+			}
+
+			if is_hangul_syllable_vowel(this_rune) {
+				if is_hangul_syllable_leading(last_rune) ||
+				   is_hangul_syllable_vowel(last_rune) ||
+				   is_hangul_syllable_lv(last_rune)
+				{
+					continue
+				}
+				grapheme_count += 1
+				continue
+			}
+
+			if is_hangul_syllable_trailing(this_rune) {
+				if is_hangul_syllable_trailing(last_rune) ||
+				   is_hangul_syllable_lvt(last_rune) ||
+				   is_hangul_syllable_lv(last_rune) ||
+				   is_hangul_syllable_vowel(last_rune)
+				{
+					continue
+				}
+				grapheme_count += 1
+				continue
+			}
+		}
+
+		// "Do not break before extending characters or ZWJ."
+		//
+		// GB9: × (Extend | ZWJ)
+		if this_rune == ZERO_WIDTH_JOINER {
+			continue_sequence = true
+			continue
+		}
+
+		if is_gcb_extend_class(this_rune) {
+			// (Support for GB9c.)
+			if current_sequence == .Indic {
+				if is_indic_conjunct_break_extend(this_rune) && (
+				   is_indic_conjunct_break_linker(last_rune) ||
+				   is_indic_conjunct_break_consonant(last_rune) )
+				{
+					continue_sequence = true
+					continue
+				}
+
+				if is_indic_conjunct_break_linker(this_rune) && (
+				   is_indic_conjunct_break_linker(last_rune) ||
+				   is_indic_conjunct_break_extend(last_rune) ||
+				   is_indic_conjunct_break_consonant(last_rune) )
+				{
+					continue_sequence = true
+					continue
+				}
+
+				continue
+			}
+
+			// (Support for GB11.)
+			if current_sequence == .Emoji && (
+			   is_gcb_extend_class(last_rune) ||
+			   is_emoji_extended_pictographic(last_rune) )
+			{
+				continue_sequence = true
+			}
+
+			continue
+		}
+
+		// _The GB9a and GB9b rules only apply to extended grapheme clusters:_
+		// "Do not break before SpacingMarks, or after Prepend characters."
+		//
+		// GB9a:         × SpacingMark
+		// GB9b: Prepend ×
+		if is_spacing_mark(this_rune) {
+			continue
+		}
+
+		if is_gcb_prepend_class(this_rune) {
+			grapheme_count += 1
+			join_before_next = true
+			continue
+		}
+
+		// _The GB9c rule only applies to extended grapheme clusters:_
+		// "Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker."
+		//
+		// GB9c: \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* × \p{InCB=Consonant}
+		if is_indic_conjunct_break_consonant(this_rune) {
+			if current_sequence == .Indic {
+				if last_rune == ZERO_WIDTH_JOINER ||
+				   is_indic_conjunct_break_linker(last_rune)
+				{
+					continue_sequence = true
+				} else {
+					grapheme_count += 1
+				}
+			} else {
+				grapheme_count += 1
+				current_sequence = .Indic
+				continue_sequence = true
+			}
+			continue
+		}
+
+		if is_indic_conjunct_break_extend(this_rune) {
+			if current_sequence == .Indic {
+				if is_indic_conjunct_break_consonant(last_rune) ||
+				   is_indic_conjunct_break_linker(last_rune)
+				{
+					continue_sequence = true
+				} else {
+					grapheme_count += 1
+				}
+			}
+			continue
+		}
+
+		if is_indic_conjunct_break_linker(this_rune) {
+			if current_sequence == .Indic {
+				if is_indic_conjunct_break_extend(last_rune) ||
+				   is_indic_conjunct_break_linker(last_rune)
+				{
+					continue_sequence = true
+				} else {
+					grapheme_count += 1
+				}
+			}
+			continue
+		}
+
+		//
+		// (Curiously, there is no GB10.)
+		//
+
+		// "Do not break within emoji modifier sequences or emoji zwj sequences."
+		//
+		// GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
+		if is_emoji_extended_pictographic(this_rune) {
+			if current_sequence != .Emoji || last_rune != ZERO_WIDTH_JOINER {
+				grapheme_count += 1
+			}
+			current_sequence = .Emoji
+			continue_sequence = true
+			continue
+		}
+
+		// "Do not break within emoji flag sequences.
+		//  That is, do not break between regional indicator (RI) symbols
+		//  if there is an odd number of RI characters before the break point."
+		//
+		// GB12:   sot (RI RI)* RI × RI
+		// GB13: [^RI] (RI RI)* RI × RI
+		if is_regional_indicator(this_rune) {
+			if regional_indicator_counter & 1 == 0 {
+				grapheme_count += 1
+			}
+
+			current_sequence = .Regional
+			continue_sequence = true
+			regional_indicator_counter += 1
+
+			continue
+		}
+
+		// "Otherwise, break everywhere."
+		//
+		// GB999: Any ÷ Any
+		grapheme_count += 1
+	}
+
+	return
+}