about summary refs log tree commit diff
path: root/core/unicode
diff options
context:
space:
mode:
author gingerBill <bill@gingerbill.org> 2021-08-31 22:21:13 +0100
committer gingerBill <bill@gingerbill.org> 2021-08-31 22:21:13 +0100
commit 251da264ed6e0f039931683c7b0d4b97e88c8d99 (patch)
tree c7a9a088477d2452c2cf850458c62d994a211df6 /core/unicode
parent b176af27427a6c39448a71a8023e4a9877f0a51c (diff)
Remove unneeded semicolons from the core library
Diffstat (limited to 'core/unicode')
-rw-r--r-- core/unicode/letter.odin 152
-rw-r--r-- core/unicode/tables.odin 42
-rw-r--r-- core/unicode/utf16/utf16.odin 66
-rw-r--r-- core/unicode/utf8/utf8.odin 344
4 files changed, 302 insertions, 302 deletions
diff --git a/core/unicode/letter.odin b/core/unicode/letter.odin
index b498e4272..891c90bf3 100644
--- a/core/unicode/letter.odin
+++ b/core/unicode/letter.odin
@@ -1,195 +1,195 @@
package unicode
-MAX_RUNE :: '\U00010fff'; // Maximum valid unicode code point
-REPLACEMENT_CHAR :: '\ufffd'; // Represents an invalid code point
-MAX_ASCII :: '\u007f'; // Maximum ASCII value
-MAX_LATIN1 :: '\u00ff'; // Maximum Latin-1 value
+MAX_RUNE :: '\U00010fff' // Maximum valid unicode code point
+REPLACEMENT_CHAR :: '\ufffd' // Represents an invalid code point
+MAX_ASCII :: '\u007f' // Maximum ASCII value
+MAX_LATIN1 :: '\u00ff' // Maximum Latin-1 value
binary_search :: proc(c: i32, table: []i32, length, stride: int) -> int {
- n := length;
- t := 0;
+ n := length
+ t := 0
for n > 1 {
- m := n / 2;
- p := t + m*stride;
+ m := n / 2
+ p := t + m*stride
if c >= table[p] {
- t = p;
- n = n-m;
+ t = p
+ n = n-m
} else {
- n = m;
+ n = m
}
}
if n != 0 && c >= table[t] {
- return t;
+ return t
}
- return -1;
+ return -1
}
to_lower :: proc(r: rune) -> rune {
- c := i32(r);
- p := binary_search(c, to_lower_ranges[:], len(to_lower_ranges)/3, 3);
+ c := i32(r)
+ p := binary_search(c, to_lower_ranges[:], len(to_lower_ranges)/3, 3)
if p >= 0 && to_lower_ranges[p] <= c && c <= to_lower_ranges[p+1] {
- return rune(c + to_lower_ranges[p+2] - 500);
+ return rune(c + to_lower_ranges[p+2] - 500)
}
- p = binary_search(c, to_lower_singlets[:], len(to_lower_singlets)/2, 2);
+ p = binary_search(c, to_lower_singlets[:], len(to_lower_singlets)/2, 2)
if p >= 0 && c == to_lower_singlets[p] {
- return rune(c + to_lower_singlets[p+1] - 500);
+ return rune(c + to_lower_singlets[p+1] - 500)
}
- return rune(c);
+ return rune(c)
}
to_upper :: proc(r: rune) -> rune {
- c := i32(r);
- p := binary_search(c, to_upper_ranges[:], len(to_upper_ranges)/3, 3);
+ c := i32(r)
+ p := binary_search(c, to_upper_ranges[:], len(to_upper_ranges)/3, 3)
if p >= 0 && to_upper_ranges[p] <= c && c <= to_upper_ranges[p+1] {
- return rune(c + to_upper_ranges[p+2] - 500);
+ return rune(c + to_upper_ranges[p+2] - 500)
}
- p = binary_search(c, to_upper_singlets[:], len(to_upper_singlets)/2, 2);
+ p = binary_search(c, to_upper_singlets[:], len(to_upper_singlets)/2, 2)
if p >= 0 && c == to_upper_singlets[p] {
- return rune(c + to_upper_singlets[p+1] - 500);
+ return rune(c + to_upper_singlets[p+1] - 500)
}
- return rune(c);
+ return rune(c)
}
to_title :: proc(r: rune) -> rune {
- c := i32(r);
- p := binary_search(c, to_upper_singlets[:], len(to_title_singlets)/2, 2);
+ c := i32(r)
+ p := binary_search(c, to_upper_singlets[:], len(to_title_singlets)/2, 2)
if p >= 0 && c == to_upper_singlets[p] {
- return rune(c + to_title_singlets[p+1] - 500);
+ return rune(c + to_title_singlets[p+1] - 500)
}
- return rune(c);
+ return rune(c)
}
is_lower :: proc(r: rune) -> bool {
if r <= MAX_ASCII {
- return u32(r)-'a' < 26;
+ return u32(r)-'a' < 26
}
- c := i32(r);
- p := binary_search(c, to_upper_ranges[:], len(to_upper_ranges)/3, 3);
+ c := i32(r)
+ p := binary_search(c, to_upper_ranges[:], len(to_upper_ranges)/3, 3)
if p >= 0 && to_upper_ranges[p] <= c && c <= to_upper_ranges[p+1] {
- return true;
+ return true
}
- p = binary_search(c, to_upper_singlets[:], len(to_upper_singlets)/2, 2);
+ p = binary_search(c, to_upper_singlets[:], len(to_upper_singlets)/2, 2)
if p >= 0 && c == to_upper_singlets[p] {
- return true;
+ return true
}
- return false;
+ return false
}
is_upper :: proc(r: rune) -> bool {
if r <= MAX_ASCII {
- return u32(r)-'A' < 26;
+ return u32(r)-'A' < 26
}
- c := i32(r);
- p := binary_search(c, to_lower_ranges[:], len(to_lower_ranges)/3, 3);
+ c := i32(r)
+ p := binary_search(c, to_lower_ranges[:], len(to_lower_ranges)/3, 3)
if p >= 0 && to_lower_ranges[p] <= c && c <= to_lower_ranges[p+1] {
- return true;
+ return true
}
- p = binary_search(c, to_lower_singlets[:], len(to_lower_singlets)/2, 2);
+ p = binary_search(c, to_lower_singlets[:], len(to_lower_singlets)/2, 2)
if p >= 0 && c == to_lower_singlets[p] {
- return true;
+ return true
}
- return false;
+ return false
}
-is_alpha :: is_letter;
+is_alpha :: is_letter
is_letter :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
- return char_properties[u8(r)]&pLmask != 0;
+ return char_properties[u8(r)]&pLmask != 0
}
if is_upper(r) || is_lower(r) {
- return true;
+ return true
}
- c := i32(r);
- p := binary_search(c, alpha_ranges[:], len(alpha_ranges)/2, 2);
+ c := i32(r)
+ p := binary_search(c, alpha_ranges[:], len(alpha_ranges)/2, 2)
if p >= 0 && alpha_ranges[p] <= c && c <= alpha_ranges[p+1] {
- return true;
+ return true
}
- p = binary_search(c, alpha_singlets[:], len(alpha_singlets), 1);
+ p = binary_search(c, alpha_singlets[:], len(alpha_singlets), 1)
if p >= 0 && c == alpha_singlets[p] {
- return true;
+ return true
}
- return false;
+ return false
}
is_title :: proc(r: rune) -> bool {
- return is_upper(r) && is_lower(r);
+ return is_upper(r) && is_lower(r)
}
is_digit :: proc(r: rune) -> bool {
if r <= MAX_LATIN1 {
- return '0' <= r && r <= '9';
+ return '0' <= r && r <= '9'
}
- return false;
+ return false
}
-is_white_space :: is_space;
+is_white_space :: is_space
is_space :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
switch r {
case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0:
- return true;
+ return true
}
- return false;
+ return false
}
- c := i32(r);
- p := binary_search(c, space_ranges[:], len(space_ranges)/2, 2);
+ c := i32(r)
+ p := binary_search(c, space_ranges[:], len(space_ranges)/2, 2)
if p >= 0 && space_ranges[p] <= c && c <= space_ranges[p+1] {
- return true;
+ return true
}
- return false;
+ return false
}
is_combining :: proc(r: rune) -> bool {
- c := i32(r);
+ c := i32(r)
return c >= 0x0300 && (c <= 0x036f ||
(c >= 0x1ab0 && c <= 0x1aff) ||
(c >= 0x1dc0 && c <= 0x1dff) ||
(c >= 0x20d0 && c <= 0x20ff) ||
- (c >= 0xfe20 && c <= 0xfe2f));
+ (c >= 0xfe20 && c <= 0xfe2f))
}
is_graphic :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
- return char_properties[u8(r)]&pg != 0;
+ return char_properties[u8(r)]&pg != 0
}
- return false;
+ return false
}
is_print :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
- return char_properties[u8(r)]&pp != 0;
+ return char_properties[u8(r)]&pp != 0
}
- return false;
+ return false
}
is_control :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
- return char_properties[u8(r)]&pC != 0;
+ return char_properties[u8(r)]&pC != 0
}
- return false;
+ return false
}
is_number :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
- return char_properties[u8(r)]&pN != 0;
+ return char_properties[u8(r)]&pN != 0
}
- return false;
+ return false
}
is_punct :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
- return char_properties[u8(r)]&pP != 0;
+ return char_properties[u8(r)]&pP != 0
}
- return false;
+ return false
}
is_symbol :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
- return char_properties[u8(r)]&pS != 0;
+ return char_properties[u8(r)]&pS != 0
}
- return false;
+ return false
}
diff --git a/core/unicode/tables.odin b/core/unicode/tables.odin
index ff4793402..f43827413 100644
--- a/core/unicode/tables.odin
+++ b/core/unicode/tables.odin
@@ -1,16 +1,16 @@
package unicode
-@(private) pC :: 1<<0; // a control character.
-@(private) pP :: 1<<1; // a punctuation character.
-@(private) pN :: 1<<2; // a numeral.
-@(private) pS :: 1<<3; // a symbolic character.
-@(private) pZ :: 1<<4; // a spacing character.
-@(private) pLu :: 1<<5; // an upper-case letter.
-@(private) pLl :: 1<<6; // a lower-case letter.
-@(private) pp :: 1<<7; // a printable character according to Go's definition.
-@(private) pg :: pp | pZ; // a graphical character according to the Unicode definition.
-@(private) pLo :: pLl | pLu; // a letter that is neither upper nor lower case.
-@(private) pLmask :: pLo;
+@(private) pC :: 1<<0 // a control character.
+@(private) pP :: 1<<1 // a punctuation character.
+@(private) pN :: 1<<2 // a numeral.
+@(private) pS :: 1<<3 // a symbolic character.
+@(private) pZ :: 1<<4 // a spacing character.
+@(private) pLu :: 1<<5 // an upper-case letter.
+@(private) pLl :: 1<<6 // a lower-case letter.
+@(private) pp :: 1<<7 // a printable character according to Go's definition.
+@(private) pg :: pp | pZ // a graphical character according to the Unicode definition.
+@(private) pLo :: pLl | pLu // a letter that is neither upper nor lower case.
+@(private) pLmask :: pLo
char_properties := [MAX_LATIN1+1]u8{
0x00 = pC, // '\x00'
@@ -269,7 +269,7 @@ char_properties := [MAX_LATIN1+1]u8{
0xFD = pLl | pp, // 'ý'
0xFE = pLl | pp, // 'þ'
0xFF = pLl | pp, // 'ÿ'
-};
+}
alpha_ranges := [?]i32{
@@ -425,7 +425,7 @@ alpha_ranges := [?]i32{
0xffca, 0xffcf,
0xffd2, 0xffd7,
0xffda, 0xffdc,
-};
+}
alpha_singlets := [?]i32{
0x00aa,
@@ -460,7 +460,7 @@ alpha_singlets := [?]i32{
0x2128,
0xfb3e,
0xfe74,
-};
+}
space_ranges := [?]i32{
0x0009, 0x000d, // tab and newline
@@ -475,7 +475,7 @@ space_ranges := [?]i32{
0x205f, 0x205f, // medium mathematical space
0x3000, 0x3000, // ideographic space
0xfeff, 0xfeff,
-};
+}
unicode_spaces := [?]i32{
0x0009, // tab
@@ -492,7 +492,7 @@ unicode_spaces := [?]i32{
0x205f, // medium mathematical space
0x3000, // ideographic space
0xfeff, // unknown
-};
+}
to_upper_ranges := [?]i32{
0x0061, 0x007a, 468, // a-z A-Z
@@ -530,7 +530,7 @@ to_upper_ranges := [?]i32{
0x2170, 0x217f, 484,
0x24d0, 0x24e9, 474,
0xff41, 0xff5a, 468,
-};
+}
to_upper_singlets := [?]i32{
0x00ff, 621,
@@ -873,7 +873,7 @@ to_upper_singlets := [?]i32{
0x1fc3, 509,
0x1fe5, 507,
0x1ff3, 509,
-};
+}
to_lower_ranges := [?]i32{
0x0041, 0x005a, 532, // A-Z a-z
@@ -912,7 +912,7 @@ to_lower_ranges := [?]i32{
0x2160, 0x216f, 516, // - -
0x24b6, 0x24cf, 526, // - -
0xff21, 0xff3a, 532, // - -
-};
+}
to_lower_singlets := [?]i32{
0x0100, 501,
@@ -1248,7 +1248,7 @@ to_lower_singlets := [?]i32{
0x1fcc, 491,
0x1fec, 493,
0x1ffc, 491,
-};
+}
to_title_singlets := [?]i32{
0x01c4, 501,
@@ -1259,4 +1259,4 @@ to_title_singlets := [?]i32{
0x01cc, 499,
0x01f1, 501,
0x01f3, 499,
-};
+}
diff --git a/core/unicode/utf16/utf16.odin b/core/unicode/utf16/utf16.odin
index 4c76956cc..27edf088d 100644
--- a/core/unicode/utf16/utf16.odin
+++ b/core/unicode/utf16/utf16.odin
@@ -1,82 +1,82 @@
package utf16
-REPLACEMENT_CHAR :: '\ufffd';
-MAX_RUNE :: '\U0010ffff';
+REPLACEMENT_CHAR :: '\ufffd'
+MAX_RUNE :: '\U0010ffff'
-_surr1 :: 0xd800;
-_surr2 :: 0xdc00;
-_surr3 :: 0xe000;
-_surr_self :: 0x10000;
+_surr1 :: 0xd800
+_surr2 :: 0xdc00
+_surr3 :: 0xe000
+_surr_self :: 0x10000
is_surrogate :: proc(r: rune) -> bool {
- return _surr1 <= r && r < _surr3;
+ return _surr1 <= r && r < _surr3
}
decode_surrogate_pair :: proc(r1, r2: rune) -> rune {
if _surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3 {
- return (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self;
+ return (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self
}
- return REPLACEMENT_CHAR;
+ return REPLACEMENT_CHAR
}
encode_surrogate_pair :: proc(c: rune) -> (r1, r2: rune) {
- r := c;
+ r := c
if r < _surr_self || r > MAX_RUNE {
- return REPLACEMENT_CHAR, REPLACEMENT_CHAR;
+ return REPLACEMENT_CHAR, REPLACEMENT_CHAR
}
- r -= _surr_self;
- return _surr1 + (r>>10)&0x3ff, _surr2 + r&0x3ff;
+ r -= _surr_self
+ return _surr1 + (r>>10)&0x3ff, _surr2 + r&0x3ff
}
encode :: proc(d: []u16, s: []rune) -> int {
- n, m := 0, len(d);
+ n, m := 0, len(d)
loop: for r in s {
switch r {
case 0..<_surr1, _surr3 ..< _surr_self:
if m+1 < n { break loop; }
- d[n] = u16(r);
- n += 1;
+ d[n] = u16(r)
+ n += 1
case _surr_self ..= MAX_RUNE:
if m+2 < n { break loop; }
- r1, r2 := encode_surrogate_pair(r);
- d[n] = u16(r1);
- d[n+1] = u16(r2);
- n += 2;
+ r1, r2 := encode_surrogate_pair(r)
+ d[n] = u16(r1)
+ d[n+1] = u16(r2)
+ n += 2
case:
if m+1 < n { break loop; }
- d[n] = u16(REPLACEMENT_CHAR);
- n += 1;
+ d[n] = u16(REPLACEMENT_CHAR)
+ n += 1
}
}
- return n;
+ return n
}
encode_string :: proc(d: []u16, s: string) -> int {
- n, m := 0, len(d);
+ n, m := 0, len(d)
loop: for r in s {
switch r {
case 0..<_surr1, _surr3 ..< _surr_self:
if m+1 < n { break loop; }
- d[n] = u16(r);
- n += 1;
+ d[n] = u16(r)
+ n += 1
case _surr_self ..= MAX_RUNE:
if m+2 < n { break loop; }
- r1, r2 := encode_surrogate_pair(r);
- d[n] = u16(r1);
- d[n+1] = u16(r2);
- n += 2;
+ r1, r2 := encode_surrogate_pair(r)
+ d[n] = u16(r1)
+ d[n+1] = u16(r2)
+ n += 2
case:
if m+1 < n { break loop; }
- d[n] = u16(REPLACEMENT_CHAR);
- n += 1;
+ d[n] = u16(REPLACEMENT_CHAR)
+ n += 1
}
}
- return n;
+ return n
}
diff --git a/core/unicode/utf8/utf8.odin b/core/unicode/utf8/utf8.odin
index 61f54e07f..ba9bb6de0 100644
--- a/core/unicode/utf8/utf8.odin
+++ b/core/unicode/utf8/utf8.odin
@@ -1,36 +1,36 @@
package utf8
-RUNE_ERROR :: '\ufffd';
-RUNE_SELF :: 0x80;
-RUNE_BOM :: 0xfeff;
-RUNE_EOF :: ~rune(0);
-MAX_RUNE :: '\U0010ffff';
-UTF_MAX :: 4;
-
-SURROGATE_MIN :: 0xd800;
-SURROGATE_MAX :: 0xdfff;
-
-T1 :: 0b0000_0000;
-TX :: 0b1000_0000;
-T2 :: 0b1100_0000;
-T3 :: 0b1110_0000;
-T4 :: 0b1111_0000;
-T5 :: 0b1111_1000;
-
-MASKX :: 0b0011_1111;
-MASK2 :: 0b0001_1111;
-MASK3 :: 0b0000_1111;
-MASK4 :: 0b0000_0111;
-
-RUNE1_MAX :: 1<<7 - 1;
-RUNE2_MAX :: 1<<11 - 1;
-RUNE3_MAX :: 1<<16 - 1;
+RUNE_ERROR :: '\ufffd'
+RUNE_SELF :: 0x80
+RUNE_BOM :: 0xfeff
+RUNE_EOF :: ~rune(0)
+MAX_RUNE :: '\U0010ffff'
+UTF_MAX :: 4
+
+SURROGATE_MIN :: 0xd800
+SURROGATE_MAX :: 0xdfff
+
+T1 :: 0b0000_0000
+TX :: 0b1000_0000
+T2 :: 0b1100_0000
+T3 :: 0b1110_0000
+T4 :: 0b1111_0000
+T5 :: 0b1111_1000
+
+MASKX :: 0b0011_1111
+MASK2 :: 0b0001_1111
+MASK3 :: 0b0000_1111
+MASK4 :: 0b0000_0111
+
+RUNE1_MAX :: 1<<7 - 1
+RUNE2_MAX :: 1<<11 - 1
+RUNE3_MAX :: 1<<16 - 1
// The default lowest and highest continuation byte.
-LOCB :: 0b1000_0000;
-HICB :: 0b1011_1111;
+LOCB :: 0b1000_0000
+HICB :: 0b1011_1111
-Accept_Range :: struct {lo, hi: u8};
+Accept_Range :: struct {lo, hi: u8}
accept_ranges := [5]Accept_Range{
{0x80, 0xbf},
@@ -38,7 +38,7 @@ accept_ranges := [5]Accept_Range{
{0x80, 0x9f},
{0x90, 0xbf},
{0x80, 0x8f},
-};
+}
accept_sizes := [256]u8{
0x00..0x7f = 0xf0,
@@ -52,329 +52,329 @@ accept_sizes := [256]u8{
0xf1..0xf3 = 0x04,
0xf4 = 0x44,
0xf5..0xff = 0xf1,
-};
+}
encode_rune :: proc(c: rune) -> ([4]u8, int) {
- r := c;
+ r := c
- buf: [4]u8;
- i := u32(r);
- mask :: u8(0x3f);
+ buf: [4]u8
+ i := u32(r)
+ mask :: u8(0x3f)
if i <= 1<<7-1 {
- buf[0] = u8(r);
- return buf, 1;
+ buf[0] = u8(r)
+ return buf, 1
}
if i <= 1<<11-1 {
- buf[0] = 0xc0 | u8(r>>6);
- buf[1] = 0x80 | u8(r) & mask;
- return buf, 2;
+ buf[0] = 0xc0 | u8(r>>6)
+ buf[1] = 0x80 | u8(r) & mask
+ return buf, 2
}
// Invalid or Surrogate range
if i > 0x0010ffff ||
(0xd800 <= i && i <= 0xdfff) {
- r = 0xfffd;
+ r = 0xfffd
}
if i <= 1<<16-1 {
- buf[0] = 0xe0 | u8(r>>12);
- buf[1] = 0x80 | u8(r>>6) & mask;
- buf[2] = 0x80 | u8(r) & mask;
- return buf, 3;
+ buf[0] = 0xe0 | u8(r>>12)
+ buf[1] = 0x80 | u8(r>>6) & mask
+ buf[2] = 0x80 | u8(r) & mask
+ return buf, 3
}
- buf[0] = 0xf0 | u8(r>>18);
- buf[1] = 0x80 | u8(r>>12) & mask;
- buf[2] = 0x80 | u8(r>>6) & mask;
- buf[3] = 0x80 | u8(r) & mask;
- return buf, 4;
+ buf[0] = 0xf0 | u8(r>>18)
+ buf[1] = 0x80 | u8(r>>12) & mask
+ buf[2] = 0x80 | u8(r>>6) & mask
+ buf[3] = 0x80 | u8(r) & mask
+ return buf, 4
}
decode_rune_in_string :: #force_inline proc(s: string) -> (rune, int) {
- return decode_rune(transmute([]u8)s);
+ return decode_rune(transmute([]u8)s)
}
decode_rune :: proc(s: []u8) -> (rune, int) {
- n := len(s);
+ n := len(s)
if n < 1 {
- return RUNE_ERROR, 0;
+ return RUNE_ERROR, 0
}
- s0 := s[0];
- x := accept_sizes[s0];
+ s0 := s[0]
+ x := accept_sizes[s0]
if x >= 0xF0 {
- mask := rune(x) << 31 >> 31; // NOTE(bill): Create 0x0000 or 0xffff.
- return rune(s[0])&~mask | RUNE_ERROR&mask, 1;
+ mask := rune(x) << 31 >> 31 // NOTE(bill): Create 0x0000 or 0xffff.
+ return rune(s[0])&~mask | RUNE_ERROR&mask, 1
}
- sz := x & 7;
- accept := accept_ranges[x>>4];
+ sz := x & 7
+ accept := accept_ranges[x>>4]
if n < int(sz) {
- return RUNE_ERROR, 1;
+ return RUNE_ERROR, 1
}
- b1 := s[1];
+ b1 := s[1]
if b1 < accept.lo || accept.hi < b1 {
- return RUNE_ERROR, 1;
+ return RUNE_ERROR, 1
}
if sz == 2 {
- return rune(s0&MASK2)<<6 | rune(b1&MASKX), 2;
+ return rune(s0&MASK2)<<6 | rune(b1&MASKX), 2
}
- b2 := s[2];
+ b2 := s[2]
if b2 < LOCB || HICB < b2 {
- return RUNE_ERROR, 1;
+ return RUNE_ERROR, 1
}
if sz == 3 {
- return rune(s0&MASK3)<<12 | rune(b1&MASKX)<<6 | rune(b2&MASKX), 3;
+ return rune(s0&MASK3)<<12 | rune(b1&MASKX)<<6 | rune(b2&MASKX), 3
}
- b3 := s[3];
+ b3 := s[3]
if b3 < LOCB || HICB < b3 {
- return RUNE_ERROR, 1;
+ return RUNE_ERROR, 1
}
- return rune(s0&MASK4)<<18 | rune(b1&MASKX)<<12 | rune(b2&MASKX)<<6 | rune(b3&MASKX), 4;
+ return rune(s0&MASK4)<<18 | rune(b1&MASKX)<<12 | rune(b2&MASKX)<<6 | rune(b3&MASKX), 4
}
string_to_runes :: proc(s: string, allocator := context.allocator) -> (runes: []rune) {
- n := rune_count_in_string(s);
+ n := rune_count_in_string(s)
- runes = make([]rune, n, allocator);
- i := 0;
+ runes = make([]rune, n, allocator)
+ i := 0
for r in s {
- runes[i] = r;
- i += 1;
+ runes[i] = r
+ i += 1
}
- return;
+ return
}
runes_to_string :: proc(runes: []rune, allocator := context.allocator) -> string {
- byte_count := 0;
+ byte_count := 0
for r in runes {
- _, w := encode_rune(r);
- byte_count += w;
+ _, w := encode_rune(r)
+ byte_count += w
}
- bytes := make([]byte, byte_count, allocator);
- offset := 0;
+ bytes := make([]byte, byte_count, allocator)
+ offset := 0
for r in runes {
- b, w := encode_rune(r);
- copy(bytes[offset:], b[:w]);
- offset += w;
+ b, w := encode_rune(r)
+ copy(bytes[offset:], b[:w])
+ offset += w
}
- return string(bytes);
+ return string(bytes)
}
decode_last_rune_in_string :: #force_inline proc(s: string) -> (rune, int) {
- return decode_last_rune(transmute([]u8)s);
+ return decode_last_rune(transmute([]u8)s)
}
decode_last_rune :: proc(s: []u8) -> (rune, int) {
- r: rune;
- size: int;
- start, end, limit: int;
+ r: rune
+ size: int
+ start, end, limit: int
- end = len(s);
+ end = len(s)
if end == 0 {
- return RUNE_ERROR, 0;
+ return RUNE_ERROR, 0
}
- start = end-1;
- r = rune(s[start]);
+ start = end-1
+ r = rune(s[start])
if r < RUNE_SELF {
- return r, 1;
+ return r, 1
}
- limit = max(end - UTF_MAX, 0);
+ limit = max(end - UTF_MAX, 0)
for start-=1; start >= limit; start-=1 {
if rune_start(s[start]) {
- break;
+ break
}
}
- start = max(start, 0);
- r, size = decode_rune(s[start:end]);
+ start = max(start, 0)
+ r, size = decode_rune(s[start:end])
if start+size != end {
- return RUNE_ERROR, 1;
+ return RUNE_ERROR, 1
}
- return r, size;
+ return r, size
}
rune_at_pos :: proc(s: string, pos: int) -> rune {
if pos < 0 {
- return RUNE_ERROR;
+ return RUNE_ERROR
}
- i := 0;
+ i := 0
for r in s {
if i == pos {
- return r;
+ return r
}
- i += 1;
+ i += 1
}
- return RUNE_ERROR;
+ return RUNE_ERROR
}
rune_string_at_pos :: proc(s: string, pos: int) -> string {
if pos < 0 {
- return "";
+ return ""
}
- i := 0;
+ i := 0
for c, offset in s {
if i == pos {
- w := rune_size(c);
- return s[offset:][:w];
+ w := rune_size(c)
+ return s[offset:][:w]
}
- i += 1;
+ i += 1
}
- return "";
+ return ""
}
rune_at :: proc(s: string, byte_index: int) -> rune {
- r, _ := decode_rune_in_string(s[byte_index:]);
- return r;
+ r, _ := decode_rune_in_string(s[byte_index:])
+ return r
}
// Returns the byte offset in s of the rune at index pos, counting runes from the optional byte offset start.
// Returns -1 if pos is negative or the string ends before that rune is reached.
rune_offset :: proc(s: string, pos: int, start: int = 0) -> int {
if pos < 0 {
- return -1;
+ return -1
}
- i := 0;
+ i := 0
for _, offset in s[start:] {
if i == pos {
- return offset+start;
+ return offset+start
}
- i += 1;
+ i += 1
}
- return -1;
+ return -1
}
valid_rune :: proc(r: rune) -> bool {
if r < 0 {
- return false;
+ return false
} else if SURROGATE_MIN <= r && r <= SURROGATE_MAX {
- return false;
+ return false
} else if r > MAX_RUNE {
- return false;
+ return false
}
- return true;
+ return true
}
valid_string :: proc(s: string) -> bool {
- n := len(s);
+ n := len(s)
for i := 0; i < n; {
- si := s[i];
+ si := s[i]
if si < RUNE_SELF { // ascii
- i += 1;
- continue;
+ i += 1
+ continue
}
- x := accept_sizes[si];
+ x := accept_sizes[si]
if x == 0xf1 {
- return false;
+ return false
}
- size := int(x & 7);
+ size := int(x & 7)
if i+size > n {
- return false;
+ return false
}
- ar := accept_ranges[x>>4];
+ ar := accept_ranges[x>>4]
if b := s[i+1]; b < ar.lo || ar.hi < b {
- return false;
+ return false
} else if size == 2 {
// Okay
} else if c := s[i+2]; c < 0x80 || 0xbf < c {
- return false;
+ return false
} else if size == 3 {
// Okay
} else if d := s[i+3]; b < 0x80 || 0xbf < d {
- return false;
+ return false
}
- i += size;
+ i += size
}
- return true;
+ return true
}
rune_start :: #force_inline proc(b: u8) -> bool {
- return b&0xc0 != 0x80;
+ return b&0xc0 != 0x80
}
rune_count_in_string :: #force_inline proc(s: string) -> int {
- return rune_count(transmute([]u8)s);
+ return rune_count(transmute([]u8)s)
}
rune_count :: proc(s: []u8) -> int {
- count := 0;
- n := len(s);
+ count := 0
+ n := len(s)
for i := 0; i < n; {
- defer count += 1;
- si := s[i];
+ defer count += 1
+ si := s[i]
if si < RUNE_SELF { // ascii
- i += 1;
- continue;
+ i += 1
+ continue
}
- x := accept_sizes[si];
+ x := accept_sizes[si]
if x == 0xf1 {
- i += 1;
- continue;
+ i += 1
+ continue
}
- size := int(x & 7);
+ size := int(x & 7)
if i+size > n {
- i += 1;
- continue;
+ i += 1
+ continue
}
- ar := accept_ranges[x>>4];
+ ar := accept_ranges[x>>4]
if b := s[i+1]; b < ar.lo || ar.hi < b {
- size = 1;
+ size = 1
} else if size == 2 {
// Okay
} else if c := s[i+2]; c < 0x80 || 0xbf < c {
- size = 1;
+ size = 1
} else if size == 3 {
// Okay
} else if d := s[i+3]; d < 0x80 || 0xbf < d {
- size = 1;
+ size = 1
}
- i += size;
+ i += size
}
- return count;
+ return count
}
rune_size :: proc(r: rune) -> int {
switch {
- case r < 0: return -1;
- case r <= 1<<7 - 1: return 1;
- case r <= 1<<11 - 1: return 2;
- case SURROGATE_MIN <= r && r <= SURROGATE_MAX: return -1;
- case r <= 1<<16 - 1: return 3;
- case r <= MAX_RUNE: return 4;
- }
- return -1;
+ case r < 0: return -1
+ case r <= 1<<7 - 1: return 1
+ case r <= 1<<11 - 1: return 2
+ case SURROGATE_MIN <= r && r <= SURROGATE_MAX: return -1
+ case r <= 1<<16 - 1: return 3
+ case r <= MAX_RUNE: return 4
+ }
+ return -1
}
// full_rune reports whether the bytes in b begin with a full UTF-8 encoding of a rune.
// An invalid encoding is considered a full rune since it will convert as an error rune of width 1 (RUNE_ERROR).
full_rune :: proc(b: []byte) -> bool {
- n := len(b);
+ n := len(b)
if n == 0 {
- return false;
+ return false
}
- x := _first[b[0]];
+ x := _first[b[0]]
if n >= int(x & 7) {
- return true;
+ return true
}
- accept := accept_ranges[x>>4];
+ accept := accept_ranges[x>>4]
if n > 1 && (b[1] < accept.lo || accept.hi < b[1]) {
- return true;
+ return true
} else if n > 2 && (b[2] < LOCB || HICB < b[2]) {
- return true;
+ return true
}
- return false;
+ return false
}
// full_rune_in_string reports whether the bytes in s begin with a full UTF-8 encoding of a rune.
// An invalid encoding is considered a full rune since it will convert as an error rune of width 1 (RUNE_ERROR).
full_rune_in_string :: proc(s: string) -> bool {
- return full_rune(transmute([]byte)s);
+ return full_rune(transmute([]byte)s)
}
@@ -390,4 +390,4 @@ _first := [256]u8{
0xf1..0xf3 = 0x04, // accept 0, size 4
0xf4 = 0x44, // accept 4, size 4
0xf5..0xff = 0xf1, // invalid, size 1
-};
+}