1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
package utf16
import "core:unicode/utf8"
// Code point substituted by the procedures in this file whenever the input
// cannot be encoded or decoded (e.g. an unpaired surrogate or an out-of-range rune).
REPLACEMENT_CHAR :: '\ufffd'
// Largest code point the encoders in this file accept; anything above it is
// replaced with REPLACEMENT_CHAR.
MAX_RUNE :: '\U0010ffff'
// Start of the surrogate range; also the first value accepted as the leading
// (first) half of a surrogate pair.
_surr1 :: 0xd800
// First value accepted as the trailing (second) half of a surrogate pair;
// the leading half must be strictly below this.
_surr2 :: 0xdc00
// One past the end of the surrogate range (exclusive upper bound).
_surr3 :: 0xe000
// First code point that does not fit in a single 16-bit unit and is therefore
// encoded as a surrogate pair; also the offset subtracted/added when pairing.
_surr_self :: 0x10000
// Reports whether `r` lies in the surrogate range [_surr1, _surr3),
// i.e. it is either half of a UTF-16 surrogate pair.
is_surrogate :: proc(r: rune) -> bool {
	switch r {
	case _surr1 ..< _surr3:
		return true
	}
	return false
}
// Combines a leading surrogate `r1` and a trailing surrogate `r2` into the
// code point they encode.
//
// Returns REPLACEMENT_CHAR when `r1` is not in [_surr1, _surr2) or `r2` is not
// in [_surr2, _surr3).
decode_surrogate_pair :: proc(r1, r2: rune) -> rune {
	lead_ok  := r1 >= _surr1 && r1 < _surr2
	trail_ok := r2 >= _surr2 && r2 < _surr3
	if !lead_ok || !trail_ok {
		return REPLACEMENT_CHAR
	}

	// The leading half carries the upper 10 bits, the trailing half the lower 10.
	upper := (r1 - _surr1) << 10
	lower := r2 - _surr2
	return (upper | lower) + _surr_self
}
// Splits the code point `c` into its UTF-16 surrogate pair.
//
// Returns:
// - r1: the leading surrogate (offset from _surr1)
// - r2: the trailing surrogate (offset from _surr2)
//
// If `c` is below _surr_self or above MAX_RUNE it has no surrogate-pair form
// and both results are REPLACEMENT_CHAR.
encode_surrogate_pair :: proc(c: rune) -> (r1, r2: rune) {
	if c < _surr_self || c > MAX_RUNE {
		r1 = REPLACEMENT_CHAR
		r2 = REPLACEMENT_CHAR
		return
	}

	// 20-bit value: upper 10 bits go to the leading half, lower 10 to the trailing half.
	offset := c - _surr_self
	r1 = _surr1 + ((offset >> 10) & 0x3ff)
	r2 = _surr2 + (offset & 0x3ff)
	return
}
// Encodes the runes in `s` as UTF-16 code units into `d`.
//
// Inputs:
// - d: destination buffer of 16-bit code units
// - s: runes to encode
//
// Returns the number of code units written to `d`.
//
// Runes below _surr_self that are not surrogates take one unit; runes in
// [_surr_self, MAX_RUNE] take two (a surrogate pair); anything else (surrogate
// values, negative or out-of-range runes) is written as REPLACEMENT_CHAR.
// Encoding stops at the first rune that does not fit entirely in the remaining
// space of `d`, so a surrogate pair is never split and `d` is never overrun.
encode :: proc(d: []u16, s: []rune) -> int {
	n, m := 0, len(d)
	loop: for r in s {
		switch r {
		case 0..<_surr1, _surr3 ..< _surr_self:
			// Needs one free unit at index n.
			if n+1 > m { break loop }
			d[n] = u16(r)
			n += 1

		case _surr_self ..= MAX_RUNE:
			// Needs two free units at indices n and n+1.
			if n+2 > m { break loop }
			r1, r2 := encode_surrogate_pair(r)
			d[n]   = u16(r1)
			d[n+1] = u16(r2)
			n += 2

		case:
			// Not encodable: substitute the replacement character.
			if n+1 > m { break loop }
			d[n] = u16(REPLACEMENT_CHAR)
			n += 1
		}
	}
	return n
}
// Encodes the runes of the string `s` as UTF-16 code units into `d`.
//
// Inputs:
// - d: destination buffer of 16-bit code units
// - s: string whose runes are iterated and encoded
//
// Returns the number of code units written to `d`.
//
// Runes below _surr_self that are not surrogates take one unit; runes in
// [_surr_self, MAX_RUNE] take two (a surrogate pair); anything else is written
// as REPLACEMENT_CHAR. Encoding stops at the first rune that does not fit
// entirely in the remaining space of `d`, so a surrogate pair is never split
// and `d` is never overrun.
encode_string :: proc(d: []u16, s: string) -> int {
	n, m := 0, len(d)
	loop: for r in s {
		switch r {
		case 0..<_surr1, _surr3 ..< _surr_self:
			// Needs one free unit at index n.
			if n+1 > m { break loop }
			d[n] = u16(r)
			n += 1

		case _surr_self ..= MAX_RUNE:
			// Needs two free units at indices n and n+1.
			if n+2 > m { break loop }
			r1, r2 := encode_surrogate_pair(r)
			d[n]   = u16(r1)
			d[n+1] = u16(r2)
			n += 2

		case:
			// Not encodable: substitute the replacement character.
			if n+1 > m { break loop }
			d[n] = u16(REPLACEMENT_CHAR)
			n += 1
		}
	}
	return n
}
// Decodes the UTF-16 code units in `s` into runes stored in `d`.
//
// Inputs:
// - d: destination buffer of runes
// - s: UTF-16 code units to decode
//
// Returns `n`, the number of runes written to `d`.
//
// A valid leading/trailing surrogate pair yields one rune and consumes two
// units. A surrogate unit that is not part of a valid pair yields
// REPLACEMENT_CHAR and consumes one unit. Decoding stops when `s` is exhausted
// or `d` is full.
decode :: proc(d: []rune, s: []u16) -> (n: int) {
	src_len := len(s)
	idx := 0
	for idx < src_len && n < len(d) {
		unit := s[idx]
		out := rune(REPLACEMENT_CHAR)

		if unit < _surr1 || unit >= _surr3 {
			// Outside the surrogate range: the unit is the code point itself.
			out = rune(unit)
		} else if unit < _surr2 && idx+1 < src_len {
			// Leading surrogate with a following unit: pair up only if that
			// unit is a trailing surrogate.
			next := s[idx+1]
			if _surr2 <= next && next < _surr3 {
				out = decode_surrogate_pair(rune(unit), rune(next))
				idx += 1
			}
		}

		d[n] = out
		n += 1
		idx += 1
	}
	return
}
// Decodes the UTF-16 code units in `s` and writes them to `d` as UTF-8.
//
// Inputs:
// - d: destination byte buffer
// - s: UTF-16 code units to decode
//
// Returns `n`, the number of bytes written to `d`.
//
// A valid leading/trailing surrogate pair yields one rune and consumes two
// units. A surrogate unit that is not part of a valid pair is written as
// REPLACEMENT_CHAR. Decoding stops when `s` is exhausted or when the next
// rune's full UTF-8 encoding no longer fits in `d`; a partial multi-byte
// sequence is never written, so `d[:n]` always ends on a rune boundary.
decode_to_utf8 :: proc(d: []byte, s: []u16) -> (n: int) {
	for i := 0; i < len(s); i += 1 {
		if n >= len(d) {
			return
		}

		r := rune(REPLACEMENT_CHAR)

		switch c := s[i]; {
		case c < _surr1, _surr3 <= c:
			// Outside the surrogate range: the unit is the code point itself.
			r = rune(c)
		case _surr1 <= c && c < _surr2 && i+1 < len(s) &&
		     _surr2 <= s[i+1] && s[i+1] < _surr3:
			// Valid pair: consume the trailing unit as well.
			r = decode_surrogate_pair(rune(c), rune(s[i+1]))
			i += 1
		}

		b, w := utf8.encode_rune(r)
		// Stop rather than emit a truncated (invalid) UTF-8 sequence.
		if n+w > len(d) {
			return
		}
		n += copy(d[n:], b[:w])
	}
	return
}
|