aboutsummaryrefslogtreecommitdiff
path: root/core/unicode
diff options
context:
space:
mode:
author gingerBill <bill@gingerbill.org> 2021-11-27 14:57:20 +0000
committer gingerBill <bill@gingerbill.org> 2021-11-27 14:57:20 +0000
commit 7876660d8c78143c71d7ba2b42d52ea67219a628 (patch)
tree 5cf628d14f06945c40fb9e0b4bc60207810c3b29 /core/unicode
parent db9326f31d1e9c96705d713953e3cb8a0410c7e5 (diff)
Add new utf16 procedures: `decode`, `decode_to_utf8`
Diffstat (limited to 'core/unicode')
-rw-r--r--core/unicode/utf16/utf16.odin48
1 files changed, 48 insertions, 0 deletions
diff --git a/core/unicode/utf16/utf16.odin b/core/unicode/utf16/utf16.odin
index 380381f9a..2e349640e 100644
--- a/core/unicode/utf16/utf16.odin
+++ b/core/unicode/utf16/utf16.odin
@@ -1,5 +1,7 @@
package utf16
+import "core:unicode/utf8"
+
REPLACEMENT_CHAR :: '\ufffd'
MAX_RUNE :: '\U0010ffff'
@@ -80,3 +82,49 @@ encode_string :: proc(d: []u16, s: string) -> int {
}
return n
}
+
+// decode converts the UTF-16 code units in `s` into runes stored in `d`.
+//
+// A code unit outside the surrogate range is copied through unchanged. A high
+// surrogate immediately followed by a low surrogate is combined into a single
+// rune and consumes both code units. Any other surrogate (unpaired or out of
+// order) produces REPLACEMENT_CHAR.
+//
+// Decoding stops when `s` is exhausted or `d` is full.
+// Returns the number of runes written to `d`.
+decode :: proc(d: []rune, s: []u16) -> (n: int) {
+	idx := 0
+	for idx < len(s) && n < len(d) {
+		unit := s[idx]
+		out := rune(REPLACEMENT_CHAR)
+
+		if unit < _surr1 || _surr3 <= unit {
+			// Not a surrogate: the code unit is the code point.
+			out = rune(unit)
+		} else if unit < _surr2 && idx+1 < len(s) {
+			// High surrogate with a following unit: accept only a low surrogate.
+			next := s[idx+1]
+			if _surr2 <= next && next < _surr3 {
+				out = decode_surrogate_pair(rune(unit), rune(next))
+				idx += 1 // the low surrogate has been consumed as well
+			}
+		}
+
+		d[n] = out
+		n += 1
+		idx += 1
+	}
+	return
+}
+
+
+// decode_to_utf8 converts the UTF-16 code units in `s` to UTF-8 bytes stored in `d`.
+//
+// A code unit outside the surrogate range is encoded directly. A high surrogate
+// immediately followed by a low surrogate is combined into one rune and consumes
+// both code units. Any other surrogate (unpaired or out of order) is encoded as
+// REPLACEMENT_CHAR.
+//
+// Decoding stops when `s` is exhausted or `d` is full.
+// Returns the number of bytes written to `d`.
+//
+// NOTE: `copy` writes only as many bytes as fit in the remainder of `d`, so if
+// `d` fills up in the middle of a multi-byte rune the output ends with a
+// truncated UTF-8 sequence.
+decode_to_utf8 :: proc(d: []byte, s: []u16) -> (n: int) {
+	for i := 0; i < len(s); i += 1 {
+		if n >= len(d) {
+			return
+		}
+		r := rune(REPLACEMENT_CHAR)
+
+		switch c := s[i]; {
+		case c < _surr1, _surr3 <= c:
+			r = rune(c)
+		// The surrogate test must inspect the code unit `c`, not `r`: `r` still
+		// holds REPLACEMENT_CHAR here and would never fall in the surrogate range.
+		case _surr1 <= c && c < _surr2 && i+1 < len(s) &&
+			_surr2 <= s[i+1] && s[i+1] < _surr3:
+			r = decode_surrogate_pair(rune(c), rune(s[i+1]))
+			i += 1 // the low surrogate has been consumed as well
+		}
+
+		b, w := utf8.encode_rune(r)
+		n += copy(d[n:], b[:w])
+	}
+	return
+}
\ No newline at end of file