diff options
| author | gingerBill <bill@gingerbill.org> | 2021-11-27 14:57:20 +0000 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2021-11-27 14:57:20 +0000 |
| commit | 7876660d8c78143c71d7ba2b42d52ea67219a628 (patch) | |
| tree | 5cf628d14f06945c40fb9e0b4bc60207810c3b29 /core/unicode | |
| parent | db9326f31d1e9c96705d713953e3cb8a0410c7e5 (diff) | |
Add new utf16 procedures: `decode`, `decode_to_utf8`
Diffstat (limited to 'core/unicode')
| -rw-r--r-- | core/unicode/utf16/utf16.odin | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/core/unicode/utf16/utf16.odin b/core/unicode/utf16/utf16.odin
index 380381f9a..2e349640e 100644
--- a/core/unicode/utf16/utf16.odin
+++ b/core/unicode/utf16/utf16.odin
@@ -1,5 +1,7 @@
 package utf16
 
+import "core:unicode/utf8"
+
 REPLACEMENT_CHAR :: '\ufffd'
 MAX_RUNE :: '\U0010ffff'
 
@@ -80,3 +82,69 @@ encode_string :: proc(d: []u16, s: string) -> int {
 	}
 	return n
 }
+
+// decode converts the UTF-16 code units in `s` into runes, storing them in `d`,
+// and returns `n`, the number of runes written.
+//
+// A code unit in [_surr1, _surr2) followed by one in [_surr2, _surr3) is
+// combined into a single rune via decode_surrogate_pair; any other unit
+// inside [_surr1, _surr3) is written as REPLACEMENT_CHAR. Decoding stops once
+// `d` is full, so input that does not fit is dropped.
+decode :: proc(d: []rune, s: []u16) -> (n: int) {
+	for i := 0; i < len(s); i += 1 {
+		if n >= len(d) {
+			return
+		}
+
+		// Stays REPLACEMENT_CHAR unless one of the cases below matches.
+		r := rune(REPLACEMENT_CHAR)
+
+		switch c := s[i]; {
+		case c < _surr1, _surr3 <= c:
+			r = rune(c)
+		case _surr1 <= c && c < _surr2 && i+1 < len(s) &&
+			_surr2 <= s[i+1] && s[i+1] < _surr3:
+			r = decode_surrogate_pair(rune(c), rune(s[i+1]))
+			i += 1 // the second unit of the pair has been consumed
+		}
+		d[n] = r
+
+		n += 1
+	}
+	return
+}
+
+
+// decode_to_utf8 converts the UTF-16 code units in `s` to UTF-8 bytes stored in
+// `d` and returns `n`, the number of bytes written.
+//
+// Surrogate handling matches `decode`: a unit in [_surr1, _surr2) followed by one
+// in [_surr2, _surr3) is combined via decode_surrogate_pair, and any other unit
+// inside [_surr1, _surr3) becomes REPLACEMENT_CHAR.
+//
+// NOTE: if `d` runs out of room in the middle of a multi-byte rune, only the
+// bytes that fit are copied, so the output may end with a partial sequence.
+decode_to_utf8 :: proc(d: []byte, s: []u16) -> (n: int) {
+	for i := 0; i < len(s); i += 1 {
+		if n >= len(d) {
+			return
+		}
+		// Stays REPLACEMENT_CHAR unless one of the cases below matches.
+		r := rune(REPLACEMENT_CHAR)
+
+		switch c := s[i]; {
+		case c < _surr1, _surr3 <= c:
+			r = rune(c)
+		// Test the code unit `c` here, not `r`: `r` still holds REPLACEMENT_CHAR
+		// at this point, not the value read from `s`.
+		case _surr1 <= c && c < _surr2 && i+1 < len(s) &&
+			_surr2 <= s[i+1] && s[i+1] < _surr3:
+			r = decode_surrogate_pair(rune(c), rune(s[i+1]))
+			i += 1 // the second unit of the pair has been consumed
+		}
+
+		b, w := utf8.encode_rune(r)
+		n += copy(d[n:], b[:w])
+	}
+	return
+}
\ No newline at end of file |