diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2023-05-22 18:28:59 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-22 18:28:59 +0200 |
| commit | c7d571f0b5fbd8748f357ee920fde840d27da792 (patch) | |
| tree | b3290720b50f8feda22d2587598241386431dc9c /core/encoding/json/parser.odin | |
| parent | 248f14a1efe95294fe20b4f930be4a948396ff44 (diff) | |
| parent | 5d54b710e7b90734ed352648c5c099b4e5d0701e (diff) | |
Merge pull request #2553 from laytan/fix-2550-json-unicode-issue
fix #2550 json encoding should use surrogate pairs per RFC7159
Diffstat (limited to 'core/encoding/json/parser.odin')
| -rw-r--r-- | core/encoding/json/parser.odin | 11 |
1 files changed, 10 insertions, 1 deletions
```diff
diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin
index ed36ae33b..d007e16d7 100644
--- a/core/encoding/json/parser.odin
+++ b/core/encoding/json/parser.odin
@@ -2,6 +2,7 @@ package json
 
 import "core:mem"
 import "core:unicode/utf8"
+import "core:unicode/utf16"
 import "core:strconv"
 
 Parser :: struct {
@@ -403,11 +404,19 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
 				}
 				i += 6
 
+				// If this is a surrogate pair, decode as such by taking the next rune too.
+				if r >= utf8.SURROGATE_MIN && r <= utf8.SURROGATE_HIGH_MAX && len(s) > i + 2 && s[i:i+2] == "\\u" {
+					r2 := get_u4_rune(s[i:])
+					if r2 >= utf8.SURROGATE_LOW_MIN && r2 <= utf8.SURROGATE_MAX {
+						i += 6
+						r = utf16.decode_surrogate_pair(r, r2)
+					}
+				}
+
 				buf, buf_width := utf8.encode_rune(r)
 				copy(b[w:], buf[:buf_width])
 				w += buf_width
 
-
 			case '0':
 				if spec != .JSON {
 					b[w] = '\x00'
```