aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgingerBill <bill@gingerbill.org>2023-05-22 20:44:07 +0100
committergingerBill <bill@gingerbill.org>2023-05-22 20:44:07 +0100
commite3360a0e5d0d2b91cd153d5cbc332f27cbb67661 (patch)
treeb8adfdc3a5ba6f642dcad93b49d72620373e7319
parent418144473405e3221ec32e8ce7073eed15a0a2cc (diff)
parentc7d571f0b5fbd8748f357ee920fde840d27da792 (diff)
Merge branch 'master' of https://github.com/odin-lang/Odin
-rw-r--r--core/encoding/json/marshal.odin6
-rw-r--r--core/encoding/json/parser.odin11
-rw-r--r--core/io/util.odin29
-rw-r--r--core/unicode/utf8/utf8.odin5
-rw-r--r--tests/core/encoding/json/test_core_json.odin16
5 files changed, 54 insertions, 13 deletions
diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin
index 4cf9264c5..d25015ac7 100644
--- a/core/encoding/json/marshal.odin
+++ b/core/encoding/json/marshal.odin
@@ -153,7 +153,7 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
case complex128: r, i = f64(real(z)), f64(imag(z))
case: return .Unsupported_Type
}
-
+
io.write_byte(w, '[') or_return
io.write_f64(w, r) or_return
io.write_string(w, ", ") or_return
@@ -165,8 +165,8 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
case runtime.Type_Info_String:
switch s in a {
- case string: io.write_quoted_string(w, s) or_return
- case cstring: io.write_quoted_string(w, string(s)) or_return
+ case string: io.write_quoted_string(w, s, '"', nil, true) or_return
+ case cstring: io.write_quoted_string(w, string(s), '"', nil, true) or_return
}
case runtime.Type_Info_Boolean:
diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin
index ed36ae33b..d007e16d7 100644
--- a/core/encoding/json/parser.odin
+++ b/core/encoding/json/parser.odin
@@ -2,6 +2,7 @@ package json
import "core:mem"
import "core:unicode/utf8"
+import "core:unicode/utf16"
import "core:strconv"
Parser :: struct {
@@ -403,11 +404,19 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
}
i += 6
+ // If this is a surrogate pair, decode as such by taking the next rune too.
+ if r >= utf8.SURROGATE_MIN && r <= utf8.SURROGATE_HIGH_MAX && len(s) > i + 2 && s[i:i+2] == "\\u" {
+ r2 := get_u4_rune(s[i:])
+ if r2 >= utf8.SURROGATE_LOW_MIN && r2 <= utf8.SURROGATE_MAX {
+ i += 6
+ r = utf16.decode_surrogate_pair(r, r2)
+ }
+ }
+
buf, buf_width := utf8.encode_rune(r)
copy(b[w:], buf[:buf_width])
w += buf_width
-
case '0':
if spec != .JSON {
b[w] = '\x00'
diff --git a/core/io/util.odin b/core/io/util.odin
index 46aa97919..cfd7d3608 100644
--- a/core/io/util.odin
+++ b/core/io/util.odin
@@ -2,6 +2,7 @@ package io
import "core:strconv"
import "core:unicode/utf8"
+import "core:unicode/utf16"
read_ptr :: proc(r: Reader, p: rawptr, byte_size: int, n_read: ^int = nil) -> (n: int, err: Error) {
return read(r, ([^]byte)(p)[:byte_size], n_read)
@@ -146,7 +147,7 @@ write_encoded_rune :: proc(w: Writer, r: rune, write_quote := true, n_written: ^
return
}
-write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false, n_written: ^int = nil) -> (n: int, err: Error) {
+write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false, n_written: ^int = nil, for_json := false) -> (n: int, err: Error) {
is_printable :: proc(r: rune) -> bool {
if r <= 0xff {
switch r {
@@ -163,7 +164,7 @@ write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false,
defer if n_written != nil {
n_written^ += n
}
-
+
if html_safe {
switch r {
case '<', '>', '&':
@@ -211,17 +212,29 @@ write_escaped_rune :: proc(w: Writer, r: rune, quote: byte, html_safe := false,
write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
}
case:
- write_byte(w, '\\', &n) or_return
- write_byte(w, 'U', &n) or_return
- for s := 28; s >= 0; s -= 4 {
- write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
+ if for_json {
+ // JSON has no 8-digit `\U` escape, so the rune is written as UTF-16
+ // code units, each one as its own 4-hex-digit `\uXXXX` escape.
+ buf: [2]u16
+ utf16.encode(buf[:], []rune{c})
+ for bc in buf {
+ write_byte(w, '\\', &n) or_return
+ write_byte(w, 'u', &n) or_return
+ // 16-bit unit: four nibbles, most significant first (bits 12, 8, 4, 0).
+ for s := 12; s >= 0; s -= 4 {
+ write_byte(w, DIGITS_LOWER[bc>>uint(s) & 0xf], &n) or_return
+ }
+ }
+ } else {
+ write_byte(w, '\\', &n) or_return
+ write_byte(w, 'U', &n) or_return
+ // `\U` is followed by exactly eight hex digits, so the loop must start
+ // at the top nibble (bit 28); starting at 24 would emit only seven.
+ for s := 28; s >= 0; s -= 4 {
+ write_byte(w, DIGITS_LOWER[c>>uint(s) & 0xf], &n) or_return
+ }
+ }
}
}
return
}
-write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written: ^int = nil) -> (n: int, err: Error) {
+write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written: ^int = nil, for_json := false) -> (n: int, err: Error) {
defer if n_written != nil {
n_written^ += n
}
@@ -240,7 +253,7 @@ write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written
continue
}
- n_wrapper(write_escaped_rune(w, r, quote), &n) or_return
+ n_wrapper(write_escaped_rune(w, r, quote, false, nil, for_json), &n) or_return
}
write_byte(w, quote, &n) or_return
diff --git a/core/unicode/utf8/utf8.odin b/core/unicode/utf8/utf8.odin
index a0da5c5d1..15c6c3650 100644
--- a/core/unicode/utf8/utf8.odin
+++ b/core/unicode/utf8/utf8.odin
@@ -10,6 +10,11 @@ UTF_MAX :: 4
SURROGATE_MIN :: 0xd800
SURROGATE_MAX :: 0xdfff
+// A high/leading surrogate is in the inclusive range SURROGATE_MIN..SURROGATE_HIGH_MAX.
+// A low/trailing surrogate is in the inclusive range SURROGATE_LOW_MIN..SURROGATE_MAX.
+SURROGATE_HIGH_MAX :: 0xdbff
+SURROGATE_LOW_MIN :: 0xdc00
+
T1 :: 0b0000_0000
TX :: 0b1000_0000
T2 :: 0b1100_0000
diff --git a/tests/core/encoding/json/test_core_json.odin b/tests/core/encoding/json/test_core_json.odin
index 0e6a6412f..937d1c738 100644
--- a/tests/core/encoding/json/test_core_json.odin
+++ b/tests/core/encoding/json/test_core_json.odin
@@ -32,6 +32,7 @@ main :: proc() {
parse_json(&t)
marshal_json(&t)
unmarshal_json(&t)
+ surrogate(&t)
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
if TEST_fail > 0 {
@@ -344,4 +345,17 @@ unmarshal_json :: proc(t: ^testing.T) {
for p, i in g.products {
expect(t, p == original_data.products[i], "Producted unmarshaled improperly")
}
-} \ No newline at end of file
+}
+
+// Round-trips a string containing a rune outside the Basic Multilingual Plane
+// through `json.marshal` and `json.unmarshal` and checks it comes back unchanged.
+@test
+surrogate :: proc(t: ^testing.T) {
+ input := `+ + * 😃 - /`
+
+ out, err := json.marshal(input)
+ expect(t, err == nil, fmt.tprintf("Expected `json.marshal(%q)` to return a nil error, got %v", input, err))
+
+ back: string
+ uerr := json.unmarshal(out, &back)
+ expect(t, uerr == nil, fmt.tprintf("Expected `json.unmarshal(%q)` to return a nil error, got %v", string(out), uerr))
+ // Report the value that was actually decoded; the error was already checked above.
+ expect(t, back == input, fmt.tprintf("Expected `json.unmarshal(%q)` to return %q, got %q", string(out), input, back))
+}