From 3148acf6a69868ddec0780daa293866fe079b7d4 Mon Sep 17 00:00:00 2001 From: dozn Date: Mon, 17 Mar 2025 01:51:01 -0700 Subject: [core:encoding/json] When Unmarshalling, Only Match Struct Tags If Present --- core/encoding/json/unmarshal.odin | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'core/encoding/json') diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index 57371e360..a5f3bd8c6 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -390,6 +390,9 @@ unmarshal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_loca return prev } +// Struct tags can include not only the name of the JSON key, but also a tag such as `omitempty`. +// Example: `json:"key_name,omitempty"` +// This returns the first field as `json_name`, and the rest are returned as `extra`. @(private) json_name_from_tag_value :: proc(value: string) -> (json_name, extra: string) { json_name = value @@ -425,12 +428,6 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm defer delete(key, p.allocator) unmarshal_expect_token(p, .Colon) - - field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool { - prev_set := field_used[offset/8] & byte(offset&7) != 0 - field_used[offset/8] |= byte(offset&7) - return prev_set - } field_used_bytes := (reflect.size_of_typeid(ti.id)+7)/8 field_used := intrinsics.alloca(field_used_bytes + 1, 1) // + 1 to not overflow on size_of 0 types. @@ -449,7 +446,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm if use_field_idx < 0 { for field, field_idx in fields { - if key == field.name { + tag_value := reflect.struct_tag_get(field.tag, "json") + json_name, _ := json_name_from_tag_value(tag_value) + if json_name == "" && key == field.name { use_field_idx = field_idx break } @@ -470,7 +469,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm } } - if field.name == key || (field.tag != "" && reflect.struct_tag_get(field.tag, "json") == key) { + tag_value := reflect.struct_tag_get(field.tag, "json") + json_name, _ := json_name_from_tag_value(tag_value) + if (json_name == "" && field.name == key) || json_name == key { offset = field.offset type = field.type found = true @@ -492,6 +493,11 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm } if field_found { + field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool { + prev_set := field_used[offset/8] & byte(offset&7) != 0 + field_used[offset/8] |= byte(offset&7) + return prev_set + } if field_test(field_used, offset) { return .Multiple_Use_Field } -- cgit v1.2.3 From e4bc9677af62c74bb23f4c00d82d2a685ce64e50 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 22 Mar 2025 00:20:00 +0100 Subject: fix unmarshalling bit sets in json Fixes #4761 --- core/encoding/json/unmarshal.odin | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'core/encoding/json') diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index 57371e360..151bd69c3 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -117,9 +117,25 @@ assign_int :: proc(val: any, i: $T) -> bool { case uint: dst = uint (i) case uintptr: dst = uintptr(i) case: + is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool { + if ti == nil { + return false + } + t := runtime.type_info_base(ti) + #partial switch info in t.variant { + case runtime.Type_Info_Integer: + switch info.endianness { + case .Platform: return false + case .Little: return ODIN_ENDIAN != .Little + case .Big: return ODIN_ENDIAN != .Big + } + } + return false + } + ti := type_info_of(v.id) - if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok { - do_byte_swap := !reflect.bit_set_is_big_endian(v) + if info, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok { + do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying) switch ti.size * 8 { case 0: // no-op. case 8: -- cgit v1.2.3 From 13566306d2134bf3d4790bb569af8c888908a97b Mon Sep 17 00:00:00 2001 From: Ely Alon Date: Mon, 28 Apr 2025 22:03:20 +0300 Subject: Fix typo in private function --- core/encoding/json/tokenizer.odin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'core/encoding/json') diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin index e46d879a7..ad928b7d9 100644 --- a/core/encoding/json/tokenizer.odin +++ b/core/encoding/json/tokenizer.odin @@ -101,7 +101,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { } } - scan_espace :: proc(t: ^Tokenizer) -> bool { + scan_escape :: proc(t: ^Tokenizer) -> bool { switch t.r { case '"', '\'', '\\', '/', 'b', 'n', 'r', 't', 'f': next_rune(t) @@ -310,7 +310,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { break } if r == '\\' { - scan_espace(t) + scan_escape(t) } } -- cgit v1.2.3 From b7de15caa342fd81fb203015e2fd22a82e523342 Mon Sep 17 00:00:00 2001 From: Feoramund <161657516+Feoramund@users.noreply.github.com> Date: Thu, 5 Jun 2025 16:29:41 -0400 Subject: Clarify `strconv.append_*` to `strconv.write_*` --- core/encoding/cbor/cbor.odin | 6 +-- core/encoding/json/marshal.odin | 8 ++-- core/fmt/fmt.odin | 8 ++-- core/io/util.odin | 16 +++---- core/math/fixed/fixed.odin | 6 +-- core/net/url.odin | 2 +- core/os/os.odin | 2 +- core/os/os2/file_util.odin | 2 +- core/strconv/integers.odin | 14 +++---- core/strconv/strconv.odin | 84 ++++++++++++++++++------------------- core/strings/builder.odin | 12 +++--- tests/core/math/test_core_math.odin | 2 +- 12 files changed, 81 insertions(+), 81 deletions(-) (limited to 'core/encoding/json') diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 8eb829ed3..1fb7c34ab 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -385,17 +385,17 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i // which we want for the diagnostic format. case f16: buf: [64]byte - str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16)) + str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16)) if str[0] == '+' && str != "+Inf" { str = str[1:] } io.write_string(w, str) or_return case f32: buf: [128]byte - str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32)) + str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32)) if str[0] == '+' && str != "+Inf" { str = str[1:] } io.write_string(w, str) or_return case f64: buf: [256]byte - str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64)) + str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64)) if str[0] == '+' && str != "+Inf" { str = str[1:] } io.write_string(w, str) or_return diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index 020facd14..ebb9a639c 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -108,13 +108,13 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err: if opt.write_uint_as_hex && (opt.spec == .JSON5 || opt.spec == .MJSON) { switch i in a { case u8, u16, u32, u64, u128: - s = strconv.append_bits_128(buf[:], u, 16, info.signed, 8*ti.size, "0123456789abcdef", { .Prefix }) + s = strconv.write_bits_128(buf[:], u, 16, info.signed, 8*ti.size, "0123456789abcdef", { .Prefix }) case: - s = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil) + s = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil) } } else { - s = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil) + s = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil) } io.write_string(w, s) or_return @@ -286,7 +286,7 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err: case runtime.Type_Info_Integer: buf: [40]byte u := cast_any_int_to_u128(ka) - name = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil) + name = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil) opt_write_key(w, opt, name) or_return case: return .Unsupported_Type diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 9c07847dd..0f6470cca 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1122,7 +1122,7 @@ _fmt_int :: proc(fi: ^Info, u: u64, base: int, is_signed: bool, bit_size: int, d flags: strconv.Int_Flags if fi.hash && !fi.zero && start == 0 { flags += {.Prefix} } if fi.plus { flags += {.Plus} } - s := strconv.append_bits(buf[start:], u, base, is_signed, bit_size, digits, flags) + s := strconv.write_bits(buf[start:], u, base, is_signed, bit_size, digits, flags) prev_zero := fi.zero defer fi.zero = prev_zero fi.zero = false @@ -1207,7 +1207,7 @@ _fmt_int_128 :: proc(fi: ^Info, u: u128, base: int, is_signed: bool, bit_size: i flags: strconv.Int_Flags if fi.hash && !fi.zero && start == 0 { flags += {.Prefix} } if fi.plus { flags += {.Plus} } - s := strconv.append_bits_128(buf[start:], u, base, is_signed, bit_size, digits, flags) + s := strconv.write_bits_128(buf[start:], u, base, is_signed, bit_size, digits, flags) if fi.hash && fi.zero && fi.indent == 0 { c: byte = 0 @@ -1272,7 +1272,7 @@ _fmt_memory :: proc(fi: ^Info, u: u64, is_signed: bool, bit_size: int, units: st } buf: [256]byte - str := strconv.append_float(buf[:], amt, 'f', prec, 64) + str := strconv.write_float(buf[:], amt, 'f', prec, 64) // Add the unit at the end. copy(buf[len(str):], units[off:off+unit_len]) @@ -1424,7 +1424,7 @@ _fmt_float_as :: proc(fi: ^Info, v: f64, bit_size: int, verb: rune, float_fmt: b buf: [386]byte // Can return "NaN", "+Inf", "-Inf", "+", "-". - str := strconv.append_float(buf[:], v, float_fmt, prec, bit_size) + str := strconv.write_float(buf[:], v, float_fmt, prec, bit_size) if !fi.plus { // Strip sign from "+" but not "+Inf". diff --git a/core/io/util.odin b/core/io/util.odin index fdbbd5b9f..fa98e007b 100644 --- a/core/io/util.odin +++ b/core/io/util.odin @@ -22,12 +22,12 @@ write_ptr_at :: proc(w: Writer_At, p: rawptr, byte_size: int, offset: i64, n_wri write_u64 :: proc(w: Writer, i: u64, base: int = 10, n_written: ^int = nil) -> (n: int, err: Error) { buf: [32]byte - s := strconv.append_bits(buf[:], i, base, false, 64, strconv.digits, nil) + s := strconv.write_bits(buf[:], i, base, false, 64, strconv.digits, nil) return write_string(w, s, n_written) } write_i64 :: proc(w: Writer, i: i64, base: int = 10, n_written: ^int = nil) -> (n: int, err: Error) { buf: [32]byte - s := strconv.append_bits(buf[:], u64(i), base, true, 64, strconv.digits, nil) + s := strconv.write_bits(buf[:], u64(i), base, true, 64, strconv.digits, nil) return write_string(w, s, n_written) } @@ -40,18 +40,18 @@ write_int :: proc(w: Writer, i: int, base: int = 10, n_written: ^int = nil) -> ( write_u128 :: proc(w: Writer, i: u128, base: int = 10, n_written: ^int = nil) -> (n: int, err: Error) { buf: [39]byte - s := strconv.append_bits_128(buf[:], i, base, false, 128, strconv.digits, nil) + s := strconv.write_bits_128(buf[:], i, base, false, 128, strconv.digits, nil) return write_string(w, s, n_written) } write_i128 :: proc(w: Writer, i: i128, base: int = 10, n_written: ^int = nil) -> (n: int, err: Error) { buf: [40]byte - s := strconv.append_bits_128(buf[:], u128(i), base, true, 128, strconv.digits, nil) + s := strconv.write_bits_128(buf[:], u128(i), base, true, 128, strconv.digits, nil) return write_string(w, s, n_written) } write_f16 :: proc(w: Writer, val: f16, n_written: ^int = nil) -> (n: int, err: Error) { buf: [386]byte - str := strconv.append_float(buf[1:], f64(val), 'f', 2*size_of(val), 8*size_of(val)) + str := strconv.write_float(buf[1:], f64(val), 'f', 2*size_of(val), 8*size_of(val)) s := buf[:len(str)+1] if s[1] == '+' || s[1] == '-' { s = s[1:] @@ -67,7 +67,7 @@ write_f16 :: proc(w: Writer, val: f16, n_written: ^int = nil) -> (n: int, err: E write_f32 :: proc(w: Writer, val: f32, n_written: ^int = nil) -> (n: int, err: Error) { buf: [386]byte - str := strconv.append_float(buf[1:], f64(val), 'f', 2*size_of(val), 8*size_of(val)) + str := strconv.write_float(buf[1:], f64(val), 'f', 2*size_of(val), 8*size_of(val)) s := buf[:len(str)+1] if s[1] == '+' || s[1] == '-' { s = s[1:] @@ -83,7 +83,7 @@ write_f32 :: proc(w: Writer, val: f32, n_written: ^int = nil) -> (n: int, err: E write_f64 :: proc(w: Writer, val: f64, n_written: ^int = nil) -> (n: int, err: Error) { buf: [386]byte - str := strconv.append_float(buf[1:], val, 'f', 2*size_of(val), 8*size_of(val)) + str := strconv.write_float(buf[1:], val, 'f', 2*size_of(val), 8*size_of(val)) s := buf[:len(str)+1] if s[1] == '+' || s[1] == '-' { s = s[1:] @@ -130,7 +130,7 @@ write_encoded_rune :: proc(w: Writer, r: rune, write_quote := true, n_written: ^ write_string(w, `\x`, &n) or_return buf: [2]byte - s := strconv.append_bits(buf[:], u64(r), 16, true, 64, strconv.digits, nil) + s := strconv.write_bits(buf[:], u64(r), 16, true, 64, strconv.digits, nil) switch len(s) { case 0: write_string(w, "00", &n) or_return diff --git a/core/math/fixed/fixed.odin b/core/math/fixed/fixed.odin index b23090307..119e727a7 100644 --- a/core/math/fixed/fixed.odin +++ b/core/math/fixed/fixed.odin @@ -124,16 +124,16 @@ append :: proc(dst: []byte, x: $T/Fixed($Backing, $Fraction_Width)) -> string { when size_of(Backing) < 16 { T :: u64 - append_uint :: strconv.append_uint + write_uint :: strconv.write_uint } else { T :: u128 - append_uint :: strconv.append_u128 + write_uint :: strconv.write_u128 } integer := T(x.i) >> Fraction_Width fraction := T(x.i) & (1< string { bytes, n := utf8.encode_rune(ch) for byte in bytes[:n] { buf: [2]u8 = --- - t := strconv.append_int(buf[:], i64(byte), 16) + t := strconv.write_int(buf[:], i64(byte), 16) strings.write_rune(&b, '%') strings.write_string(&b, t) } diff --git a/core/os/os.odin b/core/os/os.odin index 30b86d4cd..fde48fbf4 100644 --- a/core/os/os.odin +++ b/core/os/os.odin @@ -57,7 +57,7 @@ write_encoded_rune :: proc(f: Handle, r: rune) -> (n: int, err: Error) { if r < 32 { if wrap(write_string(f, "\\x"), &n, &err) { return } b: [2]byte - s := strconv.append_bits(b[:], u64(r), 16, true, 64, strconv.digits, nil) + s := strconv.write_bits(b[:], u64(r), 16, true, 64, strconv.digits, nil) switch len(s) { case 0: if wrap(write_string(f, "00"), &n, &err) { return } case 1: if wrap(write_rune(f, '0'), &n, &err) { return } diff --git a/core/os/os2/file_util.odin b/core/os/os2/file_util.odin index 8af46fab3..407c38f88 100644 --- a/core/os/os2/file_util.odin +++ b/core/os/os2/file_util.odin @@ -59,7 +59,7 @@ write_encoded_rune :: proc(f: ^File, r: rune) -> (n: int, err: Error) { if r < 32 { if wrap(write_string(f, "\\x"), &n, &err) { return } b: [2]byte - s := strconv.append_bits(b[:], u64(r), 16, true, 64, strconv.digits, nil) + s := strconv.write_bits(b[:], u64(r), 16, true, 64, strconv.digits, nil) switch len(s) { case 0: if wrap(write_string(f, "00"), &n, &err) { return } case 1: if wrap(write_rune(f, '0'), &n, &err) { return } diff --git a/core/strconv/integers.odin b/core/strconv/integers.odin index 98a432ac5..0db110d10 100644 --- a/core/strconv/integers.odin +++ b/core/strconv/integers.odin @@ -48,7 +48,7 @@ is_integer_negative :: proc(x: u64, is_signed: bool, bit_size: int) -> (u: u64, return } /* -Appends the string representation of an integer to a buffer with specified base, flags, and digit set. +Writes the string representation of an integer to a buffer with specified base, flags, and digit set. **Inputs** - buf: The buffer to append the integer representation to @@ -62,9 +62,9 @@ Appends the string representation of an integer to a buffer with specified base, **Returns** - The string containing the integer representation appended to the buffer */ -append_bits :: proc(buf: []byte, x: u64, base: int, is_signed: bool, bit_size: int, digits: string, flags: Int_Flags) -> string { +write_bits :: proc(buf: []byte, x: u64, base: int, is_signed: bool, bit_size: int, digits: string, flags: Int_Flags) -> string { if base < 2 || base > MAX_BASE { - panic("strconv: illegal base passed to append_bits") + panic("strconv: illegal base passed to write_bits") } a: [129]byte @@ -146,7 +146,7 @@ is_integer_negative_128 :: proc(x: u128, is_signed: bool, bit_size: int) -> (u: return } /* -Appends the string representation of a 128-bit integer to a buffer with specified base, flags, and digit set. +Writes the string representation of a 128-bit integer to a buffer with specified base, flags, and digit set. **Inputs** - buf: The buffer to append the integer representation to @@ -158,11 +158,11 @@ Appends the string representation of a 128-bit integer to a buffer with specifie - flags: The Int_Flags bit set to control integer formatting **Returns** -- The string containing the integer representation appended to the buffer +- The string containing the integer representation written to the buffer */ -append_bits_128 :: proc(buf: []byte, x: u128, base: int, is_signed: bool, bit_size: int, digits: string, flags: Int_Flags) -> string { +write_bits_128 :: proc(buf: []byte, x: u128, base: int, is_signed: bool, bit_size: int, digits: string, flags: Int_Flags) -> string { if base < 2 || base > MAX_BASE { - panic("strconv: illegal base passed to append_bits") + panic("strconv: illegal base passed to write_bits") } a: [140]byte diff --git a/core/strconv/strconv.odin b/core/strconv/strconv.odin index 4cecd1911..99a290d83 100644 --- a/core/strconv/strconv.odin +++ b/core/strconv/strconv.odin @@ -1451,19 +1451,19 @@ parse_quaternion64 :: proc(str: string, n: ^int = nil) -> (value: quaternion64, return cast(quaternion64)v, ok } /* -Appends a boolean value as a string to the given buffer +Writes a boolean value as a string to the given buffer **Inputs** -- buf: The buffer to append the boolean value to -- b: The boolean value to be appended +- buf: The buffer to write the boolean value to +- b: The boolean value to be written Example: import "core:fmt" import "core:strconv" - append_bool_example :: proc() { + write_bool_example :: proc() { buf: [6]byte - result := strconv.append_bool(buf[:], true) + result := strconv.write_bool(buf[:], true) fmt.println(result, buf) } @@ -1472,9 +1472,9 @@ Output: true [116, 114, 117, 101, 0, 0] **Returns** -- The resulting string after appending the boolean value +- The resulting string after writing the boolean value */ -append_bool :: proc(buf: []byte, b: bool) -> string { +write_bool :: proc(buf: []byte, b: bool) -> string { n := 0 if b { n = copy(buf, "true") @@ -1484,20 +1484,20 @@ append_bool :: proc(buf: []byte, b: bool) -> string { return string(buf[:n]) } /* -Appends an unsigned integer value as a string to the given buffer with the specified base +Writes an unsigned integer value as a string to the given buffer with the specified base **Inputs** -- buf: The buffer to append the unsigned integer value to -- u: The unsigned integer value to be appended +- buf: The buffer to write the unsigned integer value to +- u: The unsigned integer value to be written - base: The base to use for converting the integer value Example: import "core:fmt" import "core:strconv" - append_uint_example :: proc() { + write_uint_example :: proc() { buf: [4]byte - result := strconv.append_uint(buf[:], 42, 16) + result := strconv.write_uint(buf[:], 42, 16) fmt.println(result, buf) } @@ -1506,26 +1506,26 @@ Output: 2a [50, 97, 0, 0] **Returns** -- The resulting string after appending the unsigned integer value +- The resulting string after writing the unsigned integer value */ -append_uint :: proc(buf: []byte, u: u64, base: int) -> string { - return append_bits(buf, u, base, false, 8*size_of(uint), digits, nil) +write_uint :: proc(buf: []byte, u: u64, base: int) -> string { + return write_bits(buf, u, base, false, 8*size_of(uint), digits, nil) } /* -Appends a signed integer value as a string to the given buffer with the specified base +Writes a signed integer value as a string to the given buffer with the specified base **Inputs** -- buf: The buffer to append the signed integer value to -- i: The signed integer value to be appended +- buf: The buffer to write the signed integer value to +- i: The signed integer value to be written - base: The base to use for converting the integer value Example: import "core:fmt" import "core:strconv" - append_int_example :: proc() { + write_int_example :: proc() { buf: [4]byte - result := strconv.append_int(buf[:], -42, 10) + result := strconv.write_int(buf[:], -42, 10) fmt.println(result, buf) } @@ -1534,16 +1534,16 @@ Output: -42 [45, 52, 50, 0] **Returns** -- The resulting string after appending the signed integer value +- The resulting string after writing the signed integer value */ -append_int :: proc(buf: []byte, i: i64, base: int) -> string { - return append_bits(buf, u64(i), base, true, 8*size_of(int), digits, nil) +write_int :: proc(buf: []byte, i: i64, base: int) -> string { + return write_bits(buf, u64(i), base, true, 8*size_of(int), digits, nil) } -append_u128 :: proc(buf: []byte, u: u128, base: int) -> string { - return append_bits_128(buf, u, base, false, 8*size_of(uint), digits, nil) +write_u128 :: proc(buf: []byte, u: u128, base: int) -> string { + return write_bits_128(buf, u, base, false, 8*size_of(uint), digits, nil) } /* @@ -1571,7 +1571,7 @@ Output: - The resulting string after converting the integer value */ itoa :: proc(buf: []byte, i: int) -> string { - return append_int(buf, i64(i), 10) + return write_int(buf, i64(i), 10) } /* Converts a string to an integer value @@ -1623,14 +1623,14 @@ atof :: proc(s: string) -> f64 { v, _ := parse_f64(s) return v } -// Alias to `append_float` -ftoa :: append_float +// Alias to `write_float` +ftoa :: write_float /* -Appends a float64 value as a string to the given buffer with the specified format and precision +Writes a float64 value as a string to the given buffer with the specified format and precision **Inputs** -- buf: The buffer to append the float64 value to -- f: The float64 value to be appended +- buf: The buffer to write the float64 value to +- f: The float64 value to be written - fmt: The byte specifying the format to use for the conversion - prec: The precision to use for the conversion - bit_size: The size of the float in bits (32 or 64) @@ -1639,9 +1639,9 @@ Example: import "core:fmt" import "core:strconv" - append_float_example :: proc() { + write_float_example :: proc() { buf: [8]byte - result := strconv.append_float(buf[:], 3.14159, 'f', 2, 64) + result := strconv.write_float(buf[:], 3.14159, 'f', 2, 64) fmt.println(result, buf) } @@ -1650,16 +1650,16 @@ Output: +3.14 [43, 51, 46, 49, 52, 0, 0, 0] **Returns** -- The resulting string after appending the float +- The resulting string after writing the float */ -append_float :: proc(buf: []byte, f: f64, fmt: byte, prec, bit_size: int) -> string { +write_float :: proc(buf: []byte, f: f64, fmt: byte, prec, bit_size: int) -> string { return string(generic_ftoa(buf, f, fmt, prec, bit_size)) } /* -Appends a quoted string representation of the input string to a given byte slice and returns the result as a string +Writes a quoted string representation of the input string to a given byte slice and returns the result as a string **Inputs** -- buf: The byte slice to which the quoted string will be appended +- buf: The byte slice to which the quoted string will be written - str: The input string to be quoted !! ISSUE !! NOT EXPECTED -- "\"hello\"" was expected @@ -1679,7 +1679,7 @@ Output: "'h''e''l''l''o'" [34, 39, 104, 39, 39, 101, 39, 39, 108, 39, 39, 108, 39, 39, 111, 39, 34, 0, 0, 0] **Returns** -- The resulting string after appending the quoted string representation +- The resulting string after writing the quoted string representation */ quote :: proc(buf: []byte, str: string) -> string { write_byte :: proc(buf: []byte, i: ^int, bytes: ..byte) { @@ -1719,10 +1719,10 @@ quote :: proc(buf: []byte, str: string) -> string { return string(buf[:i]) } /* -Appends a quoted rune representation of the input rune to a given byte slice and returns the result as a string +Writes a quoted rune representation of the input rune to a given byte slice and returns the result as a string **Inputs** -- buf: The byte slice to which the quoted rune will be appended +- buf: The byte slice to which the quoted rune will be written - r: The input rune to be quoted Example: @@ -1740,7 +1740,7 @@ Output: 'A' [39, 65, 39, 0] **Returns** -- The resulting string after appending the quoted rune representation +- The resulting string after writing the quoted rune representation */ quote_rune :: proc(buf: []byte, r: rune) -> string { write_byte :: proc(buf: []byte, i: ^int, bytes: ..byte) { @@ -1783,7 +1783,7 @@ quote_rune :: proc(buf: []byte, r: rune) -> string { if r < 32 { write_string(buf, &i, "\\x") b: [2]byte - s := append_bits(b[:], u64(r), 16, true, 64, digits, nil) + s := write_bits(b[:], u64(r), 16, true, 64, digits, nil) switch len(s) { case 0: write_string(buf, &i, "00") case 1: write_rune(buf, &i, '0') diff --git a/core/strings/builder.odin b/core/strings/builder.odin index 05382f04e..b1180d5e9 100644 --- a/core/strings/builder.odin +++ b/core/strings/builder.odin @@ -675,7 +675,7 @@ Returns: */ write_float :: proc(b: ^Builder, f: f64, fmt: byte, prec, bit_size: int, always_signed := false) -> (n: int) { buf: [384]byte - s := strconv.append_float(buf[:], f, fmt, prec, bit_size) + s := strconv.write_float(buf[:], f, fmt, prec, bit_size) // If the result starts with a `+` then unless we always want signed results, // we skip it unless it's followed by an `I` (because of +Inf). if !always_signed && (buf[0] == '+' && buf[1] != 'I') { @@ -699,7 +699,7 @@ Returns: */ write_f16 :: proc(b: ^Builder, f: f16, fmt: byte, always_signed := false) -> (n: int) { buf: [384]byte - s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) + s := strconv.write_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) if !always_signed && (buf[0] == '+' && buf[1] != 'I') { s = s[1:] } @@ -739,7 +739,7 @@ Output: */ write_f32 :: proc(b: ^Builder, f: f32, fmt: byte, always_signed := false) -> (n: int) { buf: [384]byte - s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) + s := strconv.write_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) if !always_signed && (buf[0] == '+' && buf[1] != 'I') { s = s[1:] } @@ -761,7 +761,7 @@ Returns: */ write_f64 :: proc(b: ^Builder, f: f64, fmt: byte, always_signed := false) -> (n: int) { buf: [384]byte - s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) + s := strconv.write_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) if !always_signed && (buf[0] == '+' && buf[1] != 'I') { s = s[1:] } @@ -782,7 +782,7 @@ Returns: */ write_u64 :: proc(b: ^Builder, i: u64, base: int = 10) -> (n: int) { buf: [32]byte - s := strconv.append_bits(buf[:], i, base, false, 64, strconv.digits, nil) + s := strconv.write_bits(buf[:], i, base, false, 64, strconv.digits, nil) return write_string(b, s) } /* @@ -800,7 +800,7 @@ Returns: */ write_i64 :: proc(b: ^Builder, i: i64, base: int = 10) -> (n: int) { buf: [32]byte - s := strconv.append_bits(buf[:], u64(i), base, true, 64, strconv.digits, nil) + s := strconv.write_bits(buf[:], u64(i), base, true, 64, strconv.digits, nil) return write_string(b, s) } /* diff --git a/tests/core/math/test_core_math.odin b/tests/core/math/test_core_math.odin index 5797cb4ea..009e3fedd 100644 --- a/tests/core/math/test_core_math.odin +++ b/tests/core/math/test_core_math.odin @@ -1238,7 +1238,7 @@ test_count_digits :: proc(t: ^testing.T) { buf: [64]u8 for n in 0.. Date: Sat, 2 Aug 2025 11:55:16 +0100 Subject: Begin supporting `string16` across the core library --- base/intrinsics/intrinsics.odin | 1 + base/runtime/print.odin | 6 +++ core/encoding/cbor/tags.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 + core/encoding/json/marshal.odin | 8 ++-- core/encoding/json/unmarshal.odin | 4 +- core/flags/internal_rtti.odin | 2 + core/fmt/fmt.odin | 16 ++++---- core/io/io.odin | 4 +- core/reflect/types.odin | 8 ++-- src/check_builtin.cpp | 2 + src/check_expr.cpp | 42 +++++++++++++++++---- src/checker_builtin_procs.hpp | 2 + src/llvm_backend.cpp | 6 +++ src/llvm_backend_const.cpp | 77 +++++++++++++++++++++++++++++++++++++-- src/llvm_backend_debug.cpp | 14 +++++++ src/llvm_backend_expr.cpp | 3 +- src/llvm_backend_general.cpp | 37 +++++++++++++++++++ src/llvm_backend_utility.cpp | 19 +++++++++- src/string.cpp | 7 ++++ 20 files changed, 230 insertions(+), 32 deletions(-) (limited to 'core/encoding/json') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index be75739fe..d45d24f48 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -141,6 +141,7 @@ type_is_quaternion :: proc($T: typeid) -> bool --- type_is_string :: proc($T: typeid) -> bool --- type_is_typeid :: proc($T: typeid) -> bool --- type_is_any :: proc($T: typeid) -> bool --- +type_is_string16 :: proc($T: typeid) -> bool --- type_is_endian_platform :: proc($T: typeid) -> bool --- type_is_endian_little :: proc($T: typeid) -> bool --- diff --git a/base/runtime/print.odin b/base/runtime/print.odin index 145f002d1..85ed49445 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -293,7 +293,13 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { print_string("quaternion") print_u64(u64(8*ti.size)) case Type_Info_String: + if info.is_cstring { + print_byte('c') + } print_string("string") + if info.is_utf16 { + print_string("16") + } case Type_Info_Boolean: switch ti.id { case bool: print_string("bool") diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 17420af46..e0e69cbf5 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -298,7 +298,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, #partial switch t in ti.variant { case reflect.Type_Info_String: - + assert(!t.is_utf16) if t.is_cstring { length := base64.decoded_len(bytes) builder := strings.builder_make(0, length+1) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 365ac5d6f..2840429f5 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -335,6 +335,8 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: + assert(!t.is_utf16) + bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return if t.is_cstring { diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index ebb9a639c..cdb00a354 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -353,10 +353,10 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err: #partial switch info in ti.variant { case runtime.Type_Info_String: switch x in v { - case string: - return x == "" - case cstring: - return x == nil || x == "" + case string: return x == "" + case cstring: return x == nil || x == "" + case string16: return x == "" + case cstring16: return x == nil || x == "" } case runtime.Type_Info_Any: return v.(any) == nil diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index b9ed1476f..51e7e3b81 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -570,7 +570,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm key_ptr: rawptr #partial switch tk in t.key.variant { - case runtime.Type_Info_String: + case runtime.Type_Info_String: + assert(!tk.is_utf16) + key_ptr = rawptr(&key) key_cstr: cstring if reflect.is_cstring(t.key) { diff --git a/core/flags/internal_rtti.odin b/core/flags/internal_rtti.odin index 1c559ca55..58224cc87 100644 --- a/core/flags/internal_rtti.odin +++ b/core/flags/internal_rtti.odin @@ -127,6 +127,8 @@ parse_and_set_pointer_by_base_type :: proc(ptr: rawptr, str: string, type_info: } case runtime.Type_Info_String: + assert(!specific_type_info.is_utf16) + if specific_type_info.is_cstring { cstr_ptr := (^cstring)(ptr) if cstr_ptr != nil { diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 7fe6287d4..9c245de94 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -2346,14 +2346,14 @@ fmt_array :: proc(fi: ^Info, data: rawptr, n: int, elem_size: int, elem: ^reflec } switch reflect.type_info_base(elem).id { - case byte: fmt_string(fi, string(([^]byte)(data)[:n]), verb); return - case u16: print_utf16(fi, ([^]u16)(data)[:n]); return - case u16le: print_utf16(fi, ([^]u16le)(data)[:n]); return - case u16be: print_utf16(fi, ([^]u16be)(data)[:n]); return - case u32: print_utf32(fi, ([^]u32)(data)[:n]); return - case u32le: print_utf32(fi, ([^]u32le)(data)[:n]); return - case u32be: print_utf32(fi, ([^]u32be)(data)[:n]); return - case rune: print_utf32(fi, ([^]rune)(data)[:n]); return + case byte: fmt_string(fi, string (([^]byte)(data)[:n]), verb); return + case u16: fmt_string16(fi, string16(([^]u16) (data)[:n]), verb); return + case u16le: print_utf16(fi, ([^]u16le)(data)[:n]); return + case u16be: print_utf16(fi, ([^]u16be)(data)[:n]); return + case u32: print_utf32(fi, ([^]u32)(data)[:n]); return + case u32le: print_utf32(fi, ([^]u32le)(data)[:n]); return + case u32be: print_utf32(fi, ([^]u32be)(data)[:n]); return + case rune: print_utf32(fi, ([^]rune)(data)[:n]); return } } if verb == 'p' { diff --git a/core/io/io.odin b/core/io/io.odin index 5431519bf..c4eb6a073 100644 --- a/core/io/io.odin +++ b/core/io/io.odin @@ -319,7 +319,6 @@ write_string :: proc(s: Writer, str: string, n_written: ^int = nil) -> (n: int, write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: int, err: Error) { for i := 0; i < len(str); i += 1 { r := rune(utf16.REPLACEMENT_CHAR) - switch c := str[i]; { case c < utf16._surr1, utf16._surr3 <= c: r = rune(c) @@ -329,7 +328,8 @@ write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: i i += 1 } - w, err := write_rune(s, r, n_written) + w: int + w, err = write_rune(s, r, n_written) n += w if err != nil { return diff --git a/core/reflect/types.odin b/core/reflect/types.odin index 511c5c9bd..2351408cc 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -511,9 +511,11 @@ write_type_writer :: #force_no_inline proc(w: io.Writer, ti: ^Type_Info, n_writt io.write_i64(w, i64(8*ti.size), 10, &n) or_return case Type_Info_String: if info.is_cstring { - io.write_string(w, "cstring", &n) or_return - } else { - io.write_string(w, "string", &n) or_return + io.write_byte(w, 'c', &n) or_return + } + io.write_string(w, "string", &n) or_return + if info.is_utf16 { + io.write_string(w, "16", &n) or_return } case Type_Info_Boolean: switch ti.id { diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index d36cf4520..4abace637 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -19,6 +19,7 @@ gb_global BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_bool is_type_complex, is_type_quaternion, is_type_string, + is_type_string16, is_type_typeid, is_type_any, is_type_endian_platform, @@ -6139,6 +6140,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case BuiltinProc_type_is_complex: case BuiltinProc_type_is_quaternion: case BuiltinProc_type_is_string: + case BuiltinProc_type_is_string16: case BuiltinProc_type_is_typeid: case BuiltinProc_type_is_any: case BuiltinProc_type_is_endian_platform: diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 57073e22f..8d2e4d637 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2106,6 +2106,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i } else if (is_type_boolean(type)) { return in_value.kind == ExactValue_Bool; } else if (is_type_string(type)) { + if (in_value.kind == ExactValue_String16) { + return is_type_string16(type) || is_type_cstring16(type); + } return in_value.kind == ExactValue_String; } else if (is_type_integer(type) || is_type_rune(type)) { if (in_value.kind == ExactValue_Bool) { @@ -2320,6 +2323,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i if (in_value.kind == ExactValue_String) { return false; } + if (in_value.kind == ExactValue_String16) { + return false; + } if (out_value) *out_value = in_value; } else if (is_type_bit_set(type)) { if (in_value.kind == ExactValue_Integer) { @@ -4654,6 +4660,13 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar break; } } + } else if (operand->value.kind == ExactValue_String16) { + String16 s = operand->value.value_string16; + if (is_type_u16_array(t)) { + if (s.len == t->Array.count) { + break; + } + } } operand->mode = Addressing_Invalid; convert_untyped_error(c, operand, target_type); @@ -4983,6 +4996,12 @@ gb_internal ExactValue get_constant_field_single(CheckerContext *c, ExactValue v if (success_) *success_ = true; if (finish_) *finish_ = true; return exact_value_u64(val); + } else if (value.kind == ExactValue_String16) { + GB_ASSERT(0 <= index && index < value.value_string.len); + u16 val = value.value_string16[index]; + if (success_) *success_ = true; + if (finish_) *finish_ = true; + return exact_value_u64(val); } if (value.kind != ExactValue_Compound) { if (success_) *success_ = true; @@ -11124,15 +11143,21 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node, o->expr = node; return kind; } - - String s = {}; - if (o->value.kind == ExactValue_String) { - s = o->value.value_string; - } - o->mode = Addressing_Constant; o->type = t; - o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + + if (o->value.kind == ExactValue_String16) { + String16 s = o->value.value_string16; + + o->value = exact_value_string16(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + } else { + String s = {}; + if (o->value.kind == ExactValue_String) { + s = o->value.value_string; + } + + o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + } } return kind; } @@ -11221,6 +11246,7 @@ gb_internal ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast Type *t = t_invalid; switch (node->tav.value.kind) { case ExactValue_String: t = t_untyped_string; break; + case ExactValue_String16: t = t_string16; break; // TODO(bill): determine this correctly case ExactValue_Float: t = t_untyped_float; break; case ExactValue_Complex: t = t_untyped_complex; break; case ExactValue_Quaternion: t = t_untyped_quaternion; break; @@ -11657,6 +11683,8 @@ gb_internal bool is_exact_value_zero(ExactValue const &v) { return !v.value_bool; case ExactValue_String: return v.value_string.len == 0; + case ExactValue_String16: + return v.value_string16.len == 0; case ExactValue_Integer: return big_int_is_zero(&v.value_integer); case ExactValue_Float: diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 8e135ab10..bff887d9e 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -250,6 +250,7 @@ BuiltinProc__type_simple_boolean_begin, BuiltinProc_type_is_complex, BuiltinProc_type_is_quaternion, BuiltinProc_type_is_string, + BuiltinProc_type_is_string16, BuiltinProc_type_is_typeid, BuiltinProc_type_is_any, @@ -607,6 +608,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("type_is_complex"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_quaternion"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_string"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("type_is_string16"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_typeid"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_any"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 13a1d8cf3..f37415cc1 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -1264,7 +1264,13 @@ String lb_get_objc_type_encoding(Type *t, isize pointer_depth = 0) { case Basic_string: return build_context.metrics.int_size == 4 ? str_lit("{string=*i}") : str_lit("{string=*q}"); + case Basic_string16: + return build_context.metrics.int_size == 4 ? str_lit("{string16=*i}") : str_lit("{string16=*q}"); + case Basic_cstring: return str_lit("*"); + case Basic_cstring16: return str_lit("*"); + + case Basic_any: return str_lit("{any=^v^v}"); // rawptr + ^Type_Info case Basic_typeid: diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index c3112934e..8c05ed4a2 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -122,6 +122,25 @@ gb_internal lbValue lb_const_ptr_cast(lbModule *m, lbValue value, Type *t) { gb_internal LLVMValueRef llvm_const_string_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) { + GB_ASSERT(!is_type_string16(t)); + if (build_context.metrics.ptr_size < build_context.metrics.int_size) { + LLVMValueRef values[3] = { + data, + LLVMConstNull(lb_type(m, t_i32)), + len, + }; + return llvm_const_named_struct_internal(lb_type(m, t), values, 3); + } else { + LLVMValueRef values[2] = { + data, + len, + }; + return llvm_const_named_struct_internal(lb_type(m, t), values, 2); + } +} + +gb_internal LLVMValueRef llvm_const_string16_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) { + GB_ASSERT(is_type_string16(t)); if (build_context.metrics.ptr_size < build_context.metrics.int_size) { LLVMValueRef values[3] = { data, @@ -238,6 +257,10 @@ gb_internal lbValue lb_const_string(lbModule *m, String const &value) { return lb_const_value(m, t_string, exact_value_string(value)); } +gb_internal lbValue lb_const_string(lbModule *m, String16 const &value) { + return lb_const_value(m, t_string16, exact_value_string16(value)); +} + gb_internal lbValue lb_const_bool(lbModule *m, Type *type, bool value) { lbValue res = {}; @@ -569,7 +592,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb GB_ASSERT(is_type_slice(type)); res.value = lb_find_or_add_entity_string_byte_slice_with_type(m, value.value_string, original_type).value; return res; - } else { + } else if (value.kind == ExactValue_String16) { + GB_ASSERT(is_type_slice(type)); + GB_PANIC("TODO(bill): UTF-16 String"); + return res; + }else { ast_node(cl, CompoundLit, value.value_compound); isize count = cl->elems.count; @@ -751,15 +778,23 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb { bool custom_link_section = cc.link_section.len > 0; - LLVMValueRef ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); + LLVMValueRef ptr = nullptr; lbValue res = {}; res.type = default_type(original_type); + if (is_type_string16(res.type) || is_type_cstring16(res.type)) { + TEMPORARY_ALLOCATOR_GUARD(); + String16 s16 = string_to_string16(temporary_allocator(), value.value_string); + ptr = lb_find_or_add_entity_string16_ptr(m, s16, custom_link_section); + } else { + ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); + } + if (custom_link_section) { LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section)); } - if (is_type_cstring(res.type)) { + if (is_type_cstring(res.type) || is_type_cstring16(res.type)) { res.value = ptr; } else { if (value.value_string.len == 0) { @@ -768,12 +803,46 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string.len, true); GB_ASSERT(is_type_string(original_type)); - res.value = llvm_const_string_internal(m, original_type, ptr, str_len); + if (is_type_string16(res.type)) { + res.value = llvm_const_string16_internal(m, original_type, ptr, str_len); + } else { + res.value = llvm_const_string_internal(m, original_type, ptr, str_len); + } + } + + return res; + } + + case ExactValue_String16: + { + GB_ASSERT(is_type_string16(res.type) || is_type_cstring16(res.type)); + + bool custom_link_section = cc.link_section.len > 0; + + LLVMValueRef ptr = lb_find_or_add_entity_string16_ptr(m, value.value_string16, custom_link_section); + lbValue res = {}; + res.type = default_type(original_type); + + if (custom_link_section) { + LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section)); + } + + if (is_type_cstring16(res.type)) { + res.value = ptr; + } else { + if (value.value_string16.len == 0) { + ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); + } + LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string16.len, true); + GB_ASSERT(is_type_string(original_type)); + + res.value = llvm_const_string16_internal(m, original_type, ptr, str_len); } return res; } + case ExactValue_Integer: if (is_type_pointer(type) || is_type_multi_pointer(type) || is_type_proc(type)) { LLVMTypeRef t = lb_type(m, original_type); diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 024c5564e..182920fc7 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -802,6 +802,20 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) { LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("char"), 8, LLVMDWARFTypeEncoding_Unsigned); return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring", 7); } + + case Basic_string16: + { + LLVMMetadataRef elements[2] = {}; + elements[0] = lb_debug_struct_field(m, str_lit("data"), t_u16_ptr, 0); + elements[1] = lb_debug_struct_field(m, str_lit("len"), t_int, int_bits); + return lb_debug_basic_struct(m, str_lit("string16"), 2*int_bits, int_bits, elements, gb_count_of(elements)); + } + case Basic_cstring16: + { + LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("wchar_t"), 16, LLVMDWARFTypeEncoding_Unsigned); + return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring16", 7); + } + case Basic_any: { LLVMMetadataRef elements[2] = {}; diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index fbf0dea11..3463b6083 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -4354,12 +4354,13 @@ gb_internal lbAddr lb_build_addr_index_expr(lbProcedure *p, Ast *expr) { } - case Type_Basic: { // Basic_string + case Type_Basic: { // Basic_string/Basic_string16 lbValue str; lbValue elem; lbValue len; lbValue index; + str = lb_build_expr(p, ie->expr); if (deref) { str = lb_emit_load(p, str); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index d9771a75b..9ef1c23c0 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -2715,6 +2715,43 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string_ptr(lbModule *m, String co } } +gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String16 const &str, bool custom_link_section) { + // TODO(bill): caching for UTF-16 strings + + LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)}; + + LLVMValueRef data = nullptr; + { + LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx); + + TEMPORARY_ALLOCATOR_GUARD(); + + LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1); + + for (isize i = 0; i < str.len; i++) { + values[i] = LLVMConstInt(llvm_u16, str.text[i], false); + } + values[str.len] = LLVMConstInt(llvm_u16, 0, false); + + data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1)); + } + + + u32 id = m->global_array_index.fetch_add(1); + gbString name = gb_string_make(temporary_allocator(), "csbs$"); + name = gb_string_appendc(name, m->module_name); + name = gb_string_append_fmt(name, "$%x", id); + + LLVMTypeRef type = LLVMTypeOf(data); + LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name); + LLVMSetInitializer(global_data, data); + lb_make_global_private_const(global_data); + LLVMSetAlignment(global_data, 1); + + LLVMValueRef ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2); + return ptr; +} + gb_internal lbValue lb_find_or_add_entity_string(lbModule *m, String const &str, bool custom_link_section) { LLVMValueRef ptr = nullptr; if (str.len != 0) { diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index d4117b7ff..ea1bae4e9 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -6,6 +6,7 @@ gb_internal bool lb_is_type_aggregate(Type *t) { case Type_Basic: switch (t->Basic.kind) { case Basic_string: + case Basic_string16: case Basic_any: return true; @@ -981,7 +982,8 @@ gb_internal i32 lb_convert_struct_index(lbModule *m, Type *t, i32 index) { } else if (build_context.ptr_size != build_context.int_size) { switch (t->kind) { case Type_Basic: - if (t->Basic.kind != Basic_string) { + if (t->Basic.kind != Basic_string && + t->Basic.kind != Basic_string16) { break; } /*fallthrough*/ @@ -1160,6 +1162,11 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) { case 0: result_type = alloc_type_pointer(t->Slice.elem); break; case 1: result_type = t_int; break; } + } else if (is_type_string16(t)) { + switch (index) { + case 0: result_type = t_u16_ptr; break; + case 1: result_type = t_int; break; + } } else if (is_type_string(t)) { switch (index) { case 0: result_type = t_u8_ptr; break; @@ -1273,6 +1280,12 @@ gb_internal lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) { switch (t->kind) { case Type_Basic: switch (t->Basic.kind) { + case Basic_string16: + switch (index) { + case 0: result_type = t_u16_ptr; break; + case 1: result_type = t_int; break; + } + break; case Basic_string: switch (index) { case 0: result_type = t_u8_ptr; break; @@ -1440,6 +1453,10 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection e = lb_emit_struct_ep(p, e, index); break; + case Basic_string16: + e = lb_emit_struct_ep(p, e, index); + break; + default: GB_PANIC("un-gep-able type %s", type_to_string(type)); break; diff --git a/src/string.cpp b/src/string.cpp index 8405938f4..8cc0e93f3 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -79,6 +79,13 @@ gb_internal String substring(String const &s, isize lo, isize hi) { return make_string(s.text+lo, hi-lo); } +gb_internal String16 substring(String16 const &s, isize lo, isize hi) { + isize max = s.len; + GB_ASSERT_MSG(lo <= hi && hi <= max, "%td..%td..%td", lo, hi, max); + + return make_string16(s.text+lo, hi-lo); +} + gb_internal char *alloc_cstring(gbAllocator a, String s) { char *c_str = gb_alloc_array(a, char, s.len+1); -- cgit v1.2.3 From af3184adc96cef59fff986ea6400caa6dbdb56ae Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 5 Aug 2025 15:12:54 +0100 Subject: Change `is_utf16` field to `encoding` and use an enum --- base/runtime/core.odin | 7 ++++++- base/runtime/print.odin | 5 +++-- core/encoding/cbor/tags.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 +- core/encoding/json/unmarshal.odin | 2 +- core/flags/internal_rtti.odin | 2 +- core/reflect/types.odin | 5 +++-- src/checker.cpp | 3 +++ src/llvm_backend_type.cpp | 16 ++++++++++++---- src/types.cpp | 2 ++ 10 files changed, 33 insertions(+), 13 deletions(-) (limited to 'core/encoding/json') diff --git a/base/runtime/core.odin b/base/runtime/core.odin index fe40427ff..478a3d307 100644 --- a/base/runtime/core.odin +++ b/base/runtime/core.odin @@ -61,6 +61,11 @@ Type_Info_Struct_Soa_Kind :: enum u8 { Dynamic = 3, } +Type_Info_String_Encoding_Kind :: enum u8 { + UTF_8 = 0, + UTF_16 = 1, +} + // Variant Types Type_Info_Named :: struct { name: string, @@ -73,7 +78,7 @@ Type_Info_Rune :: struct {} Type_Info_Float :: struct {endianness: Platform_Endianness} Type_Info_Complex :: struct {} Type_Info_Quaternion :: struct {} -Type_Info_String :: struct {is_cstring: bool, is_utf16: bool} +Type_Info_String :: struct {is_cstring: bool, encoding: Type_Info_String_Encoding_Kind} Type_Info_Boolean :: struct {} Type_Info_Any :: struct {} Type_Info_Type_Id :: struct {} diff --git a/base/runtime/print.odin b/base/runtime/print.odin index 85ed49445..2cfb6661b 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -297,8 +297,9 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { print_byte('c') } print_string("string") - if info.is_utf16 { - print_string("16") + switch info.encoding { + case .UTF_8: /**/ + case .UTF_16: print_string("16") } case Type_Info_Boolean: switch ti.id { diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index e0e69cbf5..ae1664dfc 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -298,7 +298,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, #partial switch t in ti.variant { case reflect.Type_Info_String: - assert(!t.is_utf16) + assert(t.encoding == .UTF_8) if t.is_cstring { length := base64.decoded_len(bytes) builder := strings.builder_make(0, length+1) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 2840429f5..043b2ec60 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -335,7 +335,7 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: - assert(!t.is_utf16) + assert(t.encoding == .UTF_8) bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index 51e7e3b81..0b65adaac 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -571,7 +571,7 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm #partial switch tk in t.key.variant { case runtime.Type_Info_String: - assert(!tk.is_utf16) + assert(tk.encoding == .UTF_8) key_ptr = rawptr(&key) key_cstr: cstring diff --git a/core/flags/internal_rtti.odin b/core/flags/internal_rtti.odin index 58224cc87..a1b050597 100644 --- a/core/flags/internal_rtti.odin +++ b/core/flags/internal_rtti.odin @@ -127,7 +127,7 @@ parse_and_set_pointer_by_base_type :: proc(ptr: rawptr, str: string, type_info: } case runtime.Type_Info_String: - assert(!specific_type_info.is_utf16) + assert(specific_type_info.encoding == .UTF_8) if specific_type_info.is_cstring { cstr_ptr := (^cstring)(ptr) diff --git a/core/reflect/types.odin b/core/reflect/types.odin index 2351408cc..98b7b368f 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -514,8 +514,9 @@ write_type_writer :: #force_no_inline proc(w: io.Writer, ti: ^Type_Info, n_writt io.write_byte(w, 'c', &n) or_return } io.write_string(w, "string", &n) or_return - if info.is_utf16 { - io.write_string(w, "16", &n) or_return + switch info.encoding { + case .UTF_8: /**/ + case .UTF_16: io.write_string(w, "16", &n) or_return } case Type_Info_Boolean: switch ti.id { diff --git a/src/checker.cpp b/src/checker.cpp index e9fa792f3..e72061f56 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -3101,6 +3101,9 @@ gb_internal void init_core_type_info(Checker *c) { GB_ASSERT(tis->fields.count == 5); + Entity *type_info_string_encoding_kind = find_core_entity(c, str_lit("Type_Info_String_Encoding_Kind")); + t_type_info_string_encoding_kind = type_info_string_encoding_kind->type; + Entity *type_info_variant = tis->fields[4]; Type *tiv_type = type_info_variant->type; GB_ASSERT(is_type_union(tiv_type)); diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index a91d77fe5..d1e7c0559 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -525,7 +525,15 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ break; case Basic_string: - tag_type = t_type_info_string; + { + tag_type = t_type_info_string; + LLVMValueRef vals[2] = { + lb_const_bool(m, t_bool, false).value, + lb_const_int(m, t_type_info_string_encoding_kind, 0).value, + }; + + variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); + } break; case Basic_cstring: @@ -533,7 +541,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ tag_type = t_type_info_string; LLVMValueRef vals[2] = { lb_const_bool(m, t_bool, true).value, - lb_const_bool(m, t_bool, false).value, + lb_const_int(m, t_type_info_string_encoding_kind, 0).value, }; variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); @@ -545,7 +553,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ tag_type = t_type_info_string; LLVMValueRef vals[2] = { lb_const_bool(m, t_bool, false).value, - lb_const_bool(m, t_bool, true).value, + lb_const_int(m, t_type_info_string_encoding_kind, 1).value, }; variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); @@ -558,7 +566,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ tag_type = t_type_info_string; LLVMValueRef vals[2] = { lb_const_bool(m, t_bool, true).value, - lb_const_bool(m, t_bool, true).value, + lb_const_int(m, t_type_info_string_encoding_kind, 1).value, }; variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); diff --git a/src/types.cpp b/src/types.cpp index 51d170f2b..c465714db 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -661,6 +661,8 @@ gb_global Type *t_type_info_enum_value = nullptr; gb_global Type *t_type_info_ptr = nullptr; gb_global Type *t_type_info_enum_value_ptr = nullptr; +gb_global Type *t_type_info_string_encoding_kind = nullptr; + gb_global Type *t_type_info_named = nullptr; gb_global Type *t_type_info_integer = nullptr; gb_global Type *t_type_info_rune = nullptr; -- cgit v1.2.3