diff options
| author | gingerBill <gingerBill@users.noreply.github.com> | 2025-08-06 16:09:18 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-06 16:09:18 +0100 |
| commit | 09a1e170bc92a0ea48a8ee67599c2936e924fe4d (patch) | |
| tree | 92b44b34a1f2f0c4a8c96a49ab61bb5177432ed7 /base | |
| parent | ec7509430369eb5d57a081507792dc03b1c05bab (diff) | |
| parent | af3184adc96cef59fff986ea6400caa6dbdb56ae (diff) | |
Merge pull request #5530 from odin-lang/bill/utf16-strings
UTF-16 string types: `string16` & `cstring16`
Diffstat (limited to 'base')
| -rw-r--r-- | base/intrinsics/intrinsics.odin | 1 | ||||
| -rw-r--r-- | base/runtime/core.odin | 18 | ||||
| -rw-r--r-- | base/runtime/core_builtin.odin | 28 | ||||
| -rw-r--r-- | base/runtime/internal.odin | 149 | ||||
| -rw-r--r-- | base/runtime/print.odin | 7 |
5 files changed, 201 insertions, 2 deletions
diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index be75739fe..d45d24f48 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -141,6 +141,7 @@ type_is_quaternion :: proc($T: typeid) -> bool --- type_is_string :: proc($T: typeid) -> bool --- type_is_typeid :: proc($T: typeid) -> bool --- type_is_any :: proc($T: typeid) -> bool --- +type_is_string16 :: proc($T: typeid) -> bool --- type_is_endian_platform :: proc($T: typeid) -> bool --- type_is_endian_little :: proc($T: typeid) -> bool --- diff --git a/base/runtime/core.odin b/base/runtime/core.odin index baecb4146..478a3d307 100644 --- a/base/runtime/core.odin +++ b/base/runtime/core.odin @@ -61,6 +61,11 @@ Type_Info_Struct_Soa_Kind :: enum u8 { Dynamic = 3, } +Type_Info_String_Encoding_Kind :: enum u8 { + UTF_8 = 0, + UTF_16 = 1, +} + // Variant Types Type_Info_Named :: struct { name: string, @@ -73,7 +78,7 @@ Type_Info_Rune :: struct {} Type_Info_Float :: struct {endianness: Platform_Endianness} Type_Info_Complex :: struct {} Type_Info_Quaternion :: struct {} -Type_Info_String :: struct {is_cstring: bool} +Type_Info_String :: struct {is_cstring: bool, encoding: Type_Info_String_Encoding_Kind} Type_Info_Boolean :: struct {} Type_Info_Any :: struct {} Type_Info_Type_Id :: struct {} @@ -397,6 +402,11 @@ Raw_String :: struct { len: int, } +Raw_String16 :: struct { + data: [^]u16, + len: int, +} + Raw_Slice :: struct { data: rawptr, len: int, @@ -450,6 +460,12 @@ Raw_Cstring :: struct { } #assert(size_of(Raw_Cstring) == size_of(cstring)) +Raw_Cstring16 :: struct { + data: [^]u16, +} +#assert(size_of(Raw_Cstring16) == size_of(cstring16)) + + Raw_Soa_Pointer :: struct { data: rawptr, index: int, diff --git a/base/runtime/core_builtin.odin b/base/runtime/core_builtin.odin index e2ba14f3a..09118998c 100644 --- a/base/runtime/core_builtin.odin +++ b/base/runtime/core_builtin.odin @@ -86,11 +86,26 @@ copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int } return n } + +// `copy_from_string16` is a built-in procedure that copies elements from a source string `src` to a destination slice `dst`. +// The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum +// of len(src) and len(dst). +// +// Prefer the procedure group `copy`. +@builtin +copy_from_string16 :: proc "contextless" (dst: $T/[]$E/u16, src: $S/string16) -> int { + n := min(len(dst), len(src)) + if n > 0 { + intrinsics.mem_copy(raw_data(dst), raw_data(src), n*size_of(u16)) + } + return n +} + // `copy` is a built-in procedure that copies elements from a source slice/string `src` to a destination slice `dst`. // The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum // of len(src) and len(dst). @builtin -copy :: proc{copy_slice, copy_from_string} +copy :: proc{copy_slice, copy_from_string, copy_from_string16} @@ -285,6 +300,15 @@ delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error } +@builtin +delete_string16 :: proc(str: string16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + return mem_free_with_size(raw_data(str), len(str)*size_of(u16), allocator, loc) +} +@builtin +delete_cstring16 :: proc(str: cstring16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + return mem_free((^u16)(str), allocator, loc) +} + // `delete` will try to free the underlying data of the passed built-in data structure (string, cstring, dynamic array, slice, or map), with the given `allocator` if the allocator supports this operation. // // Note: Prefer `delete` over the specific `delete_*` procedures where possible. @@ -297,6 +321,8 @@ delete :: proc{ delete_map, delete_soa_slice, delete_soa_dynamic_array, + delete_string16, + delete_cstring16, } diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin index 907b187f1..4f9509b23 100644 --- a/base/runtime/internal.odin +++ b/base/runtime/internal.odin @@ -493,12 +493,40 @@ string_cmp :: proc "contextless" (a, b: string) -> int { return ret } + +string16_eq :: proc "contextless" (lhs, rhs: string16) -> bool { + x := transmute(Raw_String16)lhs + y := transmute(Raw_String16)rhs + if x.len != y.len { + return false + } + return #force_inline memory_equal(x.data, y.data, x.len*size_of(u16)) +} + +string16_cmp :: proc "contextless" (a, b: string16) -> int { + x := transmute(Raw_String16)a + y := transmute(Raw_String16)b + + ret := memory_compare(x.data, y.data, min(x.len, y.len)*size_of(u16)) + if ret == 0 && x.len != y.len { + return -1 if x.len < y.len else +1 + } + return ret +} + string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) } string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 } string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 } string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 } string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 } +string16_ne :: #force_inline proc "contextless" (a, b: string16) -> bool { return !string16_eq(a, b) } +string16_lt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) < 0 } +string16_gt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) > 0 } +string16_le :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) <= 0 } +string16_ge :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) >= 0 } + + cstring_len :: proc "contextless" (s: cstring) -> int { p0 := uintptr((^byte)(s)) p := p0 @@ -508,6 +536,16 @@ cstring_len :: proc "contextless" (s: cstring) -> int { return int(p - p0) } +cstring16_len :: proc "contextless" (s: cstring16) -> int { + p := ([^]u16)(s) + n := 0 + for p != nil && p[0] != 0 { + p = p[1:] + n += 1 + } + return n +} + cstring_to_string :: proc "contextless" (s: cstring) -> string { if s == nil { return "" @@ -517,6 +555,15 @@ cstring_to_string :: proc "contextless" (s: cstring) -> string { return transmute(string)Raw_String{ptr, n} } +cstring16_to_string16 :: proc "contextless" (s: cstring16) -> string16 { + if s == nil { + return "" + } + ptr := (^u16)(s) + n := cstring16_len(s) + return transmute(string16)Raw_String16{ptr, n} +} + cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool { x := ([^]byte)(lhs) @@ -559,6 +606,46 @@ cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 } cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 } +cstring16_eq :: proc "contextless" (lhs, rhs: cstring16) -> bool { + x := ([^]u16)(lhs) + y := ([^]u16)(rhs) + if x == y { + return true + } + if (x == nil) ~ (y == nil) { + return false + } + xn := cstring16_len(lhs) + yn := cstring16_len(rhs) + if xn != yn { + return false + } + return #force_inline memory_equal(x, y, xn*size_of(u16)) +} + +cstring16_cmp :: proc "contextless" (lhs, rhs: cstring16) -> int { + x := ([^]u16)(lhs) + y := ([^]u16)(rhs) + if x == y { + return 0 + } + if (x == nil) ~ (y == nil) { + return -1 if x == nil else +1 + } + xn := cstring16_len(lhs) + yn := cstring16_len(rhs) + ret := memory_compare(x, y, min(xn, yn)*size_of(u16)) + if ret == 0 && xn != yn { + return -1 if xn < yn else +1 + } + return ret +} + +cstring16_ne :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return !cstring16_eq(a, b) } +cstring16_lt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) < 0 } +cstring16_gt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) > 0 } +cstring16_le :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) <= 0 } +cstring16_ge :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) >= 0 } complex32_eq :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) == real(b) && imag(a) == imag(b) } complex32_ne :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) != real(b) || imag(a) != imag(b) } @@ -694,6 +781,68 @@ string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) { return r, size } + +string16_decode_rune :: #force_inline proc "contextless" (s: string16) -> (rune, int) { + REPLACEMENT_CHAR :: '\ufffd' + _surr1 :: 0xd800 + _surr2 :: 0xdc00 + _surr3 :: 0xe000 + _surr_self :: 0x10000 + + r := rune(REPLACEMENT_CHAR) + + if len(s) < 1 { + return r, 0 + } + + w := 1 + switch c := s[0]; { + case c < _surr1, _surr3 <= c: + r = rune(c) + case _surr1 <= c && c < _surr2 && 1 < len(s) && + _surr2 <= s[1] && s[1] < _surr3: + r1, r2 := rune(c), rune(s[1]) + if _surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3 { + r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self + } + w += 1 + } + return r, w +} + +string16_decode_last_rune :: proc "contextless" (s: string16) -> (rune, int) { + REPLACEMENT_CHAR :: '\ufffd' + _surr1 :: 0xd800 + _surr2 :: 0xdc00 + _surr3 :: 0xe000 + _surr_self :: 0x10000 + + r := rune(REPLACEMENT_CHAR) + + if len(s) < 1 { + return r, 0 + } + + n := len(s)-1 + c := s[n] + w := 1 + if _surr2 <= c && c < _surr3 { + if n >= 1 { + r1 := rune(s[n-1]) + r2 := rune(c) + if _surr1 <= r1 && r1 < _surr2 { + r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self + } + w = 2 + } + } else if c < _surr1 || _surr3 <= c { + r = rune(c) + } + return r, w +} + + + abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 { p, q := abs(real(x)), abs(imag(x)) if p < q { diff --git a/base/runtime/print.odin b/base/runtime/print.odin index 145f002d1..2cfb6661b 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -293,7 +293,14 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { print_string("quaternion") print_u64(u64(8*ti.size)) case Type_Info_String: + if info.is_cstring { + print_byte('c') + } print_string("string") + switch info.encoding { + case .UTF_8: /**/ + case .UTF_16: print_string("16") + } case Type_Info_Boolean: switch ti.id { case bool: print_string("bool") |