aboutsummaryrefslogtreecommitdiff
path: root/base
diff options
context:
space:
mode:
authorgingerBill <gingerBill@users.noreply.github.com>2025-08-06 16:09:18 +0100
committerGitHub <noreply@github.com>2025-08-06 16:09:18 +0100
commit09a1e170bc92a0ea48a8ee67599c2936e924fe4d (patch)
tree92b44b34a1f2f0c4a8c96a49ab61bb5177432ed7 /base
parentec7509430369eb5d57a081507792dc03b1c05bab (diff)
parentaf3184adc96cef59fff986ea6400caa6dbdb56ae (diff)
Merge pull request #5530 from odin-lang/bill/utf16-strings
UTF-16 string types: `string16` & `cstring16`
Diffstat (limited to 'base')
-rw-r--r--base/intrinsics/intrinsics.odin1
-rw-r--r--base/runtime/core.odin18
-rw-r--r--base/runtime/core_builtin.odin28
-rw-r--r--base/runtime/internal.odin149
-rw-r--r--base/runtime/print.odin7
5 files changed, 201 insertions, 2 deletions
diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin
index be75739fe..d45d24f48 100644
--- a/base/intrinsics/intrinsics.odin
+++ b/base/intrinsics/intrinsics.odin
@@ -141,6 +141,7 @@ type_is_quaternion :: proc($T: typeid) -> bool ---
type_is_string :: proc($T: typeid) -> bool ---
type_is_typeid :: proc($T: typeid) -> bool ---
type_is_any :: proc($T: typeid) -> bool ---
+type_is_string16 :: proc($T: typeid) -> bool --- // NOTE(review): presumably the UTF-16 counterpart of type_is_string (true for 16-bit string types) — confirm against the compiler
type_is_endian_platform :: proc($T: typeid) -> bool ---
type_is_endian_little :: proc($T: typeid) -> bool ---
diff --git a/base/runtime/core.odin b/base/runtime/core.odin
index baecb4146..478a3d307 100644
--- a/base/runtime/core.odin
+++ b/base/runtime/core.odin
@@ -61,6 +61,11 @@ Type_Info_Struct_Soa_Kind :: enum u8 {
Dynamic = 3,
}
+Type_Info_String_Encoding_Kind :: enum u8 { // Encoding tag carried by Type_Info_String.encoding
+ UTF_8 = 0, // print_type adds no suffix for this encoding
+ UTF_16 = 1, // print_type appends "16" to the type name for this encoding
+}
+
// Variant Types
Type_Info_Named :: struct {
name: string,
@@ -73,7 +78,7 @@ Type_Info_Rune :: struct {}
Type_Info_Float :: struct {endianness: Platform_Endianness}
Type_Info_Complex :: struct {}
Type_Info_Quaternion :: struct {}
-Type_Info_String :: struct {is_cstring: bool}
+Type_Info_String :: struct {is_cstring: bool, encoding: Type_Info_String_Encoding_Kind} // is_cstring: print_type emits a leading 'c'; encoding: UTF_8 or UTF_16 (print_type emits a trailing "16" for UTF_16)
Type_Info_Boolean :: struct {}
Type_Info_Any :: struct {}
Type_Info_Type_Id :: struct {}
@@ -397,6 +402,11 @@ Raw_String :: struct {
len: int,
}
+Raw_String16 :: struct { // Raw layout that `string16` values are transmuted to and from (see string16_eq, cstring16_to_string16)
+ data: [^]u16, // pointer to the first 16-bit code unit
+ len: int, // length in u16 units, not bytes; byte sizes are derived as len*size_of(u16)
+}
+
Raw_Slice :: struct {
data: rawptr,
len: int,
@@ -450,6 +460,12 @@ Raw_Cstring :: struct {
}
#assert(size_of(Raw_Cstring) == size_of(cstring))
+Raw_Cstring16 :: struct { // Raw layout of a `cstring16`: a single pointer with no stored length
+ data: [^]u16, // pointer to the u16 data; cstring16_len scans from the pointer until it reads a 0 unit
+}
+#assert(size_of(Raw_Cstring16) == size_of(cstring16)) // compile-time check that the raw struct and the built-in type have the same size
+
+
Raw_Soa_Pointer :: struct {
data: rawptr,
index: int,
diff --git a/base/runtime/core_builtin.odin b/base/runtime/core_builtin.odin
index e2ba14f3a..09118998c 100644
--- a/base/runtime/core_builtin.odin
+++ b/base/runtime/core_builtin.odin
@@ -86,11 +86,26 @@ copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int
}
return n
}
+
+// `copy_from_string16` is a built-in procedure that copies 16-bit elements from a source string16 `src` into a destination slice `dst`.
+// The source and destination may overlap. The result is the number of elements copied, which is the smaller
+// of len(dst) and len(src).
+//
+// Prefer the procedure group `copy`.
+@builtin
+copy_from_string16 :: proc "contextless" (dst: $T/[]$E/u16, src: $S/string16) -> int {
+	count := min(len(dst), len(src))
+	if count <= 0 {
+		// Nothing to transfer; skip the memory copy entirely.
+		return count
+	}
+	// The byte count is the element count scaled by the size of one u16 unit.
+	intrinsics.mem_copy(raw_data(dst), raw_data(src), count*size_of(u16))
+	return count
+}
+
// `copy` is a built-in procedure that copies elements from a source slice/string `src` to a destination slice `dst`.
// The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum
// of len(src) and len(dst).
@builtin
-copy :: proc{copy_slice, copy_from_string}
+copy :: proc{copy_slice, copy_from_string, copy_from_string16} // copy_from_string16 covers a `string16` source copied into a u16 slice
@@ -285,6 +300,15 @@ delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error
}
+// `delete_string16` frees the memory backing `str` through `allocator`.
+// The size reported to the allocator is the length of `str` in u16 units multiplied by size_of(u16).
+//
+// Note: Prefer the procedure group `delete`.
+@builtin
+delete_string16 :: proc(str: string16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	ptr := raw_data(str)
+	size_in_bytes := len(str)*size_of(u16)
+	return mem_free_with_size(ptr, size_in_bytes, allocator, loc)
+}
+// `delete_cstring16` frees the memory that `str` points at through `allocator`.
+// No size is passed along; only the pointer is handed to `mem_free`.
+//
+// Note: Prefer the procedure group `delete`.
+@builtin
+delete_cstring16 :: proc(str: cstring16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	ptr := (^u16)(str)
+	return mem_free(ptr, allocator, loc)
+}
+
// `delete` will try to free the underlying data of the passed built-in data structure (string, cstring, string16, cstring16, dynamic array, slice, or map), with the given `allocator` if the allocator supports this operation.
//
// Note: Prefer `delete` over the specific `delete_*` procedures where possible.
@@ -297,6 +321,8 @@ delete :: proc{
delete_map,
delete_soa_slice,
delete_soa_dynamic_array,
+ delete_string16,
+ delete_cstring16,
}
diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin
index 907b187f1..4f9509b23 100644
--- a/base/runtime/internal.odin
+++ b/base/runtime/internal.odin
@@ -493,12 +493,40 @@ string_cmp :: proc "contextless" (a, b: string) -> int {
return ret
}
+
+// `string16_eq` reports whether two string16 values have the same length and identical contents.
+string16_eq :: proc "contextless" (lhs, rhs: string16) -> bool {
+	a := transmute(Raw_String16)lhs
+	b := transmute(Raw_String16)rhs
+	if a.len == b.len {
+		// Lengths are in u16 units; memory_equal wants a byte count.
+		return #force_inline memory_equal(a.data, b.data, a.len*size_of(u16))
+	}
+	return false
+}
+
+// `string16_cmp` three-way compares two string16 values and returns -1, 0, or +1.
+//
+// The comparison is lexicographic over 16-bit code units: the first differing unit decides
+// the result, and if one string is a prefix of the other the shorter one orders first.
+//
+// The units are compared as u16 values rather than as raw bytes. A byte-wise comparison
+// looks at the low byte of each unit first on little-endian targets, which would order
+// U+0100 before U+0061.
+string16_cmp :: proc "contextless" (a, b: string16) -> int {
+	n := min(len(a), len(b))
+
+	// Every index is below both lengths, so the bounds checks are redundant here.
+	#no_bounds_check for i in 0..<n {
+		ca, cb := a[i], b[i]
+		if ca != cb {
+			return -1 if ca < cb else +1
+		}
+	}
+
+	// Common prefix is identical: the shorter string sorts first.
+	if len(a) != len(b) {
+		return -1 if len(a) < len(b) else +1
+	}
+	return 0
+}
+
string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) } // relational helpers for `string`: each is a thin wrapper over string_eq or the sign of string_cmp
string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 }
string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 }
string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 }
string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 }
+string16_ne :: #force_inline proc "contextless" (a, b: string16) -> bool { return !string16_eq(a, b) } // relational helpers for `string16`: each is a thin wrapper over string16_eq or the sign of string16_cmp
+string16_lt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) < 0 }
+string16_gt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) > 0 }
+string16_le :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) <= 0 }
+string16_ge :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) >= 0 }
+
+
cstring_len :: proc "contextless" (s: cstring) -> int {
p0 := uintptr((^byte)(s))
p := p0
@@ -508,6 +536,16 @@ cstring_len :: proc "contextless" (s: cstring) -> int {
return int(p - p0)
}
+// `cstring16_len` returns the number of u16 units that precede the first 0 unit of `s`.
+// A nil `s` has length 0.
+cstring16_len :: proc "contextless" (s: cstring16) -> int {
+	if s == nil {
+		return 0
+	}
+	units := ([^]u16)(s)
+	count := 0
+	for units[count] != 0 {
+		count += 1
+	}
+	return count
+}
+
cstring_to_string :: proc "contextless" (s: cstring) -> string {
if s == nil {
return ""
@@ -517,6 +555,15 @@ cstring_to_string :: proc "contextless" (s: cstring) -> string {
return transmute(string)Raw_String{ptr, n}
}
+// `cstring16_to_string16` views a cstring16 as a string16 without copying.
+// The result points at the same memory as `s`, and its length is taken from `cstring16_len`.
+// A nil `s` gives the empty string.
+cstring16_to_string16 :: proc "contextless" (s: cstring16) -> string16 {
+	if s != nil {
+		raw := Raw_String16{
+			data = ([^]u16)(s),
+			len  = cstring16_len(s),
+		}
+		return transmute(string16)raw
+	}
+	return ""
+}
+
cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool {
x := ([^]byte)(lhs)
@@ -559,6 +606,46 @@ cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return
cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 }
cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 }
+// `cstring16_eq` reports whether two cstring16 values are equal.
+// Identical pointers (including two nils) are equal, and a nil never equals a non-nil value.
+// Otherwise the lengths and then the contents up to the terminator must match.
+cstring16_eq :: proc "contextless" (lhs, rhs: cstring16) -> bool {
+	a := ([^]u16)(lhs)
+	b := ([^]u16)(rhs)
+	switch {
+	case a == b:
+		return true
+	case a == nil || b == nil:
+		// Both being nil was handled by the pointer-equality case, so exactly one is nil here.
+		return false
+	}
+
+	len_a := cstring16_len(lhs)
+	len_b := cstring16_len(rhs)
+	if len_a != len_b {
+		return false
+	}
+	return #force_inline memory_equal(a, b, len_a*size_of(u16))
+}
+
+// `cstring16_cmp` three-way compares two cstring16 values and returns -1, 0, or +1.
+//
+// Identical pointers (including two nils) compare equal, and a nil value orders before any
+// non-nil value. Otherwise the comparison is lexicographic over 16-bit code units up to the
+// 0 terminator, so a string that is a proper prefix of the other orders first.
+//
+// The units are compared as u16 values rather than as raw bytes. A byte-wise comparison
+// looks at the low byte of each unit first on little-endian targets, which would order
+// U+0100 before U+0061. Both strings are walked together in a single pass.
+cstring16_cmp :: proc "contextless" (lhs, rhs: cstring16) -> int {
+	x := ([^]u16)(lhs)
+	y := ([^]u16)(rhs)
+	if x == y {
+		return 0
+	}
+	if (x == nil) ~ (y == nil) {
+		return -1 if x == nil else +1
+	}
+
+	// Advance while the units agree; reaching the shared terminator means equality.
+	i := 0
+	for x[i] == y[i] {
+		if x[i] == 0 {
+			return 0
+		}
+		i += 1
+	}
+	// First mismatch decides. A terminator (0) is below every other unit, so the
+	// shorter string orders first.
+	return -1 if x[i] < y[i] else +1
+}
+
+cstring16_ne :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return !cstring16_eq(a, b) } // relational helpers for `cstring16`: each is a thin wrapper over cstring16_eq or the sign of cstring16_cmp
+cstring16_lt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) < 0 }
+cstring16_gt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) > 0 }
+cstring16_le :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) <= 0 }
+cstring16_ge :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) >= 0 }
complex32_eq :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
complex32_ne :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) != real(b) || imag(a) != imag(b) }
@@ -694,6 +781,68 @@ string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) {
return r, size
}
+
+// `string16_decode_rune` decodes the first rune of the UTF-16 string `s`.
+//
+// Returns the rune together with the number of u16 units it occupies:
+//   - empty input gives (REPLACEMENT_CHAR, 0)
+//   - a unit outside the surrogate range gives that unit as a rune and 1
+//   - a high surrogate followed by a low surrogate gives the combined rune and 2
+//   - any other surrogate gives (REPLACEMENT_CHAR, 1)
+string16_decode_rune :: #force_inline proc "contextless" (s: string16) -> (rune, int) {
+	REPLACEMENT_CHAR :: '\ufffd'
+	_surr1     :: 0xd800  // first high surrogate
+	_surr2     :: 0xdc00  // first low surrogate
+	_surr3     :: 0xe000  // first unit past the surrogate range
+	_surr_self :: 0x10000 // first code point that needs a surrogate pair
+
+	if len(s) < 1 {
+		return REPLACEMENT_CHAR, 0
+	}
+
+	lead := s[0]
+	if lead < _surr1 || _surr3 <= lead {
+		// Not a surrogate: the unit is the code point.
+		return rune(lead), 1
+	}
+
+	// `lead` is a surrogate here; only a high surrogate with a low surrogate after it is valid.
+	if lead < _surr2 && 1 < len(s) {
+		trail := s[1]
+		if _surr2 <= trail && trail < _surr3 {
+			hi, lo := rune(lead), rune(trail)
+			return (hi-_surr1)<<10 | (lo - _surr2) + _surr_self, 2
+		}
+	}
+	return REPLACEMENT_CHAR, 1
+}
+
+// `string16_decode_last_rune` decodes the final rune of the UTF-16 string `s`.
+//
+// Returns the rune together with the number of u16 units it occupies:
+//   - empty input gives (REPLACEMENT_CHAR, 0)
+//   - a unit outside the surrogate range gives that unit as a rune and 1
+//   - a low surrogate preceded by a high surrogate gives the combined rune and 2
+//   - any unpaired surrogate gives (REPLACEMENT_CHAR, 1)
+//
+// An unpaired trailing low surrogate consumes only itself, which matches how
+// `string16_decode_rune` treats unpaired surrogates. The unit before it is left for the
+// next call instead of being swallowed into the replacement character.
+string16_decode_last_rune :: proc "contextless" (s: string16) -> (rune, int) {
+	REPLACEMENT_CHAR :: '\ufffd'
+	_surr1     :: 0xd800  // first high surrogate
+	_surr2     :: 0xdc00  // first low surrogate
+	_surr3     :: 0xe000  // first unit past the surrogate range
+	_surr_self :: 0x10000 // first code point that needs a surrogate pair
+
+	r := rune(REPLACEMENT_CHAR)
+
+	if len(s) < 1 {
+		return r, 0
+	}
+
+	n := len(s)-1
+	c := s[n]
+	w := 1
+	if _surr2 <= c && c < _surr3 {
+		// Trailing low surrogate: it is only valid directly after a high surrogate.
+		if n >= 1 {
+			r1 := rune(s[n-1])
+			r2 := rune(c)
+			if _surr1 <= r1 && r1 < _surr2 {
+				r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self
+				// Widen to two units only when the pair is well formed.
+				w = 2
+			}
+		}
+	} else if c < _surr1 || _surr3 <= c {
+		r = rune(c)
+	}
+	// Remaining case: a trailing high surrogate, reported as REPLACEMENT_CHAR with width 1.
+	return r, w
+}
+
+
+
abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 {
p, q := abs(real(x)), abs(imag(x))
if p < q {
diff --git a/base/runtime/print.odin b/base/runtime/print.odin
index 145f002d1..2cfb6661b 100644
--- a/base/runtime/print.odin
+++ b/base/runtime/print.odin
@@ -293,7 +293,14 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
print_string("quaternion")
print_u64(u64(8*ti.size))
case Type_Info_String:
+ if info.is_cstring {
+ print_byte('c')
+ }
print_string("string")
+ switch info.encoding {
+ case .UTF_8: /**/
+ case .UTF_16: print_string("16")
+ }
case Type_Info_Boolean:
switch ti.id {
case bool: print_string("bool")