From 2561427dd396a69cd49eb02c0814c4e8e8b3a08f Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 2 Aug 2025 11:00:15 +0100 Subject: Add `string16` and `cstring16` (UTF-16 based strings) --- src/check_expr.cpp | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'src/check_expr.cpp') diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 6723a7580..57073e22f 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2862,6 +2862,14 @@ gb_internal void add_comparison_procedures_for_fields(CheckerContext *c, Type *t add_package_dependency(c, "runtime", "string_eq"); add_package_dependency(c, "runtime", "string_ne"); break; + case Basic_cstring16: + add_package_dependency(c, "runtime", "cstring16_eq"); + add_package_dependency(c, "runtime", "cstring16_ne"); + break; + case Basic_string16: + add_package_dependency(c, "runtime", "string16_eq"); + add_package_dependency(c, "runtime", "string16_ne"); + break; } break; case Type_Struct: @@ -3035,6 +3043,24 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper case Token_LtEq: add_package_dependency(c, "runtime", "cstring_le"); break; case Token_GtEq: add_package_dependency(c, "runtime", "cstring_gt"); break; } + } else if (is_type_cstring16(x->type) && is_type_cstring16(y->type)) { + switch (op) { + case Token_CmpEq: add_package_dependency(c, "runtime", "cstring16_eq"); break; + case Token_NotEq: add_package_dependency(c, "runtime", "cstring16_ne"); break; + case Token_Lt: add_package_dependency(c, "runtime", "cstring16_lt"); break; + case Token_Gt: add_package_dependency(c, "runtime", "cstring16_gt"); break; + case Token_LtEq: add_package_dependency(c, "runtime", "cstring16_le"); break; + case Token_GtEq: add_package_dependency(c, "runtime", "cstring16_gt"); break; + } + } else if (is_type_string16(x->type) || is_type_string16(y->type)) { + switch (op) { + case Token_CmpEq: add_package_dependency(c, "runtime", "string16_eq"); break; + case Token_NotEq: add_package_dependency(c, "runtime", "string16_ne"); break; + case Token_Lt: add_package_dependency(c, "runtime", "string16_lt"); break; + case Token_Gt: add_package_dependency(c, "runtime", "string16_gt"); break; + case Token_LtEq: add_package_dependency(c, "runtime", "string16_le"); break; + case Token_GtEq: add_package_dependency(c, "runtime", "string16_gt"); break; + } } else if (is_type_string(x->type) || is_type_string(y->type)) { switch (op) { case Token_CmpEq: add_package_dependency(c, "runtime", "string_eq"); break; @@ -3340,6 +3366,11 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type return true; } + // []u16 <-> string16 (not cstring16) + if (is_type_u16_slice(src) && (is_type_string16(dst) && !is_type_cstring16(dst))) { + return true; + } + // cstring -> string if (are_types_identical(src, t_cstring) && are_types_identical(dst, t_string)) { if (operand->mode != Addressing_Constant) { @@ -3347,6 +3378,14 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type } return true; } + // cstring16 -> string16 + if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) { + if (operand->mode != Addressing_Constant) { + add_package_dependency(c, "runtime", "cstring16_to_string16"); + } + return true; + } + // cstring -> ^u8 if (are_types_identical(src, t_cstring) && is_type_u8_ptr(dst)) { return !is_constant; @@ -3372,6 +3411,34 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type if (is_type_rawptr(src) && are_types_identical(dst, t_cstring)) { return !is_constant; } + + // cstring -> ^u16 + if (are_types_identical(src, t_cstring16) && is_type_u16_ptr(dst)) { + return !is_constant; + } + // cstring -> [^]u16 + if (are_types_identical(src, t_cstring16) && is_type_u16_multi_ptr(dst)) { + return !is_constant; + } + // cstring -> rawptr + if (are_types_identical(src, t_cstring16) && is_type_rawptr(dst)) { + return !is_constant; + } + + + // ^u16 -> cstring16 + if (is_type_u16_ptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // [^]u16 -> cstring + if (is_type_u16_multi_ptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // rawptr -> cstring16 + if (is_type_rawptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // proc <-> proc if (is_type_proc(src) && is_type_proc(dst)) { if (is_type_polymorphic(dst)) { @@ -4558,6 +4625,8 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar // target_type = t_untyped_nil; } else if (is_type_cstring(target_type)) { // target_type = t_untyped_nil; + } else if (is_type_cstring16(target_type)) { + // target_type = t_untyped_nil; } else if (!type_has_nil(target_type)) { operand->mode = Addressing_Invalid; convert_untyped_error(c, operand, target_type); @@ -8226,6 +8295,7 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 case Type_Basic: if (t->Basic.kind == Basic_string) { if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String); *max_count = o->value.value_string.len; } if (o->mode != Addressing_Constant) { @@ -8233,6 +8303,16 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 } o->type = t_u8; return true; + } else if (t->Basic.kind == Basic_string16) { + if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String16); + *max_count = o->value.value_string16.len; + } + if (o->mode != Addressing_Constant) { + o->mode = Addressing_Value; + } + o->type = t_u16; + return true; } else if (t->Basic.kind == Basic_UntypedString) { if (o->mode == Addressing_Constant) { *max_count = o->value.value_string.len; @@ -10879,9 +10959,17 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node, if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) { valid = true; if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String); max_count = o->value.value_string.len; } o->type = type_deref(o->type); + } else if (t->Basic.kind == Basic_string16) { + valid = true; + if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String16); + max_count = o->value.value_string16.len; + } + o->type = type_deref(o->type); } break; -- cgit v1.2.3 From ae02d3d02d2eb5132fa7c6573ed7db20d7e18f3e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 2 Aug 2025 11:55:16 +0100 Subject: Begin supporting `string16` across the core library --- base/intrinsics/intrinsics.odin | 1 + base/runtime/print.odin | 6 +++ core/encoding/cbor/tags.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 + core/encoding/json/marshal.odin | 8 ++-- core/encoding/json/unmarshal.odin | 4 +- core/flags/internal_rtti.odin | 2 + core/fmt/fmt.odin | 16 ++++---- core/io/io.odin | 4 +- core/reflect/types.odin | 8 ++-- src/check_builtin.cpp | 2 + src/check_expr.cpp | 42 +++++++++++++++++---- src/checker_builtin_procs.hpp | 2 + src/llvm_backend.cpp | 6 +++ src/llvm_backend_const.cpp | 77 +++++++++++++++++++++++++++++++++++++-- src/llvm_backend_debug.cpp | 14 +++++++ src/llvm_backend_expr.cpp | 3 +- src/llvm_backend_general.cpp | 37 +++++++++++++++++++ src/llvm_backend_utility.cpp | 19 +++++++++- src/string.cpp | 7 ++++ 20 files changed, 230 insertions(+), 32 deletions(-) (limited to 'src/check_expr.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index be75739fe..d45d24f48 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -141,6 +141,7 @@ type_is_quaternion :: proc($T: typeid) -> bool --- type_is_string :: proc($T: typeid) -> bool --- type_is_typeid :: proc($T: typeid) -> bool --- type_is_any :: proc($T: typeid) -> bool --- +type_is_string16 :: proc($T: typeid) -> bool --- type_is_endian_platform :: proc($T: typeid) -> bool --- type_is_endian_little :: proc($T: typeid) -> bool --- diff --git a/base/runtime/print.odin b/base/runtime/print.odin index 145f002d1..85ed49445 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -293,7 +293,13 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { print_string("quaternion") print_u64(u64(8*ti.size)) case Type_Info_String: + if info.is_cstring { + print_byte('c') + } print_string("string") + if info.is_utf16 { + print_string("16") + } case Type_Info_Boolean: switch ti.id { case bool: print_string("bool") diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 17420af46..e0e69cbf5 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -298,7 +298,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, #partial switch t in ti.variant { case reflect.Type_Info_String: - + assert(!t.is_utf16) if t.is_cstring { length := base64.decoded_len(bytes) builder := strings.builder_make(0, length+1) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 365ac5d6f..2840429f5 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -335,6 +335,8 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: + assert(!t.is_utf16) + bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return if t.is_cstring { diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index ebb9a639c..cdb00a354 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -353,10 +353,10 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err: #partial switch info in ti.variant { case runtime.Type_Info_String: switch x in v { - case string: - return x == "" - case cstring: - return x == nil || x == "" + case string: return x == "" + case cstring: return x == nil || x == "" + case string16: return x == "" + case cstring16: return x == nil || x == "" } case runtime.Type_Info_Any: return v.(any) == nil diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index b9ed1476f..51e7e3b81 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -570,7 +570,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm key_ptr: rawptr #partial switch tk in t.key.variant { - case runtime.Type_Info_String: + case runtime.Type_Info_String: + assert(!tk.is_utf16) + key_ptr = rawptr(&key) key_cstr: cstring if reflect.is_cstring(t.key) { diff --git a/core/flags/internal_rtti.odin b/core/flags/internal_rtti.odin index 1c559ca55..58224cc87 100644 --- a/core/flags/internal_rtti.odin +++ b/core/flags/internal_rtti.odin @@ -127,6 +127,8 @@ parse_and_set_pointer_by_base_type :: proc(ptr: rawptr, str: string, type_info: } case runtime.Type_Info_String: + assert(!specific_type_info.is_utf16) + if specific_type_info.is_cstring { cstr_ptr := (^cstring)(ptr) if cstr_ptr != nil { diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 7fe6287d4..9c245de94 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -2346,14 +2346,14 @@ fmt_array :: proc(fi: ^Info, data: rawptr, n: int, elem_size: int, elem: ^reflec } switch reflect.type_info_base(elem).id { - case byte: fmt_string(fi, string(([^]byte)(data)[:n]), verb); return - case u16: print_utf16(fi, ([^]u16)(data)[:n]); return - case u16le: print_utf16(fi, ([^]u16le)(data)[:n]); return - case u16be: print_utf16(fi, ([^]u16be)(data)[:n]); return - case u32: print_utf32(fi, ([^]u32)(data)[:n]); return - case u32le: print_utf32(fi, ([^]u32le)(data)[:n]); return - case u32be: print_utf32(fi, ([^]u32be)(data)[:n]); return - case rune: print_utf32(fi, ([^]rune)(data)[:n]); return + case byte: fmt_string(fi, string (([^]byte)(data)[:n]), verb); return + case u16: fmt_string16(fi, string16(([^]u16) (data)[:n]), verb); return + case u16le: print_utf16(fi, ([^]u16le)(data)[:n]); return + case u16be: print_utf16(fi, ([^]u16be)(data)[:n]); return + case u32: print_utf32(fi, ([^]u32)(data)[:n]); return + case u32le: print_utf32(fi, ([^]u32le)(data)[:n]); return + case u32be: print_utf32(fi, ([^]u32be)(data)[:n]); return + case rune: print_utf32(fi, ([^]rune)(data)[:n]); return } } if verb == 'p' { diff --git a/core/io/io.odin b/core/io/io.odin index 5431519bf..c4eb6a073 100644 --- a/core/io/io.odin +++ b/core/io/io.odin @@ -319,7 +319,6 @@ write_string :: proc(s: Writer, str: string, n_written: ^int = nil) -> (n: int, write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: int, err: Error) { for i := 0; i < len(str); i += 1 { r := rune(utf16.REPLACEMENT_CHAR) - switch c := str[i]; { case c < utf16._surr1, utf16._surr3 <= c: r = rune(c) @@ -329,7 +328,8 @@ write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: i i += 1 } - w, err := write_rune(s, r, n_written) + w: int + w, err = write_rune(s, r, n_written) n += w if err != nil { return diff --git a/core/reflect/types.odin b/core/reflect/types.odin index 511c5c9bd..2351408cc 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -511,9 +511,11 @@ write_type_writer :: #force_no_inline proc(w: io.Writer, ti: ^Type_Info, n_writt io.write_i64(w, i64(8*ti.size), 10, &n) or_return case Type_Info_String: if info.is_cstring { - io.write_string(w, "cstring", &n) or_return - } else { - io.write_string(w, "string", &n) or_return + io.write_byte(w, 'c', &n) or_return + } + io.write_string(w, "string", &n) or_return + if info.is_utf16 { + io.write_string(w, "16", &n) or_return } case Type_Info_Boolean: switch ti.id { diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index d36cf4520..4abace637 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -19,6 +19,7 @@ gb_global BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_bool is_type_complex, is_type_quaternion, is_type_string, + is_type_string16, is_type_typeid, is_type_any, is_type_endian_platform, @@ -6139,6 +6140,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case BuiltinProc_type_is_complex: case BuiltinProc_type_is_quaternion: case BuiltinProc_type_is_string: + case BuiltinProc_type_is_string16: case BuiltinProc_type_is_typeid: case BuiltinProc_type_is_any: case BuiltinProc_type_is_endian_platform: diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 57073e22f..8d2e4d637 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2106,6 +2106,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i } else if (is_type_boolean(type)) { return in_value.kind == ExactValue_Bool; } else if (is_type_string(type)) { + if (in_value.kind == ExactValue_String16) { + return is_type_string16(type) || is_type_cstring16(type); + } return in_value.kind == ExactValue_String; } else if (is_type_integer(type) || is_type_rune(type)) { if (in_value.kind == ExactValue_Bool) { @@ -2320,6 +2323,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i if (in_value.kind == ExactValue_String) { return false; } + if (in_value.kind == ExactValue_String16) { + return false; + } if (out_value) *out_value = in_value; } else if (is_type_bit_set(type)) { if (in_value.kind == ExactValue_Integer) { @@ -4654,6 +4660,13 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar break; } } + } else if (operand->value.kind == ExactValue_String16) { + String16 s = operand->value.value_string16; + if (is_type_u16_array(t)) { + if (s.len == t->Array.count) { + break; + } + } } operand->mode = Addressing_Invalid; convert_untyped_error(c, operand, target_type); @@ -4983,6 +4996,12 @@ gb_internal ExactValue get_constant_field_single(CheckerContext *c, ExactValue v if (success_) *success_ = true; if (finish_) *finish_ = true; return exact_value_u64(val); + } else if (value.kind == ExactValue_String16) { + GB_ASSERT(0 <= index && index < value.value_string.len); + u16 val = value.value_string16[index]; + if (success_) *success_ = true; + if (finish_) *finish_ = true; + return exact_value_u64(val); } if (value.kind != ExactValue_Compound) { if (success_) *success_ = true; @@ -11124,15 +11143,21 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node, o->expr = node; return kind; } - - String s = {}; - if (o->value.kind == ExactValue_String) { - s = o->value.value_string; - } - o->mode = Addressing_Constant; o->type = t; - o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + + if (o->value.kind == ExactValue_String16) { + String16 s = o->value.value_string16; + + o->value = exact_value_string16(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + } else { + String s = {}; + if (o->value.kind == ExactValue_String) { + s = o->value.value_string; + } + + o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + } } return kind; } @@ -11221,6 +11246,7 @@ gb_internal ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast Type *t = t_invalid; switch (node->tav.value.kind) { case ExactValue_String: t = t_untyped_string; break; + case ExactValue_String16: t = t_string16; break; // TODO(bill): determine this correctly case ExactValue_Float: t = t_untyped_float; break; case ExactValue_Complex: t = t_untyped_complex; break; case ExactValue_Quaternion: t = t_untyped_quaternion; break; @@ -11657,6 +11683,8 @@ gb_internal bool is_exact_value_zero(ExactValue const &v) { return !v.value_bool; case ExactValue_String: return v.value_string.len == 0; + case ExactValue_String16: + return v.value_string16.len == 0; case ExactValue_Integer: return big_int_is_zero(&v.value_integer); case ExactValue_Float: diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 8e135ab10..bff887d9e 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -250,6 +250,7 @@ BuiltinProc__type_simple_boolean_begin, BuiltinProc_type_is_complex, BuiltinProc_type_is_quaternion, BuiltinProc_type_is_string, + BuiltinProc_type_is_string16, BuiltinProc_type_is_typeid, BuiltinProc_type_is_any, @@ -607,6 +608,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("type_is_complex"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_quaternion"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_string"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("type_is_string16"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_typeid"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_any"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 13a1d8cf3..f37415cc1 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -1264,7 +1264,13 @@ String lb_get_objc_type_encoding(Type *t, isize pointer_depth = 0) { case Basic_string: return build_context.metrics.int_size == 4 ? str_lit("{string=*i}") : str_lit("{string=*q}"); + case Basic_string16: + return build_context.metrics.int_size == 4 ? str_lit("{string16=*i}") : str_lit("{string16=*q}"); + case Basic_cstring: return str_lit("*"); + case Basic_cstring16: return str_lit("*"); + + case Basic_any: return str_lit("{any=^v^v}"); // rawptr + ^Type_Info case Basic_typeid: diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index c3112934e..8c05ed4a2 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -122,6 +122,25 @@ gb_internal lbValue lb_const_ptr_cast(lbModule *m, lbValue value, Type *t) { gb_internal LLVMValueRef llvm_const_string_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) { + GB_ASSERT(!is_type_string16(t)); + if (build_context.metrics.ptr_size < build_context.metrics.int_size) { + LLVMValueRef values[3] = { + data, + LLVMConstNull(lb_type(m, t_i32)), + len, + }; + return llvm_const_named_struct_internal(lb_type(m, t), values, 3); + } else { + LLVMValueRef values[2] = { + data, + len, + }; + return llvm_const_named_struct_internal(lb_type(m, t), values, 2); + } +} + +gb_internal LLVMValueRef llvm_const_string16_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) { + GB_ASSERT(is_type_string16(t)); if (build_context.metrics.ptr_size < build_context.metrics.int_size) { LLVMValueRef values[3] = { data, @@ -238,6 +257,10 @@ gb_internal lbValue lb_const_string(lbModule *m, String const &value) { return lb_const_value(m, t_string, exact_value_string(value)); } +gb_internal lbValue lb_const_string(lbModule *m, String16 const &value) { + return lb_const_value(m, t_string16, exact_value_string16(value)); +} + gb_internal lbValue lb_const_bool(lbModule *m, Type *type, bool value) { lbValue res = {}; @@ -569,7 +592,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb GB_ASSERT(is_type_slice(type)); res.value = lb_find_or_add_entity_string_byte_slice_with_type(m, value.value_string, original_type).value; return res; - } else { + } else if (value.kind == ExactValue_String16) { + GB_ASSERT(is_type_slice(type)); + GB_PANIC("TODO(bill): UTF-16 String"); + return res; + }else { ast_node(cl, CompoundLit, value.value_compound); isize count = cl->elems.count; @@ -751,15 +778,23 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb { bool custom_link_section = cc.link_section.len > 0; - LLVMValueRef ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); + LLVMValueRef ptr = nullptr; lbValue res = {}; res.type = default_type(original_type); + if (is_type_string16(res.type) || is_type_cstring16(res.type)) { + TEMPORARY_ALLOCATOR_GUARD(); + String16 s16 = string_to_string16(temporary_allocator(), value.value_string); + ptr = lb_find_or_add_entity_string16_ptr(m, s16, custom_link_section); + } else { + ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); + } + if (custom_link_section) { LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section)); } - if (is_type_cstring(res.type)) { + if (is_type_cstring(res.type) || is_type_cstring16(res.type)) { res.value = ptr; } else { if (value.value_string.len == 0) { @@ -768,12 +803,46 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string.len, true); GB_ASSERT(is_type_string(original_type)); - res.value = llvm_const_string_internal(m, original_type, ptr, str_len); + if (is_type_string16(res.type)) { + res.value = llvm_const_string16_internal(m, original_type, ptr, str_len); + } else { + res.value = llvm_const_string_internal(m, original_type, ptr, str_len); + } + } + + return res; + } + + case ExactValue_String16: + { + GB_ASSERT(is_type_string16(res.type) || is_type_cstring16(res.type)); + + bool custom_link_section = cc.link_section.len > 0; + + LLVMValueRef ptr = lb_find_or_add_entity_string16_ptr(m, value.value_string16, custom_link_section); + lbValue res = {}; + res.type = default_type(original_type); + + if (custom_link_section) { + LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section)); + } + + if (is_type_cstring16(res.type)) { + res.value = ptr; + } else { + if (value.value_string16.len == 0) { + ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); + } + LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string16.len, true); + GB_ASSERT(is_type_string(original_type)); + + res.value = llvm_const_string16_internal(m, original_type, ptr, str_len); } return res; } + case ExactValue_Integer: if (is_type_pointer(type) || is_type_multi_pointer(type) || is_type_proc(type)) { LLVMTypeRef t = lb_type(m, original_type); diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 024c5564e..182920fc7 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -802,6 +802,20 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) { LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("char"), 8, LLVMDWARFTypeEncoding_Unsigned); return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring", 7); } + + case Basic_string16: + { + LLVMMetadataRef elements[2] = {}; + elements[0] = lb_debug_struct_field(m, str_lit("data"), t_u16_ptr, 0); + elements[1] = lb_debug_struct_field(m, str_lit("len"), t_int, int_bits); + return lb_debug_basic_struct(m, str_lit("string16"), 2*int_bits, int_bits, elements, gb_count_of(elements)); + } + case Basic_cstring16: + { + LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("wchar_t"), 16, LLVMDWARFTypeEncoding_Unsigned); + return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring16", 7); + } + case Basic_any: { LLVMMetadataRef elements[2] = {}; diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index fbf0dea11..3463b6083 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -4354,12 +4354,13 @@ gb_internal lbAddr lb_build_addr_index_expr(lbProcedure *p, Ast *expr) { } - case Type_Basic: { // Basic_string + case Type_Basic: { // Basic_string/Basic_string16 lbValue str; lbValue elem; lbValue len; lbValue index; + str = lb_build_expr(p, ie->expr); if (deref) { str = lb_emit_load(p, str); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index d9771a75b..9ef1c23c0 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -2715,6 +2715,43 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string_ptr(lbModule *m, String co } } +gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String16 const &str, bool custom_link_section) { + // TODO(bill): caching for UTF-16 strings + + LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)}; + + LLVMValueRef data = nullptr; + { + LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx); + + TEMPORARY_ALLOCATOR_GUARD(); + + LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1); + + for (isize i = 0; i < str.len; i++) { + values[i] = LLVMConstInt(llvm_u16, str.text[i], false); + } + values[str.len] = LLVMConstInt(llvm_u16, 0, false); + + data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1)); + } + + + u32 id = m->global_array_index.fetch_add(1); + gbString name = gb_string_make(temporary_allocator(), "csbs$"); + name = gb_string_appendc(name, m->module_name); + name = gb_string_append_fmt(name, "$%x", id); + + LLVMTypeRef type = LLVMTypeOf(data); + LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name); + LLVMSetInitializer(global_data, data); + lb_make_global_private_const(global_data); + LLVMSetAlignment(global_data, 1); + + LLVMValueRef ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2); + return ptr; +} + gb_internal lbValue lb_find_or_add_entity_string(lbModule *m, String const &str, bool custom_link_section) { LLVMValueRef ptr = nullptr; if (str.len != 0) { diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index d4117b7ff..ea1bae4e9 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -6,6 +6,7 @@ gb_internal bool lb_is_type_aggregate(Type *t) { case Type_Basic: switch (t->Basic.kind) { case Basic_string: + case Basic_string16: case Basic_any: return true; @@ -981,7 +982,8 @@ gb_internal i32 lb_convert_struct_index(lbModule *m, Type *t, i32 index) { } else if (build_context.ptr_size != build_context.int_size) { switch (t->kind) { case Type_Basic: - if (t->Basic.kind != Basic_string) { + if (t->Basic.kind != Basic_string && + t->Basic.kind != Basic_string16) { break; } /*fallthrough*/ @@ -1160,6 +1162,11 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) { case 0: result_type = alloc_type_pointer(t->Slice.elem); break; case 1: result_type = t_int; break; } + } else if (is_type_string16(t)) { + switch (index) { + case 0: result_type = t_u16_ptr; break; + case 1: result_type = t_int; break; + } } else if (is_type_string(t)) { switch (index) { case 0: result_type = t_u8_ptr; break; @@ -1273,6 +1280,12 @@ gb_internal lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) { switch (t->kind) { case Type_Basic: switch (t->Basic.kind) { + case Basic_string16: + switch (index) { + case 0: result_type = t_u16_ptr; break; + case 1: result_type = t_int; break; + } + break; case Basic_string: switch (index) { case 0: result_type = t_u8_ptr; break; @@ -1440,6 +1453,10 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection e = lb_emit_struct_ep(p, e, index); break; + case Basic_string16: + e = lb_emit_struct_ep(p, e, index); + break; + default: GB_PANIC("un-gep-able type %s", type_to_string(type)); break; diff --git a/src/string.cpp b/src/string.cpp index 8405938f4..8cc0e93f3 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -79,6 +79,13 @@ gb_internal String substring(String const &s, isize lo, isize hi) { return make_string(s.text+lo, hi-lo); } +gb_internal String16 substring(String16 const &s, isize lo, isize hi) { + isize max = s.len; + GB_ASSERT_MSG(lo <= hi && hi <= max, "%td..%td..%td", lo, hi, max); + + return make_string16(s.text+lo, hi-lo); +} + gb_internal char *alloc_cstring(gbAllocator a, String s) { char *c_str = gb_alloc_array(a, char, s.len+1); -- cgit v1.2.3 From bb4bc316a4bd86774953f1e8fcefffb5ed8bbf37 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 2 Aug 2025 12:20:35 +0100 Subject: `for in string16`; Support `string16` across core --- base/runtime/internal.odin | 62 +++++++++++++++++ core/c/libc/locale.odin | 16 ++--- core/debug/trace/trace_windows.odin | 2 +- core/dynlib/lib_windows.odin | 2 +- core/mem/virtual/virtual_windows.odin | 2 +- core/os/dir_windows.odin | 2 +- core/os/os2/file_windows.odin | 2 +- core/os/stat_windows.odin | 4 +- core/path/filepath/path_windows.odin | 4 +- core/sys/info/platform_windows.odin | 12 ++-- core/sys/windows/comctl32.odin | 10 +-- core/sys/windows/ip_helper.odin | 6 +- core/sys/windows/types.odin | 4 +- core/sys/windows/util.odin | 50 +++++++------- core/time/timezone/tz_windows.odin | 8 +-- core/unicode/utf16/utf16.odin | 32 ++++++++- src/check_builtin.cpp | 6 +- src/check_expr.cpp | 2 +- src/check_stmt.cpp | 26 +++++++- src/llvm_backend_stmt.cpp | 121 +++++++++++++++++++++++++++++++++- src/llvm_backend_utility.cpp | 17 +++++ 21 files changed, 321 insertions(+), 69 deletions(-) (limited to 'src/check_expr.cpp') diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin index 660af58ab..4f9509b23 100644 --- a/base/runtime/internal.odin +++ b/base/runtime/internal.odin @@ -781,6 +781,68 @@ string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) { return r, size } + +string16_decode_rune :: #force_inline proc "contextless" (s: string16) -> (rune, int) { + REPLACEMENT_CHAR :: '\ufffd' + _surr1 :: 0xd800 + _surr2 :: 0xdc00 + _surr3 :: 0xe000 + _surr_self :: 0x10000 + + r := rune(REPLACEMENT_CHAR) + + if len(s) < 1 { + return r, 0 + } + + w := 1 + switch c := s[0]; { + case c < _surr1, _surr3 <= c: + r = rune(c) + case _surr1 <= c && c < _surr2 && 1 < len(s) && + _surr2 <= s[1] && s[1] < _surr3: + r1, r2 := rune(c), rune(s[1]) + if _surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3 { + r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self + } + w += 1 + } + return r, w +} + +string16_decode_last_rune :: proc "contextless" (s: string16) -> (rune, int) { + REPLACEMENT_CHAR :: '\ufffd' + _surr1 :: 0xd800 + _surr2 :: 0xdc00 + _surr3 :: 0xe000 + _surr_self :: 0x10000 + + r := rune(REPLACEMENT_CHAR) + + if len(s) < 1 { + return r, 0 + } + + n := len(s)-1 + c := s[n] + w := 1 + if _surr2 <= c && c < _surr3 { + if n >= 1 { + r1 := rune(s[n-1]) + r2 := rune(c) + if _surr1 <= r1 && r1 < _surr2 { + r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self + } + w = 2 + } + } else if c < _surr1 || _surr3 <= c { + r = rune(c) + } + return r, w +} + + + abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 { p, q := abs(real(x)), abs(imag(x)) if p < q { diff --git a/core/c/libc/locale.odin b/core/c/libc/locale.odin index 27317526c..3216e0f90 100644 --- a/core/c/libc/locale.odin +++ b/core/c/libc/locale.odin @@ -72,14 +72,14 @@ when ODIN_OS == .Windows { n_sep_by_space: c.char, p_sign_posn: c.char, n_sign_posn: c.char, - _W_decimal_point: [^]u16 `fmt:"s,0"`, - _W_thousands_sep: [^]u16 `fmt:"s,0"`, - _W_int_curr_symbol: [^]u16 `fmt:"s,0"`, - _W_currency_symbol: [^]u16 `fmt:"s,0"`, - _W_mon_decimal_point: [^]u16 `fmt:"s,0"`, - _W_mon_thousands_sep: [^]u16 `fmt:"s,0"`, - _W_positive_sign: [^]u16 `fmt:"s,0"`, - _W_negative_sign: [^]u16 `fmt:"s,0"`, + _W_decimal_point: cstring16, + _W_thousands_sep: cstring16, + _W_int_curr_symbol: cstring16, + _W_currency_symbol: cstring16, + _W_mon_decimal_point: cstring16, + _W_mon_thousands_sep: cstring16, + _W_positive_sign: cstring16, + _W_negative_sign: cstring16, } } else { lconv :: struct { diff --git a/core/debug/trace/trace_windows.odin b/core/debug/trace/trace_windows.odin index 96507714c..04e92f125 100644 --- a/core/debug/trace/trace_windows.odin +++ b/core/debug/trace/trace_windows.odin @@ -54,7 +54,7 @@ _resolve :: proc(ctx: ^Context, frame: Frame, allocator: runtime.Allocator) -> ( symbol.SizeOfStruct = size_of(symbol^) symbol.MaxNameLen = 255 if win32.SymFromAddrW(ctx.impl.hProcess, win32.DWORD64(frame), &{}, symbol) { - fl.procedure, _ = win32.wstring_to_utf8(&symbol.Name[0], -1, allocator) + fl.procedure, _ = win32.wstring_to_utf8(cstring16(&symbol.Name[0]), -1, allocator) } else { fl.procedure = fmt.aprintf("(procedure: 0x%x)", frame, allocator=allocator) } diff --git a/core/dynlib/lib_windows.odin b/core/dynlib/lib_windows.odin index 05cd2cb3c..95372dac6 100644 --- a/core/dynlib/lib_windows.odin +++ b/core/dynlib/lib_windows.odin @@ -13,7 +13,7 @@ _LIBRARY_FILE_EXTENSION :: "dll" _load_library :: proc(path: string, global_symbols: bool, allocator: runtime.Allocator) -> (Library, bool) { // NOTE(bill): 'global_symbols' is here only for consistency with POSIX which has RTLD_GLOBAL wide_path := win32.utf8_to_wstring(path, allocator) - defer free(wide_path, allocator) + defer free(rawptr(wide_path), allocator) handle := cast(Library)win32.LoadLibraryW(wide_path) return handle, handle != nil } diff --git a/core/mem/virtual/virtual_windows.odin b/core/mem/virtual/virtual_windows.odin index 0da8498d5..3fd4eeb68 100644 --- a/core/mem/virtual/virtual_windows.odin +++ b/core/mem/virtual/virtual_windows.odin @@ -72,7 +72,7 @@ foreign Kernel32 { flProtect: u32, dwMaximumSizeHigh: u32, dwMaximumSizeLow: u32, - lpName: [^]u16, + lpName: cstring16, ) -> rawptr --- MapViewOfFile :: proc( diff --git a/core/os/dir_windows.odin b/core/os/dir_windows.odin index ae3e6922c..40f4b9e9b 100644 --- a/core/os/dir_windows.odin +++ b/core/os/dir_windows.odin @@ -87,7 +87,7 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F defer delete(path) find_data := &win32.WIN32_FIND_DATAW{} - find_handle := win32.FindFirstFileW(raw_data(wpath_search), find_data) + find_handle := win32.FindFirstFileW(cstring16(raw_data(wpath_search)), find_data) if find_handle == win32.INVALID_HANDLE_VALUE { err = get_last_error() return dfi[:], err diff --git a/core/os/os2/file_windows.odin b/core/os/os2/file_windows.odin index 40d012183..1134e765c 100644 --- a/core/os/os2/file_windows.odin +++ b/core/os/os2/file_windows.odin @@ -619,7 +619,7 @@ _symlink :: proc(old_name, new_name: string) -> Error { return .Unsupported } -_open_sym_link :: proc(p: [^]u16) -> (handle: win32.HANDLE, err: Error) { +_open_sym_link :: proc(p: cstring16) -> (handle: win32.HANDLE, err: Error) { attrs := u32(win32.FILE_FLAG_BACKUP_SEMANTICS) attrs |= win32.FILE_FLAG_OPEN_REPARSE_POINT handle = win32.CreateFileW(p, 0, 0, nil, win32.OPEN_EXISTING, attrs, nil) diff --git a/core/os/stat_windows.odin b/core/os/stat_windows.odin index ca4f87668..662c9f9e6 100644 --- a/core/os/stat_windows.odin +++ b/core/os/stat_windows.odin @@ -17,7 +17,7 @@ full_path_from_name :: proc(name: string, allocator := context.allocator) -> (pa buf := make([dynamic]u16, 100) defer delete(buf) for { - n := win32.GetFullPathNameW(raw_data(p), u32(len(buf)), raw_data(buf), nil) + n := win32.GetFullPathNameW(cstring16(raw_data(p)), u32(len(buf)), cstring16(raw_data(buf)), nil) if n == 0 { return "", get_last_error() } @@ -154,7 +154,7 @@ cleanpath_from_handle_u16 :: proc(fd: Handle, allocator: runtime.Allocator) -> ( return nil, get_last_error() } buf := make([]u16, max(n, win32.DWORD(260))+1, allocator) - buf_len := win32.GetFinalPathNameByHandleW(h, raw_data(buf), n, 0) + buf_len := win32.GetFinalPathNameByHandleW(h, cstring16(raw_data(buf)), n, 0) return buf[:buf_len], nil } @(private, require_results) diff --git a/core/path/filepath/path_windows.odin b/core/path/filepath/path_windows.odin index 0dcb28cf8..24c6e00a5 100644 --- a/core/path/filepath/path_windows.odin +++ b/core/path/filepath/path_windows.odin @@ -61,13 +61,13 @@ temp_full_path :: proc(name: string) -> (path: string, err: os.Error) { } p := win32.utf8_to_utf16(name, ta) - n := win32.GetFullPathNameW(raw_data(p), 0, nil, nil) + n := win32.GetFullPathNameW(cstring16(raw_data(p)), 0, nil, nil) if n == 0 { return "", os.get_last_error() } buf := make([]u16, n, ta) - n = win32.GetFullPathNameW(raw_data(p), u32(len(buf)), raw_data(buf), nil) + n = win32.GetFullPathNameW(cstring16(raw_data(p)), u32(len(buf)), cstring16(raw_data(buf)), nil) if n == 0 { delete(buf) return "", os.get_last_error() diff --git a/core/sys/info/platform_windows.odin b/core/sys/info/platform_windows.odin index 4c00ddadf..dd1441d30 100644 --- a/core/sys/info/platform_windows.odin +++ b/core/sys/info/platform_windows.odin @@ -324,8 +324,8 @@ read_reg_string :: proc(hkey: sys.HKEY, subkey, val: string) -> (res: string, ok status := sys.RegGetValueW( hkey, - &key_name_wide[0], - &val_name_wide[0], + cstring16(&key_name_wide[0]), + cstring16(&val_name_wide[0]), sys.RRF_RT_REG_SZ, nil, raw_data(result_wide[:]), @@ -359,8 +359,8 @@ read_reg_i32 :: proc(hkey: sys.HKEY, subkey, val: string) -> (res: i32, ok: bool result_size := sys.DWORD(size_of(i32)) status := sys.RegGetValueW( hkey, - &key_name_wide[0], - &val_name_wide[0], + cstring16(&key_name_wide[0]), + cstring16(&val_name_wide[0]), sys.RRF_RT_REG_DWORD, nil, &res, @@ -386,8 +386,8 @@ read_reg_i64 :: proc(hkey: sys.HKEY, subkey, val: string) -> (res: i64, ok: bool result_size := sys.DWORD(size_of(i64)) status := sys.RegGetValueW( hkey, - &key_name_wide[0], - &val_name_wide[0], + cstring16(&key_name_wide[0]), + cstring16(&val_name_wide[0]), sys.RRF_RT_REG_QWORD, nil, &res, diff --git a/core/sys/windows/comctl32.odin b/core/sys/windows/comctl32.odin index d954f952c..c7a166634 100644 --- a/core/sys/windows/comctl32.odin +++ b/core/sys/windows/comctl32.odin @@ -573,10 +573,10 @@ Button_GetTextMargin :: #force_inline proc "system" (hwnd: HWND, pmargin: ^RECT) return cast(BOOL)SendMessageW(hwnd, BCM_GETTEXTMARGIN, 0, cast(LPARAM)uintptr(pmargin)) } Button_SetNote :: #force_inline proc "system" (hwnd: HWND, psz: LPCWSTR) -> BOOL { - return cast(BOOL)SendMessageW(hwnd, BCM_SETNOTE, 0, cast(LPARAM)uintptr(psz)) + return cast(BOOL)SendMessageW(hwnd, BCM_SETNOTE, 0, cast(LPARAM)uintptr(rawptr(psz))) } Button_GetNote :: #force_inline proc "system" (hwnd: HWND, psz: LPCWSTR, pcc: ^c_int) -> BOOL { - return cast(BOOL)SendMessageW(hwnd, BCM_GETNOTE, uintptr(pcc), cast(LPARAM)uintptr(psz)) + return cast(BOOL)SendMessageW(hwnd, BCM_GETNOTE, uintptr(pcc), cast(LPARAM)uintptr(rawptr(psz))) } Button_GetNoteLength :: #force_inline proc "system" (hwnd: HWND) -> LRESULT { return SendMessageW(hwnd, BCM_GETNOTELENGTH, 0, 0) @@ -604,10 +604,10 @@ EDITBALLOONTIP :: struct { PEDITBALLOONTIP :: ^EDITBALLOONTIP Edit_SetCueBannerText :: #force_inline proc "system" (hwnd: HWND, lpcwText: LPCWSTR) -> BOOL { - return cast(BOOL)SendMessageW(hwnd, EM_SETCUEBANNER, 0, cast(LPARAM)uintptr(lpcwText)) + return cast(BOOL)SendMessageW(hwnd, EM_SETCUEBANNER, 0, cast(LPARAM)uintptr(rawptr(lpcwText))) } Edit_SetCueBannerTextFocused :: #force_inline proc "system" (hwnd: HWND, lpcwText: LPCWSTR, fDrawFocused: BOOL) -> BOOL { - return cast(BOOL)SendMessageW(hwnd, EM_SETCUEBANNER, cast(WPARAM)fDrawFocused, cast(LPARAM)uintptr(lpcwText)) + return cast(BOOL)SendMessageW(hwnd, EM_SETCUEBANNER, cast(WPARAM)fDrawFocused, cast(LPARAM)uintptr(rawptr(lpcwText))) } Edit_GetCueBannerText :: #force_inline proc "system" (hwnd: HWND, lpwText: LPWSTR, cchText: LONG) -> BOOL { return cast(BOOL)SendMessageW(hwnd, EM_GETCUEBANNER, uintptr(lpwText), cast(LPARAM)cchText) @@ -1197,7 +1197,7 @@ ListView_GetItemPosition :: #force_inline proc "system" (hwnd: HWND, i: c_int, p return cast(BOOL)SendMessageW(hwnd, LVM_GETITEMPOSITION, cast(WPARAM)i, cast(LPARAM)uintptr(ppt)) } ListView_GetStringWidth :: #force_inline proc "system" (hwndLV: HWND, psz: LPCWSTR) -> c_int { - return cast(c_int)SendMessageW(hwndLV, LVM_GETSTRINGWIDTHW, 0, cast(LPARAM)uintptr(psz)) + return cast(c_int)SendMessageW(hwndLV, LVM_GETSTRINGWIDTHW, 0, cast(LPARAM)uintptr(rawptr(psz))) } ListView_HitTest :: #force_inline proc "system" (hwndLV: HWND, pinfo: ^LV_HITTESTINFO) -> c_int { return cast(c_int)SendMessageW(hwndLV, LVM_HITTEST, 0, cast(LPARAM)uintptr(pinfo)) diff --git a/core/sys/windows/ip_helper.odin b/core/sys/windows/ip_helper.odin index 7a6e545ac..d2e75d531 100644 --- a/core/sys/windows/ip_helper.odin +++ b/core/sys/windows/ip_helper.odin @@ -38,9 +38,9 @@ IP_Adapter_Addresses :: struct { FirstAnycastAddress: ^IP_ADAPTER_ANYCAST_ADDRESS_XP, FirstMulticastAddress: ^IP_ADAPTER_MULTICAST_ADDRESS_XP, FirstDnsServerAddress: ^IP_ADAPTER_DNS_SERVER_ADDRESS_XP, - DnsSuffix: ^u16, - Description: ^u16, - FriendlyName: ^u16, + DnsSuffix: cstring16, + Description: cstring16, + FriendlyName: cstring16, PhysicalAddress: [8]u8, PhysicalAddressLength: u32, Anonymous2: struct #raw_union { diff --git a/core/sys/windows/types.odin b/core/sys/windows/types.odin index 92b1cb15c..be16d2fdd 100644 --- a/core/sys/windows/types.odin +++ b/core/sys/windows/types.odin @@ -107,8 +107,8 @@ PDWORD64 :: ^DWORD64 PDWORD_PTR :: ^DWORD_PTR ATOM :: distinct WORD -wstring :: [^]WCHAR -PWSTR :: [^]WCHAR +wstring :: cstring16 +PWSTR :: cstring16 PBYTE :: ^BYTE LPBYTE :: ^BYTE diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin index 995e8e0e5..10dc907e7 100644 --- a/core/sys/windows/util.odin +++ b/core/sys/windows/util.odin @@ -122,14 +122,14 @@ utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf} utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) -> wstring { if res := utf8_to_utf16(s, allocator); len(res) > 0 { - return raw_data(res) + return wstring(raw_data(res)) } return nil } utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring { if res := utf8_to_utf16(buf, s); len(res) > 0 { - return raw_data(res) + return wstring(raw_data(res)) } return nil } @@ -215,7 +215,7 @@ utf16_to_utf8_alloc :: proc(s: []u16, allocator := context.temp_allocator) -> (r if len(s) == 0 { return "", nil } - return wstring_to_utf8(raw_data(s), len(s), allocator) + return wstring_to_utf8(wstring(raw_data(s)), len(s), allocator) } /* @@ -236,7 +236,7 @@ utf16_to_utf8_buf :: proc(buf: []u8, s: []u16) -> (res: string) { if len(s) == 0 { return } - return wstring_to_utf8(buf, raw_data(s), len(s)) + return wstring_to_utf8(buf, wstring(raw_data(s)), len(s)) } utf16_to_utf8 :: proc{utf16_to_utf8_alloc, utf16_to_utf8_buf} @@ -298,7 +298,7 @@ _add_user :: proc(servername: string, username: string, password: string) -> (ok servername_w = nil } else { server := utf8_to_utf16(servername, context.temp_allocator) - servername_w = &server[0] + servername_w = wstring(&server[0]) } if len(username) == 0 || len(username) > LM20_UNLEN { @@ -348,7 +348,7 @@ get_computer_name_and_account_sid :: proc(username: string) -> (computer_name: s res := LookupAccountNameW( nil, // Look on this computer first - &username_w[0], + wstring(&username_w[0]), &sid, &cbsid, nil, @@ -364,10 +364,10 @@ get_computer_name_and_account_sid :: proc(username: string) -> (computer_name: s res = LookupAccountNameW( nil, - &username_w[0], + wstring(&username_w[0]), &sid, &cbsid, - &cname_w[0], + wstring(&cname_w[0]), &computer_name_size, &pe_use, ) @@ -390,7 +390,7 @@ get_sid :: proc(username: string, sid: ^SID) -> (ok: bool) { res := LookupAccountNameW( nil, // Look on this computer first - &username_w[0], + wstring(&username_w[0]), sid, &cbsid, nil, @@ -406,10 +406,10 @@ get_sid :: proc(username: string, sid: ^SID) -> (ok: bool) { res = LookupAccountNameW( nil, - &username_w[0], + wstring(&username_w[0]), sid, &cbsid, - &cname_w[0], + wstring(&cname_w[0]), &computer_name_size, &pe_use, ) @@ -428,7 +428,7 @@ add_user_to_group :: proc(sid: ^SID, group: string) -> (ok: NET_API_STATUS) { group_name := utf8_to_utf16(group, context.temp_allocator) ok = NetLocalGroupAddMembers( nil, - &group_name[0], + wstring(&group_name[0]), 0, &group_member, 1, @@ -443,7 +443,7 @@ add_del_from_group :: proc(sid: ^SID, group: string) -> (ok: NET_API_STATUS) { group_name := utf8_to_utf16(group, context.temp_allocator) ok = NetLocalGroupDelMembers( nil, - &group_name[0], + cstring16(&group_name[0]), 0, &group_member, 1, @@ -465,19 +465,19 @@ add_user_profile :: proc(username: string) -> (ok: bool, profile_path: string) { if res == false { return false, "" } - defer LocalFree(sb) + defer LocalFree(rawptr(sb)) pszProfilePath := make([]u16, 257, context.temp_allocator) res2 := CreateProfile( sb, - &username_w[0], - &pszProfilePath[0], + cstring16(&username_w[0]), + cstring16(&pszProfilePath[0]), 257, ) if res2 != 0 { return false, "" } - profile_path = wstring_to_utf8(&pszProfilePath[0], 257) or_else "" + profile_path = wstring_to_utf8(wstring(&pszProfilePath[0]), 257) or_else "" return true, profile_path } @@ -495,7 +495,7 @@ delete_user_profile :: proc(username: string) -> (ok: bool) { if res == false { return false } - defer LocalFree(sb) + defer LocalFree(rawptr(sb)) res2 := DeleteProfileW( sb, @@ -548,13 +548,13 @@ delete_user :: proc(servername: string, username: string) -> (ok: bool) { servername_w = nil } else { server := utf8_to_utf16(servername, context.temp_allocator) - servername_w = &server[0] + servername_w = wstring(&server[0]) } username_w := utf8_to_utf16(username) res := NetUserDel( servername_w, - &username_w[0], + wstring(&username_w[0]), ) if res != .Success { return false @@ -586,9 +586,9 @@ run_as_user :: proc(username, password, application, commandline: string, pi: ^P user_token: HANDLE ok = bool(LogonUserW( - lpszUsername = &username_w[0], - lpszDomain = &domain_w[0], - lpszPassword = &password_w[0], + lpszUsername = wstring(&username_w[0]), + lpszDomain = wstring(&domain_w[0]), + lpszPassword = wstring(&password_w[0]), dwLogonType = .NEW_CREDENTIALS, dwLogonProvider = .WINNT50, phToken = &user_token, @@ -605,8 +605,8 @@ run_as_user :: proc(username, password, application, commandline: string, pi: ^P ok = bool(CreateProcessAsUserW( user_token, - &app_w[0], - &commandline_w[0], + wstring(&app_w[0]), + wstring(&commandline_w[0]), nil, // lpProcessAttributes, nil, // lpThreadAttributes, false, // bInheritHandles, diff --git a/core/time/timezone/tz_windows.odin b/core/time/timezone/tz_windows.odin index 8dc5f533c..fe00719a2 100644 --- a/core/time/timezone/tz_windows.odin +++ b/core/time/timezone/tz_windows.odin @@ -159,9 +159,9 @@ iana_to_windows_tz :: proc(iana_name: string, allocator := context.allocator) -> status: windows.UError iana_name_wstr := windows.utf8_to_wstring(iana_name, allocator) - defer free(iana_name_wstr, allocator) + defer free(rawptr(iana_name_wstr), allocator) - wintz_name_len := windows.ucal_getWindowsTimeZoneID(iana_name_wstr, -1, raw_data(wintz_name_buffer[:]), len(wintz_name_buffer), &status) + wintz_name_len := windows.ucal_getWindowsTimeZoneID(iana_name_wstr, -1, cstring16(raw_data(wintz_name_buffer[:])), len(wintz_name_buffer), &status) if status != .U_ZERO_ERROR { return } @@ -178,7 +178,7 @@ local_tz_name :: proc(allocator := context.allocator) -> (name: string, success: iana_name_buffer: [128]u16 status: windows.UError - zone_str_len := windows.ucal_getDefaultTimeZone(raw_data(iana_name_buffer[:]), len(iana_name_buffer), &status) + zone_str_len := windows.ucal_getDefaultTimeZone(cstring16(raw_data(iana_name_buffer[:])), len(iana_name_buffer), &status) if status != .U_ZERO_ERROR { return } @@ -291,7 +291,7 @@ _region_load :: proc(reg_str: string, allocator := context.allocator) -> (out_re defer delete(tz_key, allocator) tz_key_wstr := windows.utf8_to_wstring(tz_key, allocator) - defer free(tz_key_wstr, allocator) + defer free(rawptr(tz_key_wstr), allocator) key: windows.HKEY res := windows.RegOpenKeyExW(windows.HKEY_LOCAL_MACHINE, tz_key_wstr, 0, windows.KEY_READ, &key) diff --git a/core/unicode/utf16/utf16.odin b/core/unicode/utf16/utf16.odin index 9a8cfe438..d3f98584b 100644 --- a/core/unicode/utf16/utf16.odin +++ b/core/unicode/utf16/utf16.odin @@ -126,7 +126,37 @@ decode_rune_in_string :: proc(s: string16) -> (r: rune, width: int) { return } -rune_count :: proc(s: []u16) -> (n: int) { +string_to_runes :: proc "odin" (s: string16, allocator := context.allocator) -> (runes: []rune) { + n := rune_count(s) + + runes = make([]rune, n, allocator) + i := 0 + for r in s { + runes[i] = r + i += 1 + } + return +} + + +rune_count :: proc{ + rune_count_in_string, + rune_count_in_slice, +} +rune_count_in_string :: proc(s: string16) -> (n: int) { + for i := 0; i < len(s); i += 1 { + c := s[i] + if _surr1 <= c && c < _surr2 && i+1 < len(s) && + _surr2 <= s[i+1] && s[i+1] < _surr3 { + i += 1 + } + n += 1 + } + return +} + + +rune_count_in_slice :: proc(s: []u16) -> (n: int) { for i := 0; i < len(s); i += 1 { c := s[i] if _surr1 <= c && c < _surr2 && i+1 < len(s) && diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 4abace637..66ea0cfbd 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -7179,7 +7179,11 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return false; } operand->mode = Addressing_Value; - operand->type = alloc_type_multi_pointer(t_u16); + if (type_hint != nullptr && is_type_cstring16(type_hint)) { + operand->type = type_hint; + } else { + operand->type = alloc_type_multi_pointer(t_u16); + } operand->value = {}; break; } diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 8d2e4d637..34149f92b 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -3426,7 +3426,7 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type if (are_types_identical(src, t_cstring16) && is_type_u16_multi_ptr(dst)) { return !is_constant; } - // cstring -> rawptr + // cstring16 -> rawptr if (are_types_identical(src, t_cstring16) && is_type_rawptr(dst)) { return !is_constant; } diff --git a/src/check_stmt.cpp b/src/check_stmt.cpp index bc9b6c5dd..ae88ff333 100644 --- a/src/check_stmt.cpp +++ b/src/check_stmt.cpp @@ -974,7 +974,14 @@ gb_internal void check_unroll_range_stmt(CheckerContext *ctx, Ast *node, u32 mod Type *t = base_type(operand.type); switch (t->kind) { case Type_Basic: - if (is_type_string(t) && t->Basic.kind != Basic_cstring) { + if (is_type_string16(t) && t->Basic.kind != Basic_cstring) { + val0 = t_rune; + val1 = t_int; + inline_for_depth = exact_value_i64(operand.value.value_string.len); + if (unroll_count > 0) { + error(node, "#unroll(%lld) does not support strings", cast(long long)unroll_count); + } + } else if (is_type_string(t) && t->Basic.kind != Basic_cstring) { val0 = t_rune; val1 = t_int; inline_for_depth = exact_value_i64(operand.value.value_string.len); @@ -1236,7 +1243,11 @@ gb_internal void check_switch_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags add_to_seen_map(ctx, &seen, upper_op, x, lhs, rhs); - if (is_type_string(x.type)) { + if (is_type_string16(x.type)) { + // NOTE(bill): Force dependency for strings here + add_package_dependency(ctx, "runtime", "string16_le"); + add_package_dependency(ctx, "runtime", "string16_lt"); + } else if (is_type_string(x.type)) { // NOTE(bill): Force dependency for strings here add_package_dependency(ctx, "runtime", "string_le"); add_package_dependency(ctx, "runtime", "string_lt"); @@ -1770,7 +1781,16 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) switch (t->kind) { case Type_Basic: - if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) { + if (t->Basic.kind == Basic_string16) { + is_possibly_addressable = false; + array_add(&vals, t_rune); + array_add(&vals, t_int); + if (is_reverse) { + add_package_dependency(ctx, "runtime", "string16_decode_last_rune"); + } else { + add_package_dependency(ctx, "runtime", "string16_decode_rune"); + } + } else if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) { is_possibly_addressable = false; array_add(&vals, t_rune); array_add(&vals, t_int); diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index 027837f3f..5481ca447 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -622,6 +622,121 @@ gb_internal void lb_build_range_string(lbProcedure *p, lbValue expr, Type *val_t if (done_) *done_ = done; } +gb_internal void lb_build_range_string16(lbProcedure *p, lbValue expr, Type *val_type, + lbValue *val_, lbValue *idx_, lbBlock **loop_, lbBlock **done_, + bool is_reverse) { + + lbModule *m = p->module; + lbValue count = lb_const_int(m, t_int, 0); + Type *expr_type = base_type(expr.type); + switch (expr_type->kind) { + case Type_Basic: + count = lb_string_len(p, expr); + break; + default: + GB_PANIC("Cannot do range_string of %s", type_to_string(expr_type)); + break; + } + + lbValue val = {}; + lbValue idx = {}; + lbBlock *loop = nullptr; + lbBlock *done = nullptr; + lbBlock *body = nullptr; + + loop = lb_create_block(p, "for.string16.loop"); + body = lb_create_block(p, "for.string16.body"); + done = lb_create_block(p, "for.string16.done"); + + lbAddr offset_ = lb_add_local_generated(p, t_int, false); + lbValue offset = {}; + lbValue cond = {}; + + if (!is_reverse) { + /* + for c, offset in str { + ... + } + + offset := 0 + for offset < len(str) { + c, _w := string16_decode_rune(str[offset:]) + ... + offset += _w + } + */ + lb_addr_store(p, offset_, lb_const_int(m, t_int, 0)); + + lb_emit_jump(p, loop); + lb_start_block(p, loop); + + + offset = lb_addr_load(p, offset_); + cond = lb_emit_comp(p, Token_Lt, offset, count); + } else { + // NOTE(bill): REVERSED LOGIC + /* + #reverse for c, offset in str { + ... + } + + offset := len(str) + for offset > 0 { + c, _w := string16_decode_last_rune(str[:offset]) + offset -= _w + ... + } + */ + lb_addr_store(p, offset_, count); + + lb_emit_jump(p, loop); + lb_start_block(p, loop); + + offset = lb_addr_load(p, offset_); + cond = lb_emit_comp(p, Token_Gt, offset, lb_const_int(m, t_int, 0)); + } + lb_emit_if(p, cond, body, done); + lb_start_block(p, body); + + + lbValue rune_and_len = {}; + if (!is_reverse) { + lbValue str_elem = lb_emit_ptr_offset(p, lb_string_elem(p, expr), offset); + lbValue str_len = lb_emit_arith(p, Token_Sub, count, offset, t_int); + auto args = array_make(permanent_allocator(), 1); + args[0] = lb_emit_string16(p, str_elem, str_len); + + rune_and_len = lb_emit_runtime_call(p, "string16_decode_rune", args); + lbValue len = lb_emit_struct_ev(p, rune_and_len, 1); + lb_addr_store(p, offset_, lb_emit_arith(p, Token_Add, offset, len, t_int)); + + idx = offset; + } else { + // NOTE(bill): REVERSED LOGIC + lbValue str_elem = lb_string_elem(p, expr); + lbValue str_len = offset; + auto args = array_make(permanent_allocator(), 1); + args[0] = lb_emit_string16(p, str_elem, str_len); + + rune_and_len = lb_emit_runtime_call(p, "string16_decode_last_rune", args); + lbValue len = lb_emit_struct_ev(p, rune_and_len, 1); + lb_addr_store(p, offset_, lb_emit_arith(p, Token_Sub, offset, len, t_int)); + + idx = lb_addr_load(p, offset_); + } + + + if (val_type != nullptr) { + val = lb_emit_struct_ev(p, rune_and_len, 0); + } + + if (val_) *val_ = val; + if (idx_) *idx_ = idx; + if (loop_) *loop_ = loop; + if (done_) *done_ = done; +} + + gb_internal Ast *lb_strip_and_prefix(Ast *ident) { if (ident != nullptr) { @@ -1138,7 +1253,11 @@ gb_internal void lb_build_range_stmt(lbProcedure *p, AstRangeStmt *rs, Scope *sc } Type *t = base_type(string.type); GB_ASSERT(!is_type_cstring(t)); - lb_build_range_string(p, string, val0_type, &val, &key, &loop, &done, rs->reverse); + if (is_type_string16(t)) { + lb_build_range_string16(p, string, val0_type, &val, &key, &loop, &done, rs->reverse); + } else { + lb_build_range_string(p, string, val0_type, &val, &key, &loop, &done, rs->reverse); + } break; } case Type_Tuple: diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index ea1bae4e9..dcb95a9a2 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -191,6 +191,23 @@ gb_internal lbValue lb_emit_clamp(lbProcedure *p, Type *t, lbValue x, lbValue mi return z; } +gb_internal lbValue lb_emit_string16(lbProcedure *p, lbValue str_elem, lbValue str_len) { + if (false && lb_is_const(str_elem) && lb_is_const(str_len)) { + LLVMValueRef values[2] = { + str_elem.value, + str_len.value, + }; + lbValue res = {}; + res.type = t_string16; + res.value = llvm_const_named_struct(p->module, t_string16, values, gb_count_of(values)); + return res; + } else { + lbAddr res = lb_add_local_generated(p, t_string16, false); + lb_emit_store(p, lb_emit_struct_ep(p, res.addr, 0), str_elem); + lb_emit_store(p, lb_emit_struct_ep(p, res.addr, 1), str_len); + return lb_addr_load(p, res); + } +} gb_internal lbValue lb_emit_string(lbProcedure *p, lbValue str_elem, lbValue str_len) { -- cgit v1.2.3