diff options
| author | gingerBill <gingerBill@users.noreply.github.com> | 2025-08-06 16:09:18 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-06 16:09:18 +0100 |
| commit | 09a1e170bc92a0ea48a8ee67599c2936e924fe4d (patch) | |
| tree | 92b44b34a1f2f0c4a8c96a49ab61bb5177432ed7 | |
| parent | ec7509430369eb5d57a081507792dc03b1c05bab (diff) | |
| parent | af3184adc96cef59fff986ea6400caa6dbdb56ae (diff) | |
Merge pull request #5530 from odin-lang/bill/utf16-strings
UTF-16 string types: `string16` & `cstring16`
66 files changed, 2218 insertions, 227 deletions
diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index be75739fe..d45d24f48 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -141,6 +141,7 @@ type_is_quaternion :: proc($T: typeid) -> bool --- type_is_string :: proc($T: typeid) -> bool --- type_is_typeid :: proc($T: typeid) -> bool --- type_is_any :: proc($T: typeid) -> bool --- +type_is_string16 :: proc($T: typeid) -> bool --- type_is_endian_platform :: proc($T: typeid) -> bool --- type_is_endian_little :: proc($T: typeid) -> bool --- diff --git a/base/runtime/core.odin b/base/runtime/core.odin index baecb4146..478a3d307 100644 --- a/base/runtime/core.odin +++ b/base/runtime/core.odin @@ -61,6 +61,11 @@ Type_Info_Struct_Soa_Kind :: enum u8 { Dynamic = 3, } +Type_Info_String_Encoding_Kind :: enum u8 { + UTF_8 = 0, + UTF_16 = 1, +} + // Variant Types Type_Info_Named :: struct { name: string, @@ -73,7 +78,7 @@ Type_Info_Rune :: struct {} Type_Info_Float :: struct {endianness: Platform_Endianness} Type_Info_Complex :: struct {} Type_Info_Quaternion :: struct {} -Type_Info_String :: struct {is_cstring: bool} +Type_Info_String :: struct {is_cstring: bool, encoding: Type_Info_String_Encoding_Kind} Type_Info_Boolean :: struct {} Type_Info_Any :: struct {} Type_Info_Type_Id :: struct {} @@ -397,6 +402,11 @@ Raw_String :: struct { len: int, } +Raw_String16 :: struct { + data: [^]u16, + len: int, +} + Raw_Slice :: struct { data: rawptr, len: int, @@ -450,6 +460,12 @@ Raw_Cstring :: struct { } #assert(size_of(Raw_Cstring) == size_of(cstring)) +Raw_Cstring16 :: struct { + data: [^]u16, +} +#assert(size_of(Raw_Cstring16) == size_of(cstring16)) + + Raw_Soa_Pointer :: struct { data: rawptr, index: int, diff --git a/base/runtime/core_builtin.odin b/base/runtime/core_builtin.odin index e2ba14f3a..09118998c 100644 --- a/base/runtime/core_builtin.odin +++ b/base/runtime/core_builtin.odin @@ -86,11 +86,26 @@ copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int } return n } + +// `copy_from_string16` is a built-in procedure that copies elements from a source string `src` to a destination slice `dst`. +// The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum +// of len(src) and len(dst). +// +// Prefer the procedure group `copy`. +@builtin +copy_from_string16 :: proc "contextless" (dst: $T/[]$E/u16, src: $S/string16) -> int { + n := min(len(dst), len(src)) + if n > 0 { + intrinsics.mem_copy(raw_data(dst), raw_data(src), n*size_of(u16)) + } + return n +} + // `copy` is a built-in procedure that copies elements from a source slice/string `src` to a destination slice `dst`. // The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum // of len(src) and len(dst). @builtin -copy :: proc{copy_slice, copy_from_string} +copy :: proc{copy_slice, copy_from_string, copy_from_string16} @@ -285,6 +300,15 @@ delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error } +@builtin +delete_string16 :: proc(str: string16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + return mem_free_with_size(raw_data(str), len(str)*size_of(u16), allocator, loc) +} +@builtin +delete_cstring16 :: proc(str: cstring16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + return mem_free((^u16)(str), allocator, loc) +} + // `delete` will try to free the underlying data of the passed built-in data structure (string, cstring, dynamic array, slice, or map), with the given `allocator` if the allocator supports this operation. // // Note: Prefer `delete` over the specific `delete_*` procedures where possible. @@ -297,6 +321,8 @@ delete :: proc{ delete_map, delete_soa_slice, delete_soa_dynamic_array, + delete_string16, + delete_cstring16, } diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin index 907b187f1..4f9509b23 100644 --- a/base/runtime/internal.odin +++ b/base/runtime/internal.odin @@ -493,12 +493,40 @@ string_cmp :: proc "contextless" (a, b: string) -> int { return ret } + +string16_eq :: proc "contextless" (lhs, rhs: string16) -> bool { + x := transmute(Raw_String16)lhs + y := transmute(Raw_String16)rhs + if x.len != y.len { + return false + } + return #force_inline memory_equal(x.data, y.data, x.len*size_of(u16)) +} + +string16_cmp :: proc "contextless" (a, b: string16) -> int { + x := transmute(Raw_String16)a + y := transmute(Raw_String16)b + + ret := memory_compare(x.data, y.data, min(x.len, y.len)*size_of(u16)) + if ret == 0 && x.len != y.len { + return -1 if x.len < y.len else +1 + } + return ret +} + string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) } string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 } string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 } string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 } string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 } +string16_ne :: #force_inline proc "contextless" (a, b: string16) -> bool { return !string16_eq(a, b) } +string16_lt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) < 0 } +string16_gt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) > 0 } +string16_le :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) <= 0 } +string16_ge :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) >= 0 } + + cstring_len :: proc "contextless" (s: cstring) -> int { p0 := uintptr((^byte)(s)) p := p0 @@ -508,6 +536,16 @@ cstring_len :: proc "contextless" (s: cstring) -> int { return int(p - p0) } +cstring16_len :: proc "contextless" (s: cstring16) -> int { + p := ([^]u16)(s) + n := 0 + for p != nil && p[0] != 0 { + p = p[1:] + n += 1 + } + return n +} + cstring_to_string :: proc "contextless" (s: cstring) -> string { if s == nil { return "" @@ -517,6 +555,15 @@ cstring_to_string :: proc "contextless" (s: cstring) -> string { return transmute(string)Raw_String{ptr, n} } +cstring16_to_string16 :: proc "contextless" (s: cstring16) -> string16 { + if s == nil { + return "" + } + ptr := (^u16)(s) + n := cstring16_len(s) + return transmute(string16)Raw_String16{ptr, n} +} + cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool { x := ([^]byte)(lhs) @@ -559,6 +606,46 @@ cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 } cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 } +cstring16_eq :: proc "contextless" (lhs, rhs: cstring16) -> bool { + x := ([^]u16)(lhs) + y := ([^]u16)(rhs) + if x == y { + return true + } + if (x == nil) ~ (y == nil) { + return false + } + xn := cstring16_len(lhs) + yn := cstring16_len(rhs) + if xn != yn { + return false + } + return #force_inline memory_equal(x, y, xn*size_of(u16)) +} + +cstring16_cmp :: proc "contextless" (lhs, rhs: cstring16) -> int { + x := ([^]u16)(lhs) + y := ([^]u16)(rhs) + if x == y { + return 0 + } + if (x == nil) ~ (y == nil) { + return -1 if x == nil else +1 + } + xn := cstring16_len(lhs) + yn := cstring16_len(rhs) + ret := memory_compare(x, y, min(xn, yn)*size_of(u16)) + if ret == 0 && xn != yn { + return -1 if xn < yn else +1 + } + return ret +} + +cstring16_ne :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return !cstring16_eq(a, b) } +cstring16_lt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) < 0 } +cstring16_gt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) > 0 } +cstring16_le :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) <= 0 } +cstring16_ge :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) >= 0 } complex32_eq :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) == real(b) && imag(a) == imag(b) } complex32_ne :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) != real(b) || imag(a) != imag(b) } @@ -694,6 +781,68 @@ string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) { return r, size } + +string16_decode_rune :: #force_inline proc "contextless" (s: string16) -> (rune, int) { + REPLACEMENT_CHAR :: '\ufffd' + _surr1 :: 0xd800 + _surr2 :: 0xdc00 + _surr3 :: 0xe000 + _surr_self :: 0x10000 + + r := rune(REPLACEMENT_CHAR) + + if len(s) < 1 { + return r, 0 + } + + w := 1 + switch c := s[0]; { + case c < _surr1, _surr3 <= c: + r = rune(c) + case _surr1 <= c && c < _surr2 && 1 < len(s) && + _surr2 <= s[1] && s[1] < _surr3: + r1, r2 := rune(c), rune(s[1]) + if _surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3 { + r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self + } + w += 1 + } + return r, w +} + +string16_decode_last_rune :: proc "contextless" (s: string16) -> (rune, int) { + REPLACEMENT_CHAR :: '\ufffd' + _surr1 :: 0xd800 + _surr2 :: 0xdc00 + _surr3 :: 0xe000 + _surr_self :: 0x10000 + + r := rune(REPLACEMENT_CHAR) + + if len(s) < 1 { + return r, 0 + } + + n := len(s)-1 + c := s[n] + w := 1 + if _surr2 <= c && c < _surr3 { + if n >= 1 { + r1 := rune(s[n-1]) + r2 := rune(c) + if _surr1 <= r1 && r1 < _surr2 { + r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self + } + w = 2 + } + } else if c < _surr1 || _surr3 <= c { + r = rune(c) + } + return r, w +} + + + abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 { p, q := abs(real(x)), abs(imag(x)) if p < q { diff --git a/base/runtime/print.odin b/base/runtime/print.odin index 145f002d1..2cfb6661b 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -293,7 +293,14 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { print_string("quaternion") print_u64(u64(8*ti.size)) case Type_Info_String: + if info.is_cstring { + print_byte('c') + } print_string("string") + switch info.encoding { + case .UTF_8: /**/ + case .UTF_16: print_string("16") + } case Type_Info_Boolean: switch ti.id { case bool: print_string("bool") diff --git a/core/c/libc/locale.odin b/core/c/libc/locale.odin index 27317526c..3216e0f90 100644 --- a/core/c/libc/locale.odin +++ b/core/c/libc/locale.odin @@ -72,14 +72,14 @@ when ODIN_OS == .Windows { n_sep_by_space: c.char, p_sign_posn: c.char, n_sign_posn: c.char, - _W_decimal_point: [^]u16 `fmt:"s,0"`, - _W_thousands_sep: [^]u16 `fmt:"s,0"`, - _W_int_curr_symbol: [^]u16 `fmt:"s,0"`, - _W_currency_symbol: [^]u16 `fmt:"s,0"`, - _W_mon_decimal_point: [^]u16 `fmt:"s,0"`, - _W_mon_thousands_sep: [^]u16 `fmt:"s,0"`, - _W_positive_sign: [^]u16 `fmt:"s,0"`, - _W_negative_sign: [^]u16 `fmt:"s,0"`, + _W_decimal_point: cstring16, + _W_thousands_sep: cstring16, + _W_int_curr_symbol: cstring16, + _W_currency_symbol: cstring16, + _W_mon_decimal_point: cstring16, + _W_mon_thousands_sep: cstring16, + _W_positive_sign: cstring16, + _W_negative_sign: cstring16, } } else { lconv :: struct { diff --git a/core/debug/trace/trace_windows.odin b/core/debug/trace/trace_windows.odin index 96507714c..04e92f125 100644 --- a/core/debug/trace/trace_windows.odin +++ b/core/debug/trace/trace_windows.odin @@ -54,7 +54,7 @@ _resolve :: proc(ctx: ^Context, frame: Frame, allocator: runtime.Allocator) -> ( symbol.SizeOfStruct = size_of(symbol^) symbol.MaxNameLen = 255 if win32.SymFromAddrW(ctx.impl.hProcess, win32.DWORD64(frame), &{}, symbol) { - fl.procedure, _ = win32.wstring_to_utf8(&symbol.Name[0], -1, allocator) + fl.procedure, _ = win32.wstring_to_utf8(cstring16(&symbol.Name[0]), -1, allocator) } else { fl.procedure = fmt.aprintf("(procedure: 0x%x)", frame, allocator=allocator) } diff --git a/core/dynlib/lib_windows.odin b/core/dynlib/lib_windows.odin index 05cd2cb3c..95372dac6 100644 --- a/core/dynlib/lib_windows.odin +++ b/core/dynlib/lib_windows.odin @@ -13,7 +13,7 @@ _LIBRARY_FILE_EXTENSION :: "dll" _load_library :: proc(path: string, global_symbols: bool, allocator: runtime.Allocator) -> (Library, bool) { // NOTE(bill): 'global_symbols' is here only for consistency with POSIX which has RTLD_GLOBAL wide_path := win32.utf8_to_wstring(path, allocator) - defer free(wide_path, allocator) + defer free(rawptr(wide_path), allocator) handle := cast(Library)win32.LoadLibraryW(wide_path) return handle, handle != nil } diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 17420af46..ae1664dfc 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -298,7 +298,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, #partial switch t in ti.variant { case reflect.Type_Info_String: - + assert(t.encoding == .UTF_8) if t.is_cstring { length := base64.decoded_len(bytes) builder := strings.builder_make(0, length+1) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 365ac5d6f..043b2ec60 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -335,6 +335,8 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: + assert(t.encoding == .UTF_8) + bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return if t.is_cstring { diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index ebb9a639c..cdb00a354 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -353,10 +353,10 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err: #partial switch info in ti.variant { case runtime.Type_Info_String: switch x in v { - case string: - return x == "" - case cstring: - return x == nil || x == "" + case string: return x == "" + case cstring: return x == nil || x == "" + case string16: return x == "" + case cstring16: return x == nil || x == "" } case runtime.Type_Info_Any: return v.(any) == nil diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index b9ed1476f..0b65adaac 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -570,7 +570,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm key_ptr: rawptr #partial switch tk in t.key.variant { - case runtime.Type_Info_String: + case runtime.Type_Info_String: + assert(tk.encoding == .UTF_8) + key_ptr = rawptr(&key) key_cstr: cstring if reflect.is_cstring(t.key) { diff --git a/core/flags/internal_rtti.odin b/core/flags/internal_rtti.odin index 1c559ca55..a1b050597 100644 --- a/core/flags/internal_rtti.odin +++ b/core/flags/internal_rtti.odin @@ -127,6 +127,8 @@ parse_and_set_pointer_by_base_type :: proc(ptr: rawptr, str: string, type_info: } case runtime.Type_Info_String: + assert(specific_type_info.encoding == .UTF_8) + if specific_type_info.is_cstring { cstr_ptr := (^cstring)(ptr) if cstr_ptr != nil { diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 0f6470cca..9c245de94 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1551,6 +1551,79 @@ fmt_string :: proc(fi: ^Info, s: string, verb: rune) { fmt_cstring :: proc(fi: ^Info, s: cstring, verb: rune) { fmt_string(fi, string(s), verb) } + +// Formats a string UTF-16 with a specific format. +// +// Inputs: +// - fi: Pointer to the Info struct containing format settings. +// - s: The string to format. +// - verb: The format specifier character (e.g. 's', 'v', 'q', 'x', 'X'). +// +fmt_string16 :: proc(fi: ^Info, s: string16, verb: rune) { + s, verb := s, verb + if ol, ok := fi.optional_len.?; ok { + s = s[:clamp(ol, 0, len(s))] + } + if !fi.in_bad && fi.record_level > 0 && verb == 'v' { + verb = 'q' + } + + switch verb { + case 's', 'v': + if fi.width_set { + if fi.width > len(s) { + if fi.minus { + io.write_string16(fi.writer, s, &fi.n) + } + + for _ in 0..<fi.width - len(s) { + io.write_byte(fi.writer, ' ', &fi.n) + } + + if !fi.minus { + io.write_string16(fi.writer, s, &fi.n) + } + } else { + io.write_string16(fi.writer, s, &fi.n) + } + } else { + io.write_string16(fi.writer, s, &fi.n) + } + + case 'q', 'w': // quoted string + io.write_quoted_string16(fi.writer, s, '"', &fi.n) + + case 'x', 'X': + space := fi.space + fi.space = false + defer fi.space = space + + for i in 0..<len(s) { + if i > 0 && space { + io.write_byte(fi.writer, ' ', &fi.n) + } + char_set := __DIGITS_UPPER + if verb == 'x' { + char_set = __DIGITS_LOWER + } + _fmt_int(fi, u64(s[i]), 16, false, bit_size=16, digits=char_set) + } + + case: + fmt_bad_verb(fi, verb) + } +} +// Formats a C-style UTF-16 string with a specific format. +// +// Inputs: +// - fi: Pointer to the Info struct containing format settings. +// - s: The C-style string to format. +// - verb: The format specifier character (Ref fmt_string). +// +fmt_cstring16 :: proc(fi: ^Info, s: cstring16, verb: rune) { + fmt_string16(fi, string16(s), verb) +} + // Formats a raw pointer with a specific format. // // Inputs: @@ -2273,14 +2346,14 @@ fmt_array :: proc(fi: ^Info, data: rawptr, n: int, elem_size: int, elem: ^reflec } switch reflect.type_info_base(elem).id { - case byte: fmt_string(fi, string(([^]byte)(data)[:n]), verb); return - case u16: print_utf16(fi, ([^]u16)(data)[:n]); return - case u16le: print_utf16(fi, ([^]u16le)(data)[:n]); return - case u16be: print_utf16(fi, ([^]u16be)(data)[:n]); return - case u32: print_utf32(fi, ([^]u32)(data)[:n]); return - case u32le: print_utf32(fi, ([^]u32le)(data)[:n]); return - case u32be: print_utf32(fi, ([^]u32be)(data)[:n]); return - case rune: print_utf32(fi, ([^]rune)(data)[:n]); return + case byte: fmt_string(fi, string (([^]byte)(data)[:n]), verb); return + case u16: fmt_string16(fi, string16(([^]u16) (data)[:n]), verb); return + case u16le: print_utf16(fi, ([^]u16le)(data)[:n]); return + case u16be: print_utf16(fi, ([^]u16be)(data)[:n]); return + case u32: print_utf32(fi, ([^]u32)(data)[:n]); return + case u32le: print_utf32(fi, ([^]u32le)(data)[:n]); return + case u32be: print_utf32(fi, ([^]u32be)(data)[:n]); return + case rune: print_utf32(fi, ([^]rune)(data)[:n]); return } } if verb == 'p' { @@ -3210,6 +3283,9 @@ fmt_arg :: proc(fi: ^Info, arg: any, verb: rune) { case string: fmt_string(fi, a, verb) case cstring: fmt_cstring(fi, a, verb) + case string16: fmt_string16(fi, a, verb) + case cstring16: fmt_cstring16(fi, a, verb) + case typeid: reflect.write_typeid(fi.writer, a, &fi.n) case i16le: fmt_int(fi, u64(a), true, 16, verb) diff --git a/core/io/io.odin b/core/io/io.odin index c2b44cbdb..c4eb6a073 100644 --- a/core/io/io.odin +++ b/core/io/io.odin @@ -5,6 +5,7 @@ package io import "base:intrinsics" import "core:unicode/utf8" +import "core:unicode/utf16" // Seek whence values Seek_From :: enum { @@ -314,6 +315,29 @@ write_string :: proc(s: Writer, str: string, n_written: ^int = nil) -> (n: int, return write(s, transmute([]byte)str, n_written) } +// write_string16 writes the contents of the string16 s to w reencoded as utf-8 +write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: int, err: Error) { + for i := 0; i < len(str); i += 1 { + r := rune(utf16.REPLACEMENT_CHAR) + switch c := str[i]; { + case c < utf16._surr1, utf16._surr3 <= c: + r = rune(c) + case utf16._surr1 <= c && c < utf16._surr2 && i+1 < len(str) && + utf16._surr2 <= str[i+1] && str[i+1] < utf16._surr3: + r = utf16.decode_surrogate_pair(rune(c), rune(str[i+1])) + i += 1 + } + + w: int + w, err = write_rune(s, r, n_written) + n += w + if err != nil { + return + } + } + return +} + // write_rune writes a UTF-8 encoded rune to w. write_rune :: proc(s: Writer, r: rune, n_written: ^int = nil) -> (size: int, err: Error) { defer if err == nil && n_written != nil { diff --git a/core/io/util.odin b/core/io/util.odin index fa98e007b..72983523a 100644 --- a/core/io/util.odin +++ b/core/io/util.odin @@ -264,6 +264,33 @@ write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written return } +write_quoted_string16 :: proc(w: Writer, str: string16, quote: byte = '"', n_written: ^int = nil, for_json := false) -> (n: int, err: Error) { + defer if n_written != nil { + n_written^ += n + } + write_byte(w, quote, &n) or_return + for width, s := 0, str; len(s) > 0; s = s[width:] { + r := rune(s[0]) + width = 1 + if r >= utf8.RUNE_SELF { + r, width = utf16.decode_rune_in_string(s) + } + if width == 1 && r == utf8.RUNE_ERROR { + write_byte(w, '\\', &n) or_return + write_byte(w, 'x', &n) or_return + write_byte(w, DIGITS_LOWER[s[0]>>4], &n) or_return + write_byte(w, DIGITS_LOWER[s[0]&0xf], &n) or_return + continue + } + + n_wrapper(write_escaped_rune(w, r, quote, false, nil, for_json), &n) or_return + + } + write_byte(w, quote, &n) or_return + return +} + + // writer append a quoted rune into the byte buffer, return the written size write_quoted_rune :: proc(w: Writer, r: rune) -> (n: int) { _write_byte :: #force_inline proc(w: Writer, c: byte) -> int { diff --git a/core/mem/virtual/virtual_windows.odin b/core/mem/virtual/virtual_windows.odin index 0da8498d5..3fd4eeb68 100644 --- a/core/mem/virtual/virtual_windows.odin +++ b/core/mem/virtual/virtual_windows.odin @@ -72,7 +72,7 @@ foreign Kernel32 { flProtect: u32, dwMaximumSizeHigh: u32, dwMaximumSizeLow: u32, - lpName: [^]u16, + lpName: cstring16, ) -> rawptr --- MapViewOfFile :: proc( diff --git a/core/os/dir_windows.odin b/core/os/dir_windows.odin index ae3e6922c..40f4b9e9b 100644 --- a/core/os/dir_windows.odin +++ b/core/os/dir_windows.odin @@ -87,7 +87,7 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F defer delete(path) find_data := &win32.WIN32_FIND_DATAW{} - find_handle := win32.FindFirstFileW(raw_data(wpath_search), find_data) + find_handle := win32.FindFirstFileW(cstring16(raw_data(wpath_search)), find_data) if find_handle == win32.INVALID_HANDLE_VALUE { err = get_last_error() return dfi[:], err diff --git a/core/os/os2/dir_windows.odin b/core/os/os2/dir_windows.odin index 4cf1f8396..6c754a677 100644 --- a/core/os/os2/dir_windows.odin +++ b/core/os/os2/dir_windows.odin @@ -16,7 +16,7 @@ find_data_to_file_info :: proc(base_path: string, d: ^win32.WIN32_FIND_DATAW, al } temp_allocator := TEMP_ALLOCATOR_GUARD({ allocator }) - path := concatenate({base_path, `\`, win32_wstring_to_utf8(raw_data(d.cFileName[:]), temp_allocator) or_else ""}, allocator) or_return + path := concatenate({base_path, `\`, win32_wstring_to_utf8(cstring16(raw_data(d.cFileName[:])), temp_allocator) or_else ""}, allocator) or_return handle := win32.HANDLE(_open_internal(path, {.Read}, 0o666) or_else 0) defer win32.CloseHandle(handle) @@ -107,15 +107,7 @@ _read_directory_iterator_init :: proc(it: ^Read_Directory_Iterator, f: ^File) { return } - wpath: []u16 - { - i := 0 - for impl.wname[i] != 0 { - i += 1 - } - wpath = impl.wname[:i] - } - + wpath := string16(impl.wname) temp_allocator := TEMP_ALLOCATOR_GUARD({}) wpath_search := make([]u16, len(wpath)+3, temp_allocator) @@ -124,7 +116,7 @@ _read_directory_iterator_init :: proc(it: ^Read_Directory_Iterator, f: ^File) { wpath_search[len(wpath)+1] = '*' wpath_search[len(wpath)+2] = 0 - it.impl.find_handle = win32.FindFirstFileW(raw_data(wpath_search), &it.impl.find_data) + it.impl.find_handle = win32.FindFirstFileW(cstring16(raw_data(wpath_search)), &it.impl.find_data) if it.impl.find_handle == win32.INVALID_HANDLE_VALUE { read_directory_iterator_set_error(it, impl.name, _get_platform_error()) return diff --git a/core/os/os2/env_windows.odin b/core/os/os2/env_windows.odin index 55b2bb5ee..d389f8860 100644 --- a/core/os/os2/env_windows.odin +++ b/core/os/os2/env_windows.odin @@ -31,7 +31,7 @@ _lookup_env_alloc :: proc(key: string, allocator: runtime.Allocator) -> (value: return "", false } - value = win32_utf16_to_utf8(b[:n], allocator) or_else "" + value = win32_utf16_to_utf8(string16(b[:n]), allocator) or_else "" found = true return } diff --git a/core/os/os2/file_windows.odin b/core/os/os2/file_windows.odin index dbac78d0e..25e9cb4b0 100644 --- a/core/os/os2/file_windows.odin +++ b/core/os/os2/file_windows.odin @@ -247,7 +247,7 @@ _destroy :: proc(f: ^File_Impl) -> Error { } a := f.allocator - err0 := free(f.wname, a) + err0 := free(rawptr(f.wname), a) err1 := delete(f.name, a) err2 := delete(f.r_buf, a) err3 := delete(f.w_buf, a) @@ -619,7 +619,7 @@ _symlink :: proc(old_name, new_name: string) -> Error { return .Unsupported } -_open_sym_link :: proc(p: [^]u16) -> (handle: win32.HANDLE, err: Error) { +_open_sym_link :: proc(p: cstring16) -> (handle: win32.HANDLE, err: Error) { attrs := u32(win32.FILE_FLAG_BACKUP_SEMANTICS) attrs |= win32.FILE_FLAG_OPEN_REPARSE_POINT handle = win32.CreateFileW(p, 0, 0, nil, win32.OPEN_EXISTING, attrs, nil) @@ -661,7 +661,7 @@ _normalize_link_path :: proc(p: []u16, allocator: runtime.Allocator) -> (str: st } - handle := _open_sym_link(raw_data(p)) or_return + handle := _open_sym_link(cstring16(raw_data(p))) or_return defer win32.CloseHandle(handle) n := win32.GetFinalPathNameByHandleW(handle, nil, 0, win32.VOLUME_NAME_DOS) @@ -672,7 +672,7 @@ _normalize_link_path :: proc(p: []u16, allocator: runtime.Allocator) -> (str: st temp_allocator := TEMP_ALLOCATOR_GUARD({ allocator }) buf := make([]u16, n+1, temp_allocator) - n = win32.GetFinalPathNameByHandleW(handle, raw_data(buf), u32(len(buf)), win32.VOLUME_NAME_DOS) + n = win32.GetFinalPathNameByHandleW(handle, cstring16(raw_data(buf)), u32(len(buf)), win32.VOLUME_NAME_DOS) if n == 0 { return "", _get_platform_error() } @@ -713,7 +713,7 @@ _read_link :: proc(name: string, allocator: runtime.Allocator) -> (s: string, er switch rdb.ReparseTag { case win32.IO_REPARSE_TAG_SYMLINK: rb := (^win32.SYMBOLIC_LINK_REPARSE_BUFFER)(&rdb.rest) - pb := win32.wstring(&rb.PathBuffer) + pb := ([^]u16)(&rb.PathBuffer) pb[rb.SubstituteNameOffset+rb.SubstituteNameLength] = 0 p := pb[rb.SubstituteNameOffset:][:rb.SubstituteNameLength] if rb.Flags & win32.SYMLINK_FLAG_RELATIVE != 0 { @@ -723,7 +723,7 @@ _read_link :: proc(name: string, allocator: runtime.Allocator) -> (s: string, er case win32.IO_REPARSE_TAG_MOUNT_POINT: rb := (^win32.MOUNT_POINT_REPARSE_BUFFER)(&rdb.rest) - pb := win32.wstring(&rb.PathBuffer) + pb := ([^]u16)(&rb.PathBuffer) pb[rb.SubstituteNameOffset+rb.SubstituteNameLength] = 0 p := pb[rb.SubstituteNameOffset:][:rb.SubstituteNameLength] return _normalize_link_path(p, allocator) @@ -874,8 +874,8 @@ _file_stream_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, @(private="package", require_results) -win32_utf8_to_wstring :: proc(s: string, allocator: runtime.Allocator) -> (ws: [^]u16, err: runtime.Allocator_Error) { - ws = raw_data(win32_utf8_to_utf16(s, allocator) or_return) +win32_utf8_to_wstring :: proc(s: string, allocator: runtime.Allocator) -> (ws: cstring16, err: runtime.Allocator_Error) { + ws = cstring16(raw_data(win32_utf8_to_utf16(s, allocator) or_return)) return } @@ -909,24 +909,60 @@ win32_utf8_to_utf16 :: proc(s: string, allocator: runtime.Allocator) -> (ws: []u } @(private="package", require_results) -win32_wstring_to_utf8 :: proc(s: [^]u16, allocator: runtime.Allocator) -> (res: string, err: runtime.Allocator_Error) { - if s == nil || s[0] == 0 { +win32_wstring_to_utf8 :: proc(s: cstring16, allocator: runtime.Allocator) -> (res: string, err: runtime.Allocator_Error) { + if s == nil || s == "" { return "", nil } - n := 0 - for s[n] != 0 { - n += 1 + return win32_utf16_to_utf8(string16(s), allocator) +} + +@(private="package") +win32_utf16_to_utf8 :: proc{ + win32_utf16_string16_to_utf8, + win32_utf16_u16_to_utf8, +} + +@(private="package", require_results) +win32_utf16_string16_to_utf8 :: proc(s: string16, allocator: runtime.Allocator) -> (res: string, err: runtime.Allocator_Error) { + if len(s) == 0 { + return } - return win32_utf16_to_utf8(s[:n], allocator) + + n := win32.WideCharToMultiByte(win32.CP_UTF8, win32.WC_ERR_INVALID_CHARS, cstring16(raw_data(s)), i32(len(s)), nil, 0, nil, nil) + if n == 0 { + return + } + + // If N < 0 the call to WideCharToMultiByte assume the wide string is null terminated + // and will scan it to find the first null terminated character. The resulting string will + // also be null terminated. + // If N > 0 it assumes the wide string is not null terminated and the resulting string + // will not be null terminated. + text := make([]byte, n, allocator) or_return + + n1 := win32.WideCharToMultiByte(win32.CP_UTF8, win32.WC_ERR_INVALID_CHARS, cstring16(raw_data(s)), i32(len(s)), raw_data(text), n, nil, nil) + if n1 == 0 { + delete(text, allocator) + return + } + + for i in 0..<n { + if text[i] == 0 { + n = i + break + } + } + res = string(text[:n]) + return } @(private="package", require_results) -win32_utf16_to_utf8 :: proc(s: []u16, allocator: runtime.Allocator) -> (res: string, err: runtime.Allocator_Error) { +win32_utf16_u16_to_utf8 :: proc(s: []u16, allocator: runtime.Allocator) -> (res: string, err: runtime.Allocator_Error) { if len(s) == 0 { return } - n := win32.WideCharToMultiByte(win32.CP_UTF8, win32.WC_ERR_INVALID_CHARS, raw_data(s), i32(len(s)), nil, 0, nil, nil) + n := win32.WideCharToMultiByte(win32.CP_UTF8, win32.WC_ERR_INVALID_CHARS, cstring16(raw_data(s)), i32(len(s)), nil, 0, nil, nil) if n == 0 { return } @@ -938,7 +974,7 @@ win32_utf16_to_utf8 :: proc(s: []u16, allocator: runtime.Allocator) -> (res: str // will not be null terminated. text := make([]byte, n, allocator) or_return - n1 := win32.WideCharToMultiByte(win32.CP_UTF8, win32.WC_ERR_INVALID_CHARS, raw_data(s), i32(len(s)), raw_data(text), n, nil, nil) + n1 := win32.WideCharToMultiByte(win32.CP_UTF8, win32.WC_ERR_INVALID_CHARS, cstring16(raw_data(s)), i32(len(s)), raw_data(text), n, nil, nil) if n1 == 0 { delete(text, allocator) return diff --git a/core/os/os2/path_windows.odin b/core/os/os2/path_windows.odin index c2e51040f..e0a00b07a 100644 --- a/core/os/os2/path_windows.odin +++ b/core/os/os2/path_windows.odin @@ -91,11 +91,11 @@ _remove_all :: proc(path: string) -> Error { nil, win32.FO_DELETE, dir, - &empty[0], + cstring16(&empty[0]), win32.FOF_NOCONFIRMATION | win32.FOF_NOERRORUI | win32.FOF_SILENT, false, nil, - &empty[0], + cstring16(&empty[0]), } res := win32.SHFileOperationW(&file_op) if res != 0 { @@ -303,13 +303,13 @@ _get_absolute_path :: proc(path: string, allocator: runtime.Allocator) -> (absol } temp_allocator := TEMP_ALLOCATOR_GUARD({ allocator }) rel_utf16 := win32.utf8_to_utf16(rel, temp_allocator) - n := win32.GetFullPathNameW(raw_data(rel_utf16), 0, nil, nil) + n := win32.GetFullPathNameW(cstring16(raw_data(rel_utf16)), 0, nil, nil) if n == 0 { return "", Platform_Error(win32.GetLastError()) } buf := make([]u16, n, temp_allocator) or_return - n = win32.GetFullPathNameW(raw_data(rel_utf16), u32(n), raw_data(buf), nil) + n = win32.GetFullPathNameW(cstring16(raw_data(rel_utf16)), u32(n), cstring16(raw_data(buf)), nil) if n == 0 { return "", Platform_Error(win32.GetLastError()) } diff --git a/core/os/os2/process_windows.odin b/core/os/os2/process_windows.odin index 199e5ad74..990da6616 100644 --- a/core/os/os2/process_windows.odin +++ b/core/os/os2/process_windows.odin @@ -175,7 +175,7 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator info.fields += {.Command_Line} } if .Command_Args in selection { - info.command_args = _parse_command_line(raw_data(cmdline_w), allocator) or_return + info.command_args = _parse_command_line(cstring16(raw_data(cmdline_w)), allocator) or_return info.fields += {.Command_Args} } } @@ -286,7 +286,7 @@ _process_info_by_handle :: proc(process: Process, selection: Process_Info_Fields info.fields += {.Command_Line} } if .Command_Args in selection { - info.command_args = _parse_command_line(raw_data(cmdline_w), allocator) or_return + info.command_args = _parse_command_line(cstring16(raw_data(cmdline_w)), allocator) or_return info.fields += {.Command_Args} } } @@ -610,7 +610,7 @@ _process_exe_by_pid :: proc(pid: int, allocator: runtime.Allocator) -> (exe_path err =_get_platform_error() return } - return win32_wstring_to_utf8(raw_data(entry.szExePath[:]), allocator) + return win32_wstring_to_utf8(cstring16(raw_data(entry.szExePath[:])), allocator) } _get_process_user :: proc(process_handle: win32.HANDLE, allocator: runtime.Allocator) -> (full_username: string, err: Error) { @@ -650,7 +650,7 @@ _get_process_user :: proc(process_handle: win32.HANDLE, allocator: runtime.Alloc return strings.concatenate({domain, "\\", username}, allocator) } -_parse_command_line :: proc(cmd_line_w: [^]u16, allocator: runtime.Allocator) -> (argv: []string, err: Error) { +_parse_command_line :: proc(cmd_line_w: cstring16, allocator: runtime.Allocator) -> (argv: []string, err: Error) { argc: i32 argv_w := win32.CommandLineToArgvW(cmd_line_w, &argc) if argv_w == nil { diff --git a/core/os/os2/stat_windows.odin b/core/os/os2/stat_windows.odin index 3cdc80405..3dee42be6 100644 --- a/core/os/os2/stat_windows.odin +++ b/core/os/os2/stat_windows.odin @@ -49,12 +49,12 @@ full_path_from_name :: proc(name: string, allocator: runtime.Allocator) -> (path p := win32_utf8_to_utf16(name, temp_allocator) or_return - n := win32.GetFullPathNameW(raw_data(p), 0, nil, nil) + n := win32.GetFullPathNameW(cstring16(raw_data(p)), 0, nil, nil) if n == 0 { return "", _get_platform_error() } buf := make([]u16, n+1, temp_allocator) - n = win32.GetFullPathNameW(raw_data(p), u32(len(buf)), raw_data(buf), nil) + n = win32.GetFullPathNameW(cstring16(raw_data(p)), u32(len(buf)), cstring16(raw_data(buf)), nil) if n == 0 { return "", _get_platform_error() } @@ -140,8 +140,8 @@ _cleanpath_from_handle :: proc(f: ^File, allocator: runtime.Allocator) -> (strin temp_allocator := TEMP_ALLOCATOR_GUARD({ allocator }) buf := make([]u16, max(n, 260)+1, temp_allocator) - n = win32.GetFinalPathNameByHandleW(h, raw_data(buf), u32(len(buf)), 0) - return _cleanpath_from_buf(buf[:n], allocator) + n = win32.GetFinalPathNameByHandleW(h, cstring16(raw_data(buf)), u32(len(buf)), 0) + return _cleanpath_from_buf(string16(buf[:n]), allocator) } _cleanpath_from_handle_u16 :: proc(f: ^File) -> ([]u16, Error) { @@ -158,12 +158,12 @@ _cleanpath_from_handle_u16 :: proc(f: ^File) -> ([]u16, Error) { temp_allocator := TEMP_ALLOCATOR_GUARD({}) buf := make([]u16, max(n, 260)+1, temp_allocator) - n = win32.GetFinalPathNameByHandleW(h, raw_data(buf), u32(len(buf)), 0) + n = win32.GetFinalPathNameByHandleW(h, cstring16(raw_data(buf)), u32(len(buf)), 0) return _cleanpath_strip_prefix(buf[:n]), nil } -_cleanpath_from_buf :: proc(buf: []u16, allocator: runtime.Allocator) -> (string, runtime.Allocator_Error) { - buf := buf +_cleanpath_from_buf :: proc(buf: string16, allocator: runtime.Allocator) -> (string, runtime.Allocator_Error) { + buf := transmute([]u16)buf buf = _cleanpath_strip_prefix(buf) return win32_utf16_to_utf8(buf, allocator) } diff --git a/core/os/os2/temp_file_windows.odin b/core/os/os2/temp_file_windows.odin index 9d75ef99d..91ea284a1 100644 --- a/core/os/os2/temp_file_windows.odin +++ b/core/os/os2/temp_file_windows.odin @@ -12,12 +12,12 @@ _temp_dir :: proc(allocator: runtime.Allocator) -> (string, runtime.Allocator_Er temp_allocator := TEMP_ALLOCATOR_GUARD({ allocator }) b := make([]u16, max(win32.MAX_PATH, n), temp_allocator) - n = win32.GetTempPathW(u32(len(b)), raw_data(b)) + n = win32.GetTempPathW(u32(len(b)), cstring16(raw_data(b))) if n == 3 && b[1] == ':' && b[2] == '\\' { } else if n > 0 && b[n-1] == '\\' { n -= 1 } - return win32_utf16_to_utf8(b[:n], allocator) + return win32_utf16_to_utf8(string16(b[:n]), allocator) } diff --git a/core/os/os2/user_windows.odin b/core/os/os2/user_windows.odin index d68f933ce..75d0ba6ac 100644 --- a/core/os/os2/user_windows.odin +++ b/core/os/os2/user_windows.odin @@ -74,6 +74,5 @@ _get_known_folder_path :: proc(rfid: win32.REFKNOWNFOLDERID, allocator: runtime. return "", .Invalid_Path } - dir, _ = win32.wstring_to_utf8(path_w, -1, allocator) - return + return win32_wstring_to_utf8(cstring16(path_w), allocator) }
\ No newline at end of file diff --git a/core/os/stat_windows.odin b/core/os/stat_windows.odin index ca4f87668..662c9f9e6 100644 --- a/core/os/stat_windows.odin +++ b/core/os/stat_windows.odin @@ -17,7 +17,7 @@ full_path_from_name :: proc(name: string, allocator := context.allocator) -> (pa buf := make([dynamic]u16, 100) defer delete(buf) for { - n := win32.GetFullPathNameW(raw_data(p), u32(len(buf)), raw_data(buf), nil) + n := win32.GetFullPathNameW(cstring16(raw_data(p)), u32(len(buf)), cstring16(raw_data(buf)), nil) if n == 0 { return "", get_last_error() } @@ -154,7 +154,7 @@ cleanpath_from_handle_u16 :: proc(fd: Handle, allocator: runtime.Allocator) -> ( return nil, get_last_error() } buf := make([]u16, max(n, win32.DWORD(260))+1, allocator) - buf_len := win32.GetFinalPathNameByHandleW(h, raw_data(buf), n, 0) + buf_len := win32.GetFinalPathNameByHandleW(h, cstring16(raw_data(buf)), n, 0) return buf[:buf_len], nil } @(private, require_results) diff --git a/core/path/filepath/path_windows.odin b/core/path/filepath/path_windows.odin index 0dcb28cf8..24c6e00a5 100644 --- a/core/path/filepath/path_windows.odin +++ b/core/path/filepath/path_windows.odin @@ -61,13 +61,13 @@ temp_full_path :: proc(name: string) -> (path: string, err: os.Error) { } p := win32.utf8_to_utf16(name, ta) - n := win32.GetFullPathNameW(raw_data(p), 0, nil, nil) + n := win32.GetFullPathNameW(cstring16(raw_data(p)), 0, nil, nil) if n == 0 { return "", os.get_last_error() } buf := make([]u16, n, ta) - n = win32.GetFullPathNameW(raw_data(p), u32(len(buf)), raw_data(buf), nil) + n = win32.GetFullPathNameW(cstring16(raw_data(p)), u32(len(buf)), cstring16(raw_data(buf)), nil) if n == 0 { delete(buf) return "", os.get_last_error() diff --git a/core/reflect/types.odin b/core/reflect/types.odin index 511c5c9bd..98b7b368f 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -511,9 +511,12 @@ write_type_writer :: #force_no_inline proc(w: io.Writer, ti: ^Type_Info, n_writt io.write_i64(w, i64(8*ti.size), 10, &n) or_return case Type_Info_String: if info.is_cstring { - io.write_string(w, "cstring", &n) or_return - } else { - io.write_string(w, "string", &n) or_return + io.write_byte(w, 'c', &n) or_return + } + io.write_string(w, "string", &n) or_return + switch info.encoding { + case .UTF_8: /**/ + case .UTF_16: io.write_string(w, "16", &n) or_return } case Type_Info_Boolean: switch ti.id { diff --git a/core/sys/info/platform_windows.odin b/core/sys/info/platform_windows.odin index 4c00ddadf..dd1441d30 100644 --- a/core/sys/info/platform_windows.odin +++ b/core/sys/info/platform_windows.odin @@ -324,8 +324,8 @@ read_reg_string :: proc(hkey: sys.HKEY, subkey, val: string) -> (res: string, ok status := sys.RegGetValueW( hkey, - &key_name_wide[0], - &val_name_wide[0], + cstring16(&key_name_wide[0]), + cstring16(&val_name_wide[0]), sys.RRF_RT_REG_SZ, nil, raw_data(result_wide[:]), @@ -359,8 +359,8 @@ read_reg_i32 :: proc(hkey: sys.HKEY, subkey, val: string) -> (res: i32, ok: bool result_size := sys.DWORD(size_of(i32)) status := sys.RegGetValueW( hkey, - &key_name_wide[0], - &val_name_wide[0], + cstring16(&key_name_wide[0]), + cstring16(&val_name_wide[0]), sys.RRF_RT_REG_DWORD, nil, &res, @@ -386,8 +386,8 @@ read_reg_i64 :: proc(hkey: sys.HKEY, subkey, val: string) -> (res: i64, ok: bool result_size := sys.DWORD(size_of(i64)) status := sys.RegGetValueW( hkey, - &key_name_wide[0], - &val_name_wide[0], + cstring16(&key_name_wide[0]), + cstring16(&val_name_wide[0]), sys.RRF_RT_REG_QWORD, nil, &res, diff --git a/core/sys/windows/comctl32.odin b/core/sys/windows/comctl32.odin index d954f952c..c7a166634 100644 --- a/core/sys/windows/comctl32.odin +++ b/core/sys/windows/comctl32.odin @@ -573,10 +573,10 @@ Button_GetTextMargin :: #force_inline proc "system" (hwnd: HWND, pmargin: ^RECT) return cast(BOOL)SendMessageW(hwnd, BCM_GETTEXTMARGIN, 0, cast(LPARAM)uintptr(pmargin)) } Button_SetNote :: #force_inline proc "system" (hwnd: HWND, psz: LPCWSTR) -> BOOL { - return cast(BOOL)SendMessageW(hwnd, BCM_SETNOTE, 0, cast(LPARAM)uintptr(psz)) + return cast(BOOL)SendMessageW(hwnd, BCM_SETNOTE, 0, cast(LPARAM)uintptr(rawptr(psz))) } Button_GetNote :: #force_inline proc "system" (hwnd: HWND, psz: LPCWSTR, pcc: ^c_int) -> BOOL { - return cast(BOOL)SendMessageW(hwnd, BCM_GETNOTE, uintptr(pcc), cast(LPARAM)uintptr(psz)) + return cast(BOOL)SendMessageW(hwnd, BCM_GETNOTE, uintptr(pcc), cast(LPARAM)uintptr(rawptr(psz))) } Button_GetNoteLength :: #force_inline proc "system" (hwnd: HWND) -> LRESULT { return SendMessageW(hwnd, BCM_GETNOTELENGTH, 0, 0) @@ -604,10 +604,10 @@ EDITBALLOONTIP :: struct { PEDITBALLOONTIP :: ^EDITBALLOONTIP Edit_SetCueBannerText :: #force_inline proc "system" (hwnd: HWND, lpcwText: LPCWSTR) -> BOOL { - return cast(BOOL)SendMessageW(hwnd, EM_SETCUEBANNER, 0, cast(LPARAM)uintptr(lpcwText)) + return cast(BOOL)SendMessageW(hwnd, EM_SETCUEBANNER, 0, cast(LPARAM)uintptr(rawptr(lpcwText))) } Edit_SetCueBannerTextFocused :: #force_inline proc "system" (hwnd: HWND, lpcwText: LPCWSTR, fDrawFocused: BOOL) -> BOOL { - return cast(BOOL)SendMessageW(hwnd, EM_SETCUEBANNER, cast(WPARAM)fDrawFocused, cast(LPARAM)uintptr(lpcwText)) + return cast(BOOL)SendMessageW(hwnd, EM_SETCUEBANNER, cast(WPARAM)fDrawFocused, cast(LPARAM)uintptr(rawptr(lpcwText))) } Edit_GetCueBannerText :: #force_inline proc "system" (hwnd: HWND, lpwText: LPWSTR, cchText: LONG) -> BOOL { return cast(BOOL)SendMessageW(hwnd, EM_GETCUEBANNER, uintptr(lpwText), cast(LPARAM)cchText) @@ -1197,7 +1197,7 @@ ListView_GetItemPosition :: #force_inline proc "system" (hwnd: HWND, i: c_int, p return cast(BOOL)SendMessageW(hwnd, LVM_GETITEMPOSITION, cast(WPARAM)i, cast(LPARAM)uintptr(ppt)) } ListView_GetStringWidth :: #force_inline proc "system" (hwndLV: HWND, psz: LPCWSTR) -> c_int { - return cast(c_int)SendMessageW(hwndLV, LVM_GETSTRINGWIDTHW, 0, cast(LPARAM)uintptr(psz)) + return cast(c_int)SendMessageW(hwndLV, LVM_GETSTRINGWIDTHW, 0, cast(LPARAM)uintptr(rawptr(psz))) } ListView_HitTest :: #force_inline proc "system" (hwndLV: HWND, pinfo: ^LV_HITTESTINFO) -> c_int { return cast(c_int)SendMessageW(hwndLV, LVM_HITTEST, 0, cast(LPARAM)uintptr(pinfo)) diff --git a/core/sys/windows/ip_helper.odin b/core/sys/windows/ip_helper.odin index 7a6e545ac..d2e75d531 100644 --- a/core/sys/windows/ip_helper.odin +++ b/core/sys/windows/ip_helper.odin @@ -38,9 +38,9 @@ IP_Adapter_Addresses :: struct { FirstAnycastAddress: ^IP_ADAPTER_ANYCAST_ADDRESS_XP, FirstMulticastAddress: ^IP_ADAPTER_MULTICAST_ADDRESS_XP, FirstDnsServerAddress: ^IP_ADAPTER_DNS_SERVER_ADDRESS_XP, - DnsSuffix: ^u16, - Description: ^u16, - FriendlyName: ^u16, + DnsSuffix: cstring16, + Description: cstring16, + FriendlyName: cstring16, PhysicalAddress: [8]u8, PhysicalAddressLength: u32, Anonymous2: struct #raw_union { diff --git a/core/sys/windows/kernel32.odin b/core/sys/windows/kernel32.odin index 76f2897ac..114e70b41 100644 --- a/core/sys/windows/kernel32.odin +++ b/core/sys/windows/kernel32.odin @@ -258,7 +258,7 @@ foreign kernel32 { ) -> BOOL --- CreateProcessW :: proc( lpApplicationName: LPCWSTR, - lpCommandLine: LPWSTR, + lpCommandLine: LPCWSTR, lpProcessAttributes: LPSECURITY_ATTRIBUTES, lpThreadAttributes: LPSECURITY_ATTRIBUTES, bInheritHandles: BOOL, diff --git a/core/sys/windows/types.odin b/core/sys/windows/types.odin index 92b1cb15c..d5c9b1d20 100644 --- a/core/sys/windows/types.odin +++ b/core/sys/windows/types.odin @@ -107,8 +107,8 @@ PDWORD64 :: ^DWORD64 PDWORD_PTR :: ^DWORD_PTR ATOM :: distinct WORD -wstring :: [^]WCHAR -PWSTR :: [^]WCHAR +wstring :: cstring16 +PWSTR :: cstring16 PBYTE :: ^BYTE LPBYTE :: ^BYTE @@ -145,7 +145,7 @@ LPSTR :: ^CHAR LPWSTR :: ^WCHAR OLECHAR :: WCHAR BSTR :: ^OLECHAR -LPOLESTR :: ^OLECHAR +LPOLESTR :: cstring16 LPCOLESTR :: LPCSTR LPFILETIME :: ^FILETIME LPWSABUF :: ^WSABUF @@ -1698,7 +1698,7 @@ NM_FONTCHANGED :: NM_OUTOFMEMORY-22 NM_CUSTOMTEXT :: NM_OUTOFMEMORY-23 // uses NMCUSTOMTEXT struct NM_TVSTATEIMAGECHANGING :: NM_OUTOFMEMORY-23 // uses NMTVSTATEIMAGECHANGING struct, defined after HTREEITEM -PCZZWSTR :: ^WCHAR +PCZZWSTR :: cstring16 SHFILEOPSTRUCTW :: struct { hwnd: HWND, diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin index 995e8e0e5..10dc907e7 100644 --- a/core/sys/windows/util.odin +++ b/core/sys/windows/util.odin @@ -122,14 +122,14 @@ utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf} utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) -> wstring { if res := utf8_to_utf16(s, allocator); len(res) > 0 { - return raw_data(res) + return wstring(raw_data(res)) } return nil } utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring { if res := utf8_to_utf16(buf, s); len(res) > 0 { - return raw_data(res) + return wstring(raw_data(res)) } return nil } @@ -215,7 +215,7 @@ utf16_to_utf8_alloc :: proc(s: []u16, allocator := context.temp_allocator) -> (r if len(s) == 0 { return "", nil } - return wstring_to_utf8(raw_data(s), len(s), allocator) + return wstring_to_utf8(wstring(raw_data(s)), len(s), allocator) } /* @@ -236,7 +236,7 @@ utf16_to_utf8_buf :: proc(buf: []u8, s: []u16) -> (res: string) { if len(s) == 0 { return } - return wstring_to_utf8(buf, raw_data(s), len(s)) + return wstring_to_utf8(buf, wstring(raw_data(s)), len(s)) } utf16_to_utf8 :: proc{utf16_to_utf8_alloc, utf16_to_utf8_buf} @@ -298,7 +298,7 @@ _add_user :: proc(servername: string, username: string, password: string) -> (ok servername_w = nil } else { server := utf8_to_utf16(servername, context.temp_allocator) - servername_w = &server[0] + servername_w = wstring(&server[0]) } if len(username) == 0 || len(username) > LM20_UNLEN { @@ -348,7 +348,7 @@ get_computer_name_and_account_sid :: proc(username: string) -> (computer_name: s res := LookupAccountNameW( nil, // Look on this computer first - &username_w[0], + wstring(&username_w[0]), &sid, &cbsid, nil, @@ -364,10 +364,10 @@ get_computer_name_and_account_sid :: proc(username: string) -> (computer_name: s res = LookupAccountNameW( nil, - &username_w[0], + wstring(&username_w[0]), &sid, &cbsid, - &cname_w[0], + wstring(&cname_w[0]), &computer_name_size, &pe_use, ) @@ -390,7 +390,7 @@ get_sid :: proc(username: string, sid: ^SID) -> (ok: bool) { res := LookupAccountNameW( nil, // Look on this computer first - &username_w[0], + wstring(&username_w[0]), sid, &cbsid, nil, @@ -406,10 +406,10 @@ get_sid :: proc(username: string, sid: ^SID) -> (ok: bool) { res = LookupAccountNameW( nil, - &username_w[0], + wstring(&username_w[0]), sid, &cbsid, - &cname_w[0], + wstring(&cname_w[0]), &computer_name_size, &pe_use, ) @@ -428,7 +428,7 @@ add_user_to_group :: proc(sid: ^SID, group: string) -> (ok: NET_API_STATUS) { group_name := utf8_to_utf16(group, context.temp_allocator) ok = NetLocalGroupAddMembers( nil, - &group_name[0], + wstring(&group_name[0]), 0, &group_member, 1, @@ -443,7 +443,7 @@ add_del_from_group :: proc(sid: ^SID, group: string) -> (ok: NET_API_STATUS) { group_name := utf8_to_utf16(group, context.temp_allocator) ok = NetLocalGroupDelMembers( nil, - &group_name[0], + cstring16(&group_name[0]), 0, &group_member, 1, @@ -465,19 +465,19 @@ add_user_profile :: proc(username: string) -> (ok: bool, profile_path: string) { if res == false { return false, "" } - defer LocalFree(sb) + defer LocalFree(rawptr(sb)) pszProfilePath := make([]u16, 257, context.temp_allocator) res2 := CreateProfile( sb, - &username_w[0], - &pszProfilePath[0], + cstring16(&username_w[0]), + cstring16(&pszProfilePath[0]), 257, ) if res2 != 0 { return false, "" } - profile_path = wstring_to_utf8(&pszProfilePath[0], 257) or_else "" + profile_path = wstring_to_utf8(wstring(&pszProfilePath[0]), 257) or_else "" return true, profile_path } @@ -495,7 +495,7 @@ delete_user_profile :: proc(username: string) -> (ok: bool) { if res == false { return false } - defer LocalFree(sb) + defer LocalFree(rawptr(sb)) res2 := DeleteProfileW( sb, @@ -548,13 +548,13 @@ delete_user :: proc(servername: string, username: string) -> (ok: bool) { servername_w = nil } else { server := utf8_to_utf16(servername, context.temp_allocator) - servername_w = &server[0] + servername_w = wstring(&server[0]) } username_w := utf8_to_utf16(username) res := NetUserDel( servername_w, - &username_w[0], + wstring(&username_w[0]), ) if res != .Success { return false @@ -586,9 +586,9 @@ run_as_user :: proc(username, password, application, commandline: string, pi: ^P user_token: HANDLE ok = bool(LogonUserW( - lpszUsername = &username_w[0], - lpszDomain = &domain_w[0], - lpszPassword = &password_w[0], + lpszUsername = wstring(&username_w[0]), + lpszDomain = wstring(&domain_w[0]), + lpszPassword = wstring(&password_w[0]), dwLogonType = .NEW_CREDENTIALS, dwLogonProvider = .WINNT50, phToken = &user_token, @@ -605,8 +605,8 @@ run_as_user :: proc(username, password, application, commandline: string, pi: ^P ok = bool(CreateProcessAsUserW( user_token, - &app_w[0], - &commandline_w[0], + wstring(&app_w[0]), + wstring(&commandline_w[0]), nil, // lpProcessAttributes, nil, // lpThreadAttributes, false, // bInheritHandles, diff --git a/core/time/timezone/tz_windows.odin b/core/time/timezone/tz_windows.odin index 8dc5f533c..fe00719a2 100644 --- a/core/time/timezone/tz_windows.odin +++ b/core/time/timezone/tz_windows.odin @@ -159,9 +159,9 @@ iana_to_windows_tz :: proc(iana_name: string, allocator := context.allocator) -> status: windows.UError iana_name_wstr := windows.utf8_to_wstring(iana_name, allocator) - defer free(iana_name_wstr, allocator) + defer free(rawptr(iana_name_wstr), allocator) - wintz_name_len := windows.ucal_getWindowsTimeZoneID(iana_name_wstr, -1, raw_data(wintz_name_buffer[:]), len(wintz_name_buffer), &status) + wintz_name_len := windows.ucal_getWindowsTimeZoneID(iana_name_wstr, -1, cstring16(raw_data(wintz_name_buffer[:])), len(wintz_name_buffer), &status) if status != .U_ZERO_ERROR { return } @@ -178,7 +178,7 @@ local_tz_name :: proc(allocator := context.allocator) -> (name: string, success: iana_name_buffer: [128]u16 status: windows.UError - zone_str_len := windows.ucal_getDefaultTimeZone(raw_data(iana_name_buffer[:]), len(iana_name_buffer), &status) + zone_str_len := windows.ucal_getDefaultTimeZone(cstring16(raw_data(iana_name_buffer[:])), len(iana_name_buffer), &status) if status != .U_ZERO_ERROR { return } @@ -291,7 +291,7 @@ _region_load :: proc(reg_str: string, allocator := context.allocator) -> (out_re defer delete(tz_key, allocator) tz_key_wstr := windows.utf8_to_wstring(tz_key, allocator) - defer free(tz_key_wstr, allocator) + defer free(rawptr(tz_key_wstr), allocator) key: windows.HKEY res := windows.RegOpenKeyExW(windows.HKEY_LOCAL_MACHINE, tz_key_wstr, 0, windows.KEY_READ, &key) diff --git a/core/unicode/utf16/utf16.odin b/core/unicode/utf16/utf16.odin index e2bcf7f68..d3f98584b 100644 --- a/core/unicode/utf16/utf16.odin +++ b/core/unicode/utf16/utf16.odin @@ -106,7 +106,57 @@ decode :: proc(d: []rune, s: []u16) -> (n: int) { return } -rune_count :: proc(s: []u16) -> (n: int) { +decode_rune_in_string :: proc(s: string16) -> (r: rune, width: int) { + r = rune(REPLACEMENT_CHAR) + n := len(s) + if n < 1 { + return + } + width = 1 + + + switch c := s[0]; { + case c < _surr1, _surr3 <= c: + r = rune(c) + case _surr1 <= c && c < _surr2 && 1 < len(s) && + _surr2 <= s[1] && s[1] < _surr3: + r = decode_surrogate_pair(rune(c), rune(s[1])) + width += 1 + } + return +} + +string_to_runes :: proc "odin" (s: string16, allocator := context.allocator) -> (runes: []rune) { + n := rune_count(s) + + runes = make([]rune, n, allocator) + i := 0 + for r in s { + runes[i] = r + i += 1 + } + return +} + + +rune_count :: proc{ + rune_count_in_string, + rune_count_in_slice, +} +rune_count_in_string :: proc(s: string16) -> (n: int) { + for i := 0; i < len(s); i += 1 { + c := s[i] + if _surr1 <= c && c < _surr2 && i+1 < len(s) && + _surr2 <= s[i+1] && s[i+1] < _surr3 { + i += 1 + } + n += 1 + } + return +} + + +rune_count_in_slice :: proc(s: []u16) -> (n: int) { for i := 0; i < len(s); i += 1 { c := s[i] if _surr1 <= c && c < _surr2 && i+1 < len(s) && diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 46a4f9ae5..40bbe41e5 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -1089,7 +1089,7 @@ gb_internal String internal_odin_root_dir(void) { text = gb_alloc_array(permanent_allocator(), wchar_t, len+1); GetModuleFileNameW(nullptr, text, cast(int)len); - path = string16_to_string(heap_allocator(), make_string16(text, len)); + path = string16_to_string(heap_allocator(), make_string16(cast(u16 *)text, len)); for (i = path.len-1; i >= 0; i--) { u8 c = path[i]; @@ -1387,14 +1387,14 @@ gb_internal String path_to_fullpath(gbAllocator a, String s, bool *ok_) { mutex_lock(&fullpath_mutex); - len = GetFullPathNameW(&string16[0], 0, nullptr, nullptr); + len = GetFullPathNameW(cast(wchar_t *)&string16[0], 0, nullptr, nullptr); if (len != 0) { wchar_t *text = gb_alloc_array(permanent_allocator(), wchar_t, len+1); - GetFullPathNameW(&string16[0], len, text, nullptr); + GetFullPathNameW(cast(wchar_t *)&string16[0], len, text, nullptr); mutex_unlock(&fullpath_mutex); text[len] = 0; - result = string16_to_string(a, make_string16(text, len)); + result = string16_to_string(a, make_string16(cast(u16 *)text, len)); result = string_trim_whitespace(result); // Replace Windows style separators diff --git a/src/cached.cpp b/src/cached.cpp index efdadce7b..61b5d01b4 100644 --- a/src/cached.cpp +++ b/src/cached.cpp @@ -231,7 +231,7 @@ Array<String> cache_gather_envs() { wchar_t *curr_string = strings; while (curr_string && *curr_string) { - String16 wstr = make_string16_c(curr_string); + String16 wstr = make_string16_c(cast(u16 *)curr_string); curr_string += wstr.len+1; String str = string16_to_string(temporary_allocator(), wstr); if (string_starts_with(str, str_lit("CURR_DATE_TIME="))) { diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 880de73f5..57413f519 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -19,6 +19,7 @@ gb_global BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_bool is_type_complex, is_type_quaternion, is_type_string, + is_type_string16, is_type_typeid, is_type_any, is_type_endian_platform, @@ -2328,13 +2329,23 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As if (is_type_string(op_type) && id == BuiltinProc_len) { if (operand->mode == Addressing_Constant) { mode = Addressing_Constant; - String str = operand->value.value_string; - value = exact_value_i64(str.len); + + if (operand->value.kind == ExactValue_String) { + String str = operand->value.value_string; + value = exact_value_i64(str.len); + } else if (operand->value.kind == ExactValue_String16) { + String16 str = operand->value.value_string16; + value = exact_value_i64(str.len); + } else { + GB_PANIC("Unhandled value kind: %d", operand->value.kind); + } type = t_untyped_integer; } else { mode = Addressing_Value; if (is_type_cstring(op_type)) { add_package_dependency(c, "runtime", "cstring_len"); + } else if (is_type_cstring16(op_type)) { + add_package_dependency(c, "runtime", "cstring16_len"); } } } else if (is_type_array(op_type)) { @@ -4684,7 +4695,9 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As break; case Type_Basic: if (t->Basic.kind == Basic_string) { - operand->type = alloc_type_multi_pointer(t_u8); + operand->type = t_u8_multi_ptr; + } else if (t->Basic.kind == Basic_string16) { + operand->type = t_u16_multi_ptr; } break; case Type_Pointer: @@ -6133,6 +6146,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case BuiltinProc_type_is_complex: case BuiltinProc_type_is_quaternion: case BuiltinProc_type_is_string: + case BuiltinProc_type_is_string16: case BuiltinProc_type_is_typeid: case BuiltinProc_type_is_any: case BuiltinProc_type_is_endian_platform: @@ -7172,7 +7186,11 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return false; } operand->mode = Addressing_Value; - operand->type = alloc_type_multi_pointer(t_u16); + if (type_hint != nullptr && is_type_cstring16(type_hint)) { + operand->type = type_hint; + } else { + operand->type = alloc_type_multi_pointer(t_u16); + } operand->value = {}; break; } diff --git a/src/check_decl.cpp b/src/check_decl.cpp index dd4c09e85..af46ee40e 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -815,6 +815,12 @@ gb_internal bool signature_parameter_similar_enough(Type *x, Type *y) { if (sig_compare(is_type_cstring, is_type_u8_multi_ptr, x, y)) { return true; } + if (sig_compare(is_type_cstring16, is_type_u16_ptr, x, y)) { + return true; + } + if (sig_compare(is_type_cstring16, is_type_u16_multi_ptr, x, y)) { + return true; + } if (sig_compare(is_type_uintptr, is_type_rawptr, x, y)) { return true; diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 51fb5511b..faa338f36 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2106,6 +2106,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i } else if (is_type_boolean(type)) { return in_value.kind == ExactValue_Bool; } else if (is_type_string(type)) { + if (in_value.kind == ExactValue_String16) { + return is_type_string16(type) || is_type_cstring16(type); + } return in_value.kind == ExactValue_String; } else if (is_type_integer(type) || is_type_rune(type)) { if (in_value.kind == ExactValue_Bool) { @@ -2320,6 +2323,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i if (in_value.kind == ExactValue_String) { return false; } + if (in_value.kind == ExactValue_String16) { + return false; + } if (out_value) *out_value = in_value; } else if (is_type_bit_set(type)) { if (in_value.kind == ExactValue_Integer) { @@ -2862,6 +2868,14 @@ gb_internal void add_comparison_procedures_for_fields(CheckerContext *c, Type *t add_package_dependency(c, "runtime", "string_eq"); add_package_dependency(c, "runtime", "string_ne"); break; + case Basic_cstring16: + add_package_dependency(c, "runtime", "cstring16_eq"); + add_package_dependency(c, "runtime", "cstring16_ne"); + break; + case Basic_string16: + add_package_dependency(c, "runtime", "string16_eq"); + add_package_dependency(c, "runtime", "string16_ne"); + break; } break; case Type_Struct: @@ -3035,6 +3049,24 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper case Token_LtEq: add_package_dependency(c, "runtime", "cstring_le"); break; case Token_GtEq: add_package_dependency(c, "runtime", "cstring_gt"); break; } + } else if (is_type_cstring16(x->type) && is_type_cstring16(y->type)) { + switch (op) { + case Token_CmpEq: add_package_dependency(c, "runtime", "cstring16_eq"); break; + case Token_NotEq: add_package_dependency(c, "runtime", "cstring16_ne"); break; + case Token_Lt: add_package_dependency(c, "runtime", "cstring16_lt"); break; + case Token_Gt: add_package_dependency(c, "runtime", "cstring16_gt"); break; + case Token_LtEq: add_package_dependency(c, "runtime", "cstring16_le"); break; + case Token_GtEq: add_package_dependency(c, "runtime", "cstring16_gt"); break; + } + } else if (is_type_string16(x->type) || is_type_string16(y->type)) { + switch (op) { + case Token_CmpEq: add_package_dependency(c, "runtime", "string16_eq"); break; + case Token_NotEq: add_package_dependency(c, "runtime", "string16_ne"); break; + case Token_Lt: add_package_dependency(c, "runtime", "string16_lt"); break; + case Token_Gt: add_package_dependency(c, "runtime", "string16_gt"); break; + case Token_LtEq: add_package_dependency(c, "runtime", "string16_le"); break; + case Token_GtEq: add_package_dependency(c, "runtime", "string16_gt"); break; + } } else if (is_type_string(x->type) || is_type_string(y->type)) { switch (op) { case Token_CmpEq: add_package_dependency(c, "runtime", "string_eq"); break; @@ -3340,6 +3372,11 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type return true; } + // []u16 <-> string16 (not cstring16) + if (is_type_u16_slice(src) && (is_type_string16(dst) && !is_type_cstring16(dst))) { + return true; + } + // cstring -> string if (are_types_identical(src, t_cstring) && are_types_identical(dst, t_string)) { if (operand->mode != Addressing_Constant) { @@ -3347,6 +3384,14 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type } return true; } + // cstring16 -> string16 + if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) { + if (operand->mode != Addressing_Constant) { + add_package_dependency(c, "runtime", "cstring16_to_string16"); + } + return true; + } + // cstring -> ^u8 if (are_types_identical(src, t_cstring) && is_type_u8_ptr(dst)) { return !is_constant; @@ -3372,6 +3417,34 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type if (is_type_rawptr(src) && are_types_identical(dst, t_cstring)) { return !is_constant; } + + // cstring -> ^u16 + if (are_types_identical(src, t_cstring16) && is_type_u16_ptr(dst)) { + return !is_constant; + } + // cstring -> [^]u16 + if (are_types_identical(src, t_cstring16) && is_type_u16_multi_ptr(dst)) { + return !is_constant; + } + // cstring16 -> rawptr + if (are_types_identical(src, t_cstring16) && is_type_rawptr(dst)) { + return !is_constant; + } + + + // ^u16 -> cstring16 + if (is_type_u16_ptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // [^]u16 -> cstring + if (is_type_u16_multi_ptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // rawptr -> cstring16 + if (is_type_rawptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // proc <-> proc if (is_type_proc(src) && is_type_proc(dst)) { if (is_type_polymorphic(dst)) { @@ -4558,6 +4631,8 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar // target_type = t_untyped_nil; } else if (is_type_cstring(target_type)) { // target_type = t_untyped_nil; + } else if (is_type_cstring16(target_type)) { + // target_type = t_untyped_nil; } else if (!type_has_nil(target_type)) { operand->mode = Addressing_Invalid; convert_untyped_error(c, operand, target_type); @@ -4585,6 +4660,13 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar break; } } + } else if (operand->value.kind == ExactValue_String16) { + String16 s = operand->value.value_string16; + if (is_type_u16_array(t)) { + if (s.len == t->Array.count) { + break; + } + } } operand->mode = Addressing_Invalid; convert_untyped_error(c, operand, target_type); @@ -4914,6 +4996,12 @@ gb_internal ExactValue get_constant_field_single(CheckerContext *c, ExactValue v if (success_) *success_ = true; if (finish_) *finish_ = true; return exact_value_u64(val); + } else if (value.kind == ExactValue_String16) { + GB_ASSERT(0 <= index && index < value.value_string.len); + u16 val = value.value_string16[index]; + if (success_) *success_ = true; + if (finish_) *finish_ = true; + return exact_value_u64(val); } if (value.kind != ExactValue_Compound) { if (success_) *success_ = true; @@ -8226,6 +8314,7 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 case Type_Basic: if (t->Basic.kind == Basic_string) { if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String); *max_count = o->value.value_string.len; } if (o->mode != Addressing_Constant) { @@ -8233,6 +8322,16 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 } o->type = t_u8; return true; + } else if (t->Basic.kind == Basic_string16) { + if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String16); + *max_count = o->value.value_string16.len; + } + if (o->mode != Addressing_Constant) { + o->mode = Addressing_Value; + } + o->type = t_u16; + return true; } else if (t->Basic.kind == Basic_UntypedString) { if (o->mode == Addressing_Constant) { *max_count = o->value.value_string.len; @@ -10879,9 +10978,17 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node, if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) { valid = true; if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String); max_count = o->value.value_string.len; } o->type = type_deref(o->type); + } else if (t->Basic.kind == Basic_string16) { + valid = true; + if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String16); + max_count = o->value.value_string16.len; + } + o->type = type_deref(o->type); } break; @@ -11036,15 +11143,21 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node, o->expr = node; return kind; } - - String s = {}; - if (o->value.kind == ExactValue_String) { - s = o->value.value_string; - } - o->mode = Addressing_Constant; o->type = t; - o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + + if (o->value.kind == ExactValue_String16) { + String16 s = o->value.value_string16; + + o->value = exact_value_string16(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + } else { + String s = {}; + if (o->value.kind == ExactValue_String) { + s = o->value.value_string; + } + + o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1])); + } } return kind; } @@ -11133,6 +11246,7 @@ gb_internal ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast Type *t = t_invalid; switch (node->tav.value.kind) { case ExactValue_String: t = t_untyped_string; break; + case ExactValue_String16: t = t_string16; break; // TODO(bill): determine this correctly case ExactValue_Float: t = t_untyped_float; break; case ExactValue_Complex: t = t_untyped_complex; break; case ExactValue_Quaternion: t = t_untyped_quaternion; break; @@ -11569,6 +11683,8 @@ gb_internal bool is_exact_value_zero(ExactValue const &v) { return !v.value_bool; case ExactValue_String: return v.value_string.len == 0; + case ExactValue_String16: + return v.value_string16.len == 0; case ExactValue_Integer: return big_int_is_zero(&v.value_integer); case ExactValue_Float: diff --git a/src/check_stmt.cpp b/src/check_stmt.cpp index bc9b6c5dd..ae88ff333 100644 --- a/src/check_stmt.cpp +++ b/src/check_stmt.cpp @@ -974,7 +974,14 @@ gb_internal void check_unroll_range_stmt(CheckerContext *ctx, Ast *node, u32 mod Type *t = base_type(operand.type); switch (t->kind) { case Type_Basic: - if (is_type_string(t) && t->Basic.kind != Basic_cstring) { + if (is_type_string16(t) && t->Basic.kind != Basic_cstring) { + val0 = t_rune; + val1 = t_int; + inline_for_depth = exact_value_i64(operand.value.value_string.len); + if (unroll_count > 0) { + error(node, "#unroll(%lld) does not support strings", cast(long long)unroll_count); + } + } else if (is_type_string(t) && t->Basic.kind != Basic_cstring) { val0 = t_rune; val1 = t_int; inline_for_depth = exact_value_i64(operand.value.value_string.len); @@ -1236,7 +1243,11 @@ gb_internal void check_switch_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags add_to_seen_map(ctx, &seen, upper_op, x, lhs, rhs); - if (is_type_string(x.type)) { + if (is_type_string16(x.type)) { + // NOTE(bill): Force dependency for strings here + add_package_dependency(ctx, "runtime", "string16_le"); + add_package_dependency(ctx, "runtime", "string16_lt"); + } else if (is_type_string(x.type)) { // NOTE(bill): Force dependency for strings here add_package_dependency(ctx, "runtime", "string_le"); add_package_dependency(ctx, "runtime", "string_lt"); @@ -1770,7 +1781,16 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) switch (t->kind) { case Type_Basic: - if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) { + if (t->Basic.kind == Basic_string16) { + is_possibly_addressable = false; + array_add(&vals, t_rune); + array_add(&vals, t_int); + if (is_reverse) { + add_package_dependency(ctx, "runtime", "string16_decode_last_rune"); + } else { + add_package_dependency(ctx, "runtime", "string16_decode_rune"); + } + } else if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) { is_possibly_addressable = false; array_add(&vals, t_rune); array_add(&vals, t_int); diff --git a/src/checker.cpp b/src/checker.cpp index dbe2af866..e72061f56 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1363,13 +1363,15 @@ gb_internal void init_universal(void) { } - t_u8_ptr = alloc_type_pointer(t_u8); - t_u8_multi_ptr = alloc_type_multi_pointer(t_u8); - t_int_ptr = alloc_type_pointer(t_int); - t_i64_ptr = alloc_type_pointer(t_i64); - t_f64_ptr = alloc_type_pointer(t_f64); - t_u8_slice = alloc_type_slice(t_u8); - t_string_slice = alloc_type_slice(t_string); + t_u8_ptr = alloc_type_pointer(t_u8); + t_u8_multi_ptr = alloc_type_multi_pointer(t_u8); + t_u16_ptr = alloc_type_pointer(t_u16); + t_u16_multi_ptr = alloc_type_multi_pointer(t_u16); + t_int_ptr = alloc_type_pointer(t_int); + t_i64_ptr = alloc_type_pointer(t_i64); + t_f64_ptr = alloc_type_pointer(t_f64); + t_u8_slice = alloc_type_slice(t_u8); + t_string_slice = alloc_type_slice(t_string); // intrinsics types for objective-c stuff { @@ -3099,6 +3101,9 @@ gb_internal void init_core_type_info(Checker *c) { GB_ASSERT(tis->fields.count == 5); + Entity *type_info_string_encoding_kind = find_core_entity(c, str_lit("Type_Info_String_Encoding_Kind")); + t_type_info_string_encoding_kind = type_info_string_encoding_kind->type; + Entity *type_info_variant = tis->fields[4]; Type *tiv_type = type_info_variant->type; GB_ASSERT(is_type_union(tiv_type)); diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index e9655e88a..b8b105fd2 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -250,6 +250,7 @@ BuiltinProc__type_simple_boolean_begin, BuiltinProc_type_is_complex, BuiltinProc_type_is_quaternion, BuiltinProc_type_is_string, + BuiltinProc_type_is_string16, BuiltinProc_type_is_typeid, BuiltinProc_type_is_any, @@ -608,6 +609,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("type_is_complex"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_quaternion"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_string"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("type_is_string16"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_typeid"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_any"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/common.cpp b/src/common.cpp index ad1e5a851..53848cacf 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -350,6 +350,7 @@ gb_global bool global_module_path_set = false; #include "ptr_map.cpp" #include "ptr_set.cpp" #include "string_map.cpp" +#include "string16_map.cpp" #include "string_set.cpp" #include "priority_queue.cpp" #include "thread_pool.cpp" @@ -669,7 +670,7 @@ gb_internal gb_inline f64 gb_sqrt(f64 x) { gb_internal wchar_t **command_line_to_wargv(wchar_t *cmd_line, int *_argc) { u32 i, j; - u32 len = cast(u32)string16_len(cmd_line); + u32 len = cast(u32)string16_len(cast(u16 *)cmd_line); i = ((len+2)/2)*gb_size_of(void *) + gb_size_of(void *); wchar_t **argv = cast(wchar_t **)GlobalAlloc(GMEM_FIXED, i + (len+2)*gb_size_of(wchar_t)); diff --git a/src/exact_value.cpp b/src/exact_value.cpp index 37751c8f1..f2aed84c2 100644 --- a/src/exact_value.cpp +++ b/src/exact_value.cpp @@ -29,6 +29,7 @@ enum ExactValueKind { ExactValue_Compound = 8, ExactValue_Procedure = 9, ExactValue_Typeid = 10, + ExactValue_String16 = 11, ExactValue_Count, }; @@ -46,6 +47,7 @@ struct ExactValue { Ast * value_compound; Ast * value_procedure; Type * value_typeid; + String16 value_string16; }; }; @@ -66,6 +68,9 @@ gb_internal uintptr hash_exact_value(ExactValue v) { case ExactValue_String: res = gb_fnv32a(v.value_string.text, v.value_string.len); break; + case ExactValue_String16: + res = gb_fnv32a(v.value_string.text, v.value_string.len*gb_size_of(u16)); + break; case ExactValue_Integer: { u32 key = gb_fnv32a(v.value_integer.dp, gb_size_of(*v.value_integer.dp) * v.value_integer.used); @@ -118,6 +123,11 @@ gb_internal ExactValue exact_value_string(String string) { result.value_string = string; return result; } +gb_internal ExactValue exact_value_string16(String16 string) { + ExactValue result = {ExactValue_String16}; + result.value_string16 = string; + return result; +} gb_internal ExactValue exact_value_i64(i64 i) { ExactValue result = {ExactValue_Integer}; @@ -656,6 +666,7 @@ gb_internal i32 exact_value_order(ExactValue const &v) { return 0; case ExactValue_Bool: case ExactValue_String: + case ExactValue_String16: return 1; case ExactValue_Integer: return 2; @@ -689,6 +700,7 @@ gb_internal void match_exact_values(ExactValue *x, ExactValue *y) { case ExactValue_Bool: case ExactValue_String: + case ExactValue_String16: case ExactValue_Quaternion: case ExactValue_Pointer: case ExactValue_Compound: @@ -891,7 +903,18 @@ gb_internal ExactValue exact_binary_operator_value(TokenKind op, ExactValue x, E gb_memmove(data, sx.text, sx.len); gb_memmove(data+sx.len, sy.text, sy.len); return exact_value_string(make_string(data, len)); - break; + } + case ExactValue_String16: { + if (op != Token_Add) goto error; + + // NOTE(bill): How do you minimize this over allocation? + String sx = x.value_string; + String sy = y.value_string; + isize len = sx.len+sy.len; + u16 *data = gb_alloc_array(permanent_allocator(), u16, len); + gb_memmove(data, sx.text, sx.len*gb_size_of(u16)); + gb_memmove(data+sx.len, sy.text, sy.len*gb_size_of(u16)); + return exact_value_string16(make_string16(data, len)); } } @@ -994,6 +1017,19 @@ gb_internal bool compare_exact_values(TokenKind op, ExactValue x, ExactValue y) } break; } + case ExactValue_String16: { + String16 a = x.value_string16; + String16 b = y.value_string16; + switch (op) { + case Token_CmpEq: return a == b; + case Token_NotEq: return a != b; + case Token_Lt: return a < b; + case Token_LtEq: return a <= b; + case Token_Gt: return a > b; + case Token_GtEq: return a >= b; + } + break; + } case ExactValue_Pointer: { switch (op) { @@ -1050,6 +1086,20 @@ gb_internal gbString write_exact_value_to_string(gbString str, ExactValue const gb_free(heap_allocator(), s.text); return str; } + case ExactValue_String16: { + String s = quote_to_ascii(heap_allocator(), v.value_string16); + string_limit = gb_max(string_limit, 36); + if (s.len <= string_limit) { + str = gb_string_append_length(str, s.text, s.len); + } else { + isize n = string_limit/5; + str = gb_string_append_length(str, s.text, n); + str = gb_string_append_fmt(str, "\"..%lld chars..\"", s.len-(2*n)); + str = gb_string_append_length(str, s.text+s.len-n, n); + } + gb_free(heap_allocator(), s.text); + return str; + } case ExactValue_Integer: { String s = big_int_to_string(heap_allocator(), &v.value_integer); str = gb_string_append_length(str, s.text, s.len); diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 13a1d8cf3..f37415cc1 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -1264,7 +1264,13 @@ String lb_get_objc_type_encoding(Type *t, isize pointer_depth = 0) { case Basic_string: return build_context.metrics.int_size == 4 ? str_lit("{string=*i}") : str_lit("{string=*q}"); + case Basic_string16: + return build_context.metrics.int_size == 4 ? str_lit("{string16=*i}") : str_lit("{string16=*q}"); + case Basic_cstring: return str_lit("*"); + case Basic_cstring16: return str_lit("*"); + + case Basic_any: return str_lit("{any=^v^v}"); // rawptr + ^Type_Info case Basic_typeid: diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index fef6e754d..648e8a732 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -173,7 +173,8 @@ struct lbModule { PtrMap<LLVMValueRef, Entity *> procedure_values; Array<lbProcedure *> missing_procedures_to_check; - StringMap<LLVMValueRef> const_strings; + StringMap<LLVMValueRef> const_strings; + String16Map<LLVMValueRef> const_string16s; PtrMap<u64/*type hash*/, struct lbFunctionType *> function_type_map; diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index c3112934e..e64be49f2 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -122,6 +122,25 @@ gb_internal lbValue lb_const_ptr_cast(lbModule *m, lbValue value, Type *t) { gb_internal LLVMValueRef llvm_const_string_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) { + GB_ASSERT(!is_type_string16(t)); + if (build_context.metrics.ptr_size < build_context.metrics.int_size) { + LLVMValueRef values[3] = { + data, + LLVMConstNull(lb_type(m, t_i32)), + len, + }; + return llvm_const_named_struct_internal(lb_type(m, t), values, 3); + } else { + LLVMValueRef values[2] = { + data, + len, + }; + return llvm_const_named_struct_internal(lb_type(m, t), values, 2); + } +} + +gb_internal LLVMValueRef llvm_const_string16_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) { + GB_ASSERT(is_type_string16(t)); if (build_context.metrics.ptr_size < build_context.metrics.int_size) { LLVMValueRef values[3] = { data, @@ -238,6 +257,10 @@ gb_internal lbValue lb_const_string(lbModule *m, String const &value) { return lb_const_value(m, t_string, exact_value_string(value)); } +gb_internal lbValue lb_const_string(lbModule *m, String16 const &value) { + return lb_const_value(m, t_string16, exact_value_string16(value)); +} + gb_internal lbValue lb_const_bool(lbModule *m, Type *type, bool value) { lbValue res = {}; @@ -569,7 +592,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb GB_ASSERT(is_type_slice(type)); res.value = lb_find_or_add_entity_string_byte_slice_with_type(m, value.value_string, original_type).value; return res; - } else { + } else if (value.kind == ExactValue_String16) { + GB_ASSERT(is_type_slice(type)); + res.value = lb_find_or_add_entity_string16_slice_with_type(m, value.value_string16, original_type).value; + return res; + }else { ast_node(cl, CompoundLit, value.value_compound); isize count = cl->elems.count; @@ -751,29 +778,78 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb { bool custom_link_section = cc.link_section.len > 0; - LLVMValueRef ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); + LLVMValueRef ptr = nullptr; lbValue res = {}; res.type = default_type(original_type); + isize len = value.value_string.len; + + if (is_type_string16(res.type) || is_type_cstring16(res.type)) { + TEMPORARY_ALLOCATOR_GUARD(); + String16 s16 = string_to_string16(temporary_allocator(), value.value_string); + len = s16.len; + ptr = lb_find_or_add_entity_string16_ptr(m, s16, custom_link_section); + } else { + ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); + } + if (custom_link_section) { LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section)); } - if (is_type_cstring(res.type)) { + if (is_type_cstring(res.type) || is_type_cstring16(res.type)) { res.value = ptr; } else { - if (value.value_string.len == 0) { + if (len == 0) { + if (is_type_string16(res.type)) { + ptr = LLVMConstNull(lb_type(m, t_u16_ptr)); + } else { + ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); + } + } + LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), len, true); + GB_ASSERT(is_type_string(original_type)); + + if (is_type_string16(res.type)) { + res.value = llvm_const_string16_internal(m, original_type, ptr, str_len); + } else { + res.value = llvm_const_string_internal(m, original_type, ptr, str_len); + } + } + + return res; + } + + case ExactValue_String16: + { + GB_ASSERT(is_type_string16(res.type) || is_type_cstring16(res.type)); + + bool custom_link_section = cc.link_section.len > 0; + + LLVMValueRef ptr = lb_find_or_add_entity_string16_ptr(m, value.value_string16, custom_link_section); + lbValue res = {}; + res.type = default_type(original_type); + + if (custom_link_section) { + LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section)); + } + + if (is_type_cstring16(res.type)) { + res.value = ptr; + } else { + if (value.value_string16.len == 0) { ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); } - LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string.len, true); + LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string16.len, true); GB_ASSERT(is_type_string(original_type)); - res.value = llvm_const_string_internal(m, original_type, ptr, str_len); + res.value = llvm_const_string16_internal(m, original_type, ptr, str_len); } return res; } + case ExactValue_Integer: if (is_type_pointer(type) || is_type_multi_pointer(type) || is_type_proc(type)) { LLVMTypeRef t = lb_type(m, original_type); diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 024c5564e..182920fc7 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -802,6 +802,20 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) { LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("char"), 8, LLVMDWARFTypeEncoding_Unsigned); return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring", 7); } + + case Basic_string16: + { + LLVMMetadataRef elements[2] = {}; + elements[0] = lb_debug_struct_field(m, str_lit("data"), t_u16_ptr, 0); + elements[1] = lb_debug_struct_field(m, str_lit("len"), t_int, int_bits); + return lb_debug_basic_struct(m, str_lit("string16"), 2*int_bits, int_bits, elements, gb_count_of(elements)); + } + case Basic_cstring16: + { + LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("wchar_t"), 16, LLVMDWARFTypeEncoding_Unsigned); + return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring16", 7); + } + case Basic_any: { LLVMMetadataRef elements[2] = {}; diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 74aea82f1..5425572c7 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -1559,16 +1559,24 @@ gb_internal lbValue lb_build_binary_expr(lbProcedure *p, Ast *expr) { return lb_emit_conv(p, cmp, type); } else if (lb_is_empty_string_constant(be->right) && !is_type_union(be->left->tav.type)) { // `x == ""` or `x != ""` + Type *str_type = t_string; + if (is_type_string16(be->left->tav.type) || is_type_cstring16(be->left->tav.type)) { + str_type = t_string16; + } lbValue s = lb_build_expr(p, be->left); - s = lb_emit_conv(p, s, t_string); + s = lb_emit_conv(p, s, str_type); lbValue len = lb_string_len(p, s); lbValue cmp = lb_emit_comp(p, be->op.kind, len, lb_const_int(p->module, t_int, 0)); Type *type = default_type(tv.type); return lb_emit_conv(p, cmp, type); } else if (lb_is_empty_string_constant(be->left) && !is_type_union(be->right->tav.type)) { // `"" == x` or `"" != x` + Type *str_type = t_string; + if (is_type_string16(be->right->tav.type) || is_type_cstring16(be->right->tav.type)) { + str_type = t_string16; + } lbValue s = lb_build_expr(p, be->right); - s = lb_emit_conv(p, s, t_string); + s = lb_emit_conv(p, s, str_type); lbValue len = lb_string_len(p, s); lbValue cmp = lb_emit_comp(p, be->op.kind, len, lb_const_int(p->module, t_int, 0)); Type *type = default_type(tv.type); @@ -1656,6 +1664,8 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { res.type = t; res.value = llvm_cstring(m, str); return res; + } else if (src->kind == Type_Basic && src->Basic.kind == Basic_string16 && dst->Basic.kind == Basic_cstring16) { + GB_PANIC("TODO(bill): UTF-16 string"); } // if (is_type_float(dst)) { // return value; @@ -1795,6 +1805,38 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { } + + if (is_type_cstring16(src) && is_type_u16_ptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_u16_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_u8_multi_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_cstring16(src) && is_type_rawptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_rawptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + + if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) { + TEMPORARY_ALLOCATOR_GUARD(); + + lbValue c = lb_emit_conv(p, value, t_cstring16); + auto args = array_make<lbValue>(temporary_allocator(), 1); + args[0] = c; + lbValue s = lb_emit_runtime_call(p, "cstring16_to_string16", args); + return lb_emit_conv(p, s, dst); + } + + + // integer -> boolean if (is_type_integer(src) && is_type_boolean(dst)) { lbValue res = {}; @@ -2296,6 +2338,29 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return res; } + // [^]u16 <-> cstring16 + if (is_type_u16_multi_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_u16_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_cstring16(src) && is_type_u16_ptr(dst)) { + return lb_emit_transmute(p, value, t); + } + + + // []u16 <-> string16 + if (is_type_u16_slice(src) && is_type_string16(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_string16(src) && is_type_u16_slice(dst)) { + return lb_emit_transmute(p, value, t); + } + // []byte/[]u8 <-> string if (is_type_u8_slice(src) && is_type_string(dst)) { return lb_emit_transmute(p, value, t); @@ -2304,6 +2369,7 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return lb_emit_transmute(p, value, t); } + if (is_type_array_like(dst)) { Type *elem = base_array_type(dst); isize index_count = cast(isize)get_array_type_count(dst); @@ -2710,7 +2776,53 @@ gb_internal lbValue lb_emit_comp(lbProcedure *p, TokenKind op_kind, lbValue left return lb_compare_records(p, op_kind, left, right, b); } + + if (is_type_string16(a) || is_type_cstring16(a)) { + if (is_type_cstring16(a) && is_type_cstring16(b)) { + left = lb_emit_conv(p, left, t_cstring16); + right = lb_emit_conv(p, right, t_cstring16); + char const *runtime_procedure = nullptr; + switch (op_kind) { + case Token_CmpEq: runtime_procedure = "cstring16_eq"; break; + case Token_NotEq: runtime_procedure = "cstring16_ne"; break; + case Token_Lt: runtime_procedure = "cstring16_lt"; break; + case Token_Gt: runtime_procedure = "cstring16_gt"; break; + case Token_LtEq: runtime_procedure = "cstring16_le"; break; + case Token_GtEq: runtime_procedure = "cstring16_ge"; break; + } + GB_ASSERT(runtime_procedure != nullptr); + + auto args = array_make<lbValue>(permanent_allocator(), 2); + args[0] = left; + args[1] = right; + return lb_emit_runtime_call(p, runtime_procedure, args); + } + + + if (is_type_cstring16(a) ^ is_type_cstring16(b)) { + left = lb_emit_conv(p, left, t_string16); + right = lb_emit_conv(p, right, t_string16); + } + + char const *runtime_procedure = nullptr; + switch (op_kind) { + case Token_CmpEq: runtime_procedure = "string16_eq"; break; + case Token_NotEq: runtime_procedure = "string16_ne"; break; + case Token_Lt: runtime_procedure = "string16_lt"; break; + case Token_Gt: runtime_procedure = "string16_gt"; break; + case Token_LtEq: runtime_procedure = "string16_le"; break; + case Token_GtEq: runtime_procedure = "string16_ge"; break; + } + GB_ASSERT(runtime_procedure != nullptr); + + auto args = array_make<lbValue>(permanent_allocator(), 2); + args[0] = left; + args[1] = right; + return lb_emit_runtime_call(p, runtime_procedure, args); + } + if (is_type_string(a)) { + if (is_type_cstring(a) && is_type_cstring(b)) { left = lb_emit_conv(p, left, t_cstring); right = lb_emit_conv(p, right, t_cstring); @@ -3056,6 +3168,13 @@ gb_internal lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, res.value = LLVMBuildIsNotNull(p->builder, x.value, ""); } return res; + case Basic_cstring16: + if (op_kind == Token_CmpEq) { + res.value = LLVMBuildIsNull(p->builder, x.value, ""); + } else if (op_kind == Token_NotEq) { + res.value = LLVMBuildIsNotNull(p->builder, x.value, ""); + } + return res; case Basic_any: { // TODO(bill): is this correct behaviour for nil comparison for any? @@ -4298,12 +4417,13 @@ gb_internal lbAddr lb_build_addr_index_expr(lbProcedure *p, Ast *expr) { } - case Type_Basic: { // Basic_string + case Type_Basic: { // Basic_string/Basic_string16 lbValue str; lbValue elem; lbValue len; lbValue index; + str = lb_build_expr(p, ie->expr); if (deref) { str = lb_emit_load(p, str); @@ -4432,6 +4552,22 @@ gb_internal lbAddr lb_build_addr_slice_expr(lbProcedure *p, Ast *expr) { } case Type_Basic: { + if (is_type_string16(type)) { + GB_ASSERT_MSG(are_types_identical(type, t_string16), "got %s", type_to_string(type)); + lbValue len = lb_string_len(p, base); + if (high.value == nullptr) high = len; + + if (!no_indices) { + lb_emit_slice_bounds_check(p, se->open, low, high, len, se->low != nullptr); + } + + lbValue elem = lb_emit_ptr_offset(p, lb_string_elem(p, base), low); + lbValue new_len = lb_emit_arith(p, Token_Sub, high, low, t_int); + + lbAddr str = lb_add_local_generated(p, t_string16, false); + lb_fill_string(p, str, elem, new_len); + return str; + } GB_ASSERT_MSG(are_types_identical(type, t_string), "got %s", type_to_string(type)); lbValue len = lb_string_len(p, base); if (high.value == nullptr) high = len; diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 3ce0c725f..d84b8302b 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -85,6 +85,7 @@ gb_internal void lb_init_module(lbModule *m, Checker *c) { string_map_init(&m->members); string_map_init(&m->procedures); string_map_init(&m->const_strings); + string16_map_init(&m->const_string16s); map_init(&m->function_type_map); string_map_init(&m->gen_procs); if (USE_SEPARATE_MODULES) { @@ -1812,6 +1813,37 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { return type; } case Basic_cstring: return LLVMPointerType(LLVMInt8TypeInContext(ctx), 0); + + + case Basic_string16: + { + char const *name = "..string16"; + LLVMTypeRef type = LLVMGetTypeByName(m->mod, name); + if (type != nullptr) { + return type; + } + type = LLVMStructCreateNamed(ctx, name); + + if (build_context.metrics.ptr_size < build_context.metrics.int_size) { + GB_ASSERT(build_context.metrics.ptr_size == 4); + GB_ASSERT(build_context.metrics.int_size == 8); + LLVMTypeRef fields[3] = { + LLVMPointerType(lb_type(m, t_u16), 0), + lb_type(m, t_i32), + lb_type(m, t_int), + }; + LLVMStructSetBody(type, fields, 3, false); + } else { + LLVMTypeRef fields[2] = { + LLVMPointerType(lb_type(m, t_u16), 0), + lb_type(m, t_int), + }; + LLVMStructSetBody(type, fields, 2, false); + } + return type; + } + case Basic_cstring16: return LLVMPointerType(LLVMInt16TypeInContext(ctx), 0); + case Basic_any: { char const *name = "..any"; @@ -2684,6 +2716,57 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string_ptr(lbModule *m, String co } } +gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String16 const &str, bool custom_link_section) { + String16HashKey key = {}; + LLVMValueRef *found = nullptr; + + if (!custom_link_section) { + key = string_hash_string(str); + found = string16_map_get(&m->const_string16s, key); + } + if (found != nullptr) { + return *found; + } + + + + LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)}; + + LLVMValueRef data = nullptr; + { + LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx); + + TEMPORARY_ALLOCATOR_GUARD(); + + LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1); + + for (isize i = 0; i < str.len; i++) { + values[i] = LLVMConstInt(llvm_u16, str.text[i], false); + } + values[str.len] = LLVMConstInt(llvm_u16, 0, false); + + data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1)); + } + + + u32 id = m->global_array_index.fetch_add(1); + gbString name = gb_string_make(temporary_allocator(), "csbs$"); + name = gb_string_appendc(name, m->module_name); + name = gb_string_append_fmt(name, "$%x", id); + + LLVMTypeRef type = LLVMTypeOf(data); + LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name); + LLVMSetInitializer(global_data, data); + lb_make_global_private_const(global_data); + LLVMSetAlignment(global_data, 2); + + LLVMValueRef ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2); + if (!custom_link_section) { + string16_map_set(&m->const_string16s, key, ptr); + } + return ptr; +} + gb_internal lbValue lb_find_or_add_entity_string(lbModule *m, String const &str, bool custom_link_section) { LLVMValueRef ptr = nullptr; if (str.len != 0) { @@ -2744,6 +2827,60 @@ gb_internal lbValue lb_find_or_add_entity_string_byte_slice_with_type(lbModule * return res; } +gb_internal lbValue lb_find_or_add_entity_string16_slice_with_type(lbModule *m, String16 const &str, Type *slice_type) { + GB_ASSERT(is_type_slice(slice_type)); + LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)}; + LLVMValueRef data = nullptr; + { + LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx); + + TEMPORARY_ALLOCATOR_GUARD(); + + LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1); + + for (isize i = 0; i < str.len; i++) { + values[i] = LLVMConstInt(llvm_u16, str.text[i], false); + } + values[str.len] = LLVMConstInt(llvm_u16, 0, false); + + data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1)); + } + + u32 id = m->global_array_index.fetch_add(1); + gbString name = gb_string_make(temporary_allocator(), "csba$"); + name = gb_string_appendc(name, m->module_name); + name = gb_string_append_fmt(name, "$%x", id); + + LLVMTypeRef type = LLVMTypeOf(data); + LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name); + LLVMSetInitializer(global_data, data); + lb_make_global_private_const(global_data); + LLVMSetAlignment(global_data, 2); + + i64 data_len = str.len; + LLVMValueRef ptr = nullptr; + if (data_len != 0) { + ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2); + } else { + ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); + } + if (!is_type_u16_slice(slice_type)) { + Type *bt = base_type(slice_type); + Type *elem = bt->Slice.elem; + i64 sz = type_size_of(elem); + GB_ASSERT(sz > 0); + ptr = LLVMConstPointerCast(ptr, lb_type(m, alloc_type_pointer(elem))); + data_len /= sz; + } + + LLVMValueRef len = LLVMConstInt(lb_type(m, t_int), data_len, true); + LLVMValueRef values[2] = {ptr, len}; + + lbValue res = {}; + res.value = llvm_const_named_struct(m, slice_type, values, 2); + res.type = slice_type; + return res; +} gb_internal lbValue lb_find_ident(lbProcedure *p, lbModule *m, Entity *e, Ast *expr) { diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index e63c92f6f..8f306b771 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2289,6 +2289,10 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } if (is_type_cstring(t)) { return lb_cstring_len(p, v); + } else if (is_type_cstring16(t)) { + return lb_cstring16_len(p, v); + } else if (is_type_string16(t)) { + return lb_string_len(p, v); } else if (is_type_string(t)) { return lb_string_len(p, v); } else if (is_type_array(t)) { @@ -2728,6 +2732,11 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu res = lb_emit_conv(p, res, tv.type); } else if (t->Basic.kind == Basic_cstring) { res = lb_emit_conv(p, x, tv.type); + } else if (t->Basic.kind == Basic_string16) { + res = lb_string_elem(p, x); + res = lb_emit_conv(p, res, tv.type); + } else if (t->Basic.kind == Basic_cstring16) { + res = lb_emit_conv(p, x, tv.type); } break; case Type_Pointer: diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index 027837f3f..5481ca447 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -622,6 +622,121 @@ gb_internal void lb_build_range_string(lbProcedure *p, lbValue expr, Type *val_t if (done_) *done_ = done; } +gb_internal void lb_build_range_string16(lbProcedure *p, lbValue expr, Type *val_type, + lbValue *val_, lbValue *idx_, lbBlock **loop_, lbBlock **done_, + bool is_reverse) { + + lbModule *m = p->module; + lbValue count = lb_const_int(m, t_int, 0); + Type *expr_type = base_type(expr.type); + switch (expr_type->kind) { + case Type_Basic: + count = lb_string_len(p, expr); + break; + default: + GB_PANIC("Cannot do range_string of %s", type_to_string(expr_type)); + break; + } + + lbValue val = {}; + lbValue idx = {}; + lbBlock *loop = nullptr; + lbBlock *done = nullptr; + lbBlock *body = nullptr; + + loop = lb_create_block(p, "for.string16.loop"); + body = lb_create_block(p, "for.string16.body"); + done = lb_create_block(p, "for.string16.done"); + + lbAddr offset_ = lb_add_local_generated(p, t_int, false); + lbValue offset = {}; + lbValue cond = {}; + + if (!is_reverse) { + /* + for c, offset in str { + ... + } + + offset := 0 + for offset < len(str) { + c, _w := string16_decode_rune(str[offset:]) + ... + offset += _w + } + */ + lb_addr_store(p, offset_, lb_const_int(m, t_int, 0)); + + lb_emit_jump(p, loop); + lb_start_block(p, loop); + + + offset = lb_addr_load(p, offset_); + cond = lb_emit_comp(p, Token_Lt, offset, count); + } else { + // NOTE(bill): REVERSED LOGIC + /* + #reverse for c, offset in str { + ... + } + + offset := len(str) + for offset > 0 { + c, _w := string16_decode_last_rune(str[:offset]) + offset -= _w + ... + } + */ + lb_addr_store(p, offset_, count); + + lb_emit_jump(p, loop); + lb_start_block(p, loop); + + offset = lb_addr_load(p, offset_); + cond = lb_emit_comp(p, Token_Gt, offset, lb_const_int(m, t_int, 0)); + } + lb_emit_if(p, cond, body, done); + lb_start_block(p, body); + + + lbValue rune_and_len = {}; + if (!is_reverse) { + lbValue str_elem = lb_emit_ptr_offset(p, lb_string_elem(p, expr), offset); + lbValue str_len = lb_emit_arith(p, Token_Sub, count, offset, t_int); + auto args = array_make<lbValue>(permanent_allocator(), 1); + args[0] = lb_emit_string16(p, str_elem, str_len); + + rune_and_len = lb_emit_runtime_call(p, "string16_decode_rune", args); + lbValue len = lb_emit_struct_ev(p, rune_and_len, 1); + lb_addr_store(p, offset_, lb_emit_arith(p, Token_Add, offset, len, t_int)); + + idx = offset; + } else { + // NOTE(bill): REVERSED LOGIC + lbValue str_elem = lb_string_elem(p, expr); + lbValue str_len = offset; + auto args = array_make<lbValue>(permanent_allocator(), 1); + args[0] = lb_emit_string16(p, str_elem, str_len); + + rune_and_len = lb_emit_runtime_call(p, "string16_decode_last_rune", args); + lbValue len = lb_emit_struct_ev(p, rune_and_len, 1); + lb_addr_store(p, offset_, lb_emit_arith(p, Token_Sub, offset, len, t_int)); + + idx = lb_addr_load(p, offset_); + } + + + if (val_type != nullptr) { + val = lb_emit_struct_ev(p, rune_and_len, 0); + } + + if (val_) *val_ = val; + if (idx_) *idx_ = idx; + if (loop_) *loop_ = loop; + if (done_) *done_ = done; +} + + gb_internal Ast *lb_strip_and_prefix(Ast *ident) { if (ident != nullptr) { @@ -1138,7 +1253,11 @@ gb_internal void lb_build_range_stmt(lbProcedure *p, AstRangeStmt *rs, Scope *sc } Type *t = base_type(string.type); GB_ASSERT(!is_type_cstring(t)); - lb_build_range_string(p, string, val0_type, &val, &key, &loop, &done, rs->reverse); + if (is_type_string16(t)) { + lb_build_range_string16(p, string, val0_type, &val, &key, &loop, &done, rs->reverse); + } else { + lb_build_range_string(p, string, val0_type, &val, &key, &loop, &done, rs->reverse); + } break; } case Type_Tuple: diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 43c5f0b40..d1e7c0559 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -525,14 +525,48 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ break; case Basic_string: - tag_type = t_type_info_string; + { + tag_type = t_type_info_string; + LLVMValueRef vals[2] = { + lb_const_bool(m, t_bool, false).value, + lb_const_int(m, t_type_info_string_encoding_kind, 0).value, + }; + + variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); + } break; case Basic_cstring: { tag_type = t_type_info_string; - LLVMValueRef vals[1] = { + LLVMValueRef vals[2] = { + lb_const_bool(m, t_bool, true).value, + lb_const_int(m, t_type_info_string_encoding_kind, 0).value, + }; + + variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); + } + break; + + case Basic_string16: + { + tag_type = t_type_info_string; + LLVMValueRef vals[2] = { + lb_const_bool(m, t_bool, false).value, + lb_const_int(m, t_type_info_string_encoding_kind, 1).value, + }; + + variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); + } + break; + + + case Basic_cstring16: + { + tag_type = t_type_info_string; + LLVMValueRef vals[2] = { lb_const_bool(m, t_bool, true).value, + lb_const_int(m, t_type_info_string_encoding_kind, 1).value, }; variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 521553147..dcb95a9a2 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -6,6 +6,7 @@ gb_internal bool lb_is_type_aggregate(Type *t) { case Type_Basic: switch (t->Basic.kind) { case Basic_string: + case Basic_string16: case Basic_any: return true; @@ -190,6 +191,23 @@ gb_internal lbValue lb_emit_clamp(lbProcedure *p, Type *t, lbValue x, lbValue mi return z; } +gb_internal lbValue lb_emit_string16(lbProcedure *p, lbValue str_elem, lbValue str_len) { + if (false && lb_is_const(str_elem) && lb_is_const(str_len)) { + LLVMValueRef values[2] = { + str_elem.value, + str_len.value, + }; + lbValue res = {}; + res.type = t_string16; + res.value = llvm_const_named_struct(p->module, t_string16, values, gb_count_of(values)); + return res; + } else { + lbAddr res = lb_add_local_generated(p, t_string16, false); + lb_emit_store(p, lb_emit_struct_ep(p, res.addr, 0), str_elem); + lb_emit_store(p, lb_emit_struct_ep(p, res.addr, 1), str_len); + return lb_addr_load(p, res); + } +} gb_internal lbValue lb_emit_string(lbProcedure *p, lbValue str_elem, lbValue str_len) { @@ -981,7 +999,8 @@ gb_internal i32 lb_convert_struct_index(lbModule *m, Type *t, i32 index) { } else if (build_context.ptr_size != build_context.int_size) { switch (t->kind) { case Type_Basic: - if (t->Basic.kind != Basic_string) { + if (t->Basic.kind != Basic_string && + t->Basic.kind != Basic_string16) { break; } /*fallthrough*/ @@ -1160,6 +1179,11 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) { case 0: result_type = alloc_type_pointer(t->Slice.elem); break; case 1: result_type = t_int; break; } + } else if (is_type_string16(t)) { + switch (index) { + case 0: result_type = t_u16_ptr; break; + case 1: result_type = t_int; break; + } } else if (is_type_string(t)) { switch (index) { case 0: result_type = t_u8_ptr; break; @@ -1273,6 +1297,12 @@ gb_internal lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) { switch (t->kind) { case Type_Basic: switch (t->Basic.kind) { + case Basic_string16: + switch (index) { + case 0: result_type = t_u16_ptr; break; + case 1: result_type = t_int; break; + } + break; case Basic_string: switch (index) { case 0: result_type = t_u8_ptr; break; @@ -1440,6 +1470,10 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection e = lb_emit_struct_ep(p, e, index); break; + case Basic_string16: + e = lb_emit_struct_ep(p, e, index); + break; + default: GB_PANIC("un-gep-able type %s", type_to_string(type)); break; @@ -1626,11 +1660,17 @@ gb_internal void lb_fill_string(lbProcedure *p, lbAddr const &string, lbValue ba gb_internal lbValue lb_string_elem(lbProcedure *p, lbValue string) { Type *t = base_type(string.type); + if (t->kind == Type_Basic && t->Basic.kind == Basic_string16) { + return lb_emit_struct_ev(p, string, 0); + } GB_ASSERT(t->kind == Type_Basic && t->Basic.kind == Basic_string); return lb_emit_struct_ev(p, string, 0); } gb_internal lbValue lb_string_len(lbProcedure *p, lbValue string) { Type *t = base_type(string.type); + if (t->kind == Type_Basic && t->Basic.kind == Basic_string16) { + return lb_emit_struct_ev(p, string, 1); + } GB_ASSERT_MSG(t->kind == Type_Basic && t->Basic.kind == Basic_string, "%s", type_to_string(t)); return lb_emit_struct_ev(p, string, 1); } @@ -1641,6 +1681,12 @@ gb_internal lbValue lb_cstring_len(lbProcedure *p, lbValue value) { args[0] = lb_emit_conv(p, value, t_cstring); return lb_emit_runtime_call(p, "cstring_len", args); } +gb_internal lbValue lb_cstring16_len(lbProcedure *p, lbValue value) { + GB_ASSERT(is_type_cstring16(value.type)); + auto args = array_make<lbValue>(permanent_allocator(), 1); + args[0] = lb_emit_conv(p, value, t_cstring16); + return lb_emit_runtime_call(p, "cstring16_len", args); +} gb_internal lbValue lb_array_elem(lbProcedure *p, lbValue array_ptr) { diff --git a/src/main.cpp b/src/main.cpp index 112d1208a..5a43e3c02 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -142,9 +142,9 @@ gb_internal i32 system_exec_command_line_app_internal(bool exit_on_err, char con } wcmd = string_to_string16(permanent_allocator(), make_string(cast(u8 *)cmd_line, cmd_len-1)); - if (CreateProcessW(nullptr, wcmd.text, - nullptr, nullptr, true, 0, nullptr, nullptr, - &start_info, &pi)) { + if (CreateProcessW(nullptr, cast(wchar_t *)wcmd.text, + nullptr, nullptr, true, 0, nullptr, nullptr, + &start_info, &pi)) { WaitForSingleObject(pi.hProcess, INFINITE); GetExitCodeProcess(pi.hProcess, cast(DWORD *)&exit_code); @@ -232,7 +232,7 @@ gb_internal Array<String> setup_args(int argc, char const **argv) { wchar_t **wargv = command_line_to_wargv(GetCommandLineW(), &wargc); auto args = array_make<String>(a, 0, wargc); for (isize i = 0; i < wargc; i++) { - wchar_t *warg = wargv[i]; + u16 *warg = cast(u16 *)wargv[i]; isize wlen = string16_len(warg); String16 wstr = make_string16(warg, wlen); String arg = string16_to_string(a, wstr); diff --git a/src/microsoft_craziness.h b/src/microsoft_craziness.h index b0fd22a23..933607a2a 100644 --- a/src/microsoft_craziness.h +++ b/src/microsoft_craziness.h @@ -59,7 +59,7 @@ struct Find_Result { }; gb_internal String mc_wstring_to_string(wchar_t const *str) { - return string16_to_string(mc_allocator, make_string16_c(str)); + return string16_to_string(mc_allocator, make_string16_c(cast(u16 *)str)); } gb_internal String16 mc_string_to_wstring(String str) { @@ -103,7 +103,7 @@ gb_internal HANDLE mc_find_first(String wildcard, MC_Find_Data *find_data) { String16 wildcard_wide = mc_string_to_wstring(wildcard); defer (mc_free(wildcard_wide)); - HANDLE handle = FindFirstFileW(wildcard_wide.text, &_find_data); + HANDLE handle = FindFirstFileW(cast(wchar_t *)wildcard_wide.text, &_find_data); if (handle == INVALID_HANDLE_VALUE) return INVALID_HANDLE_VALUE; find_data->file_attributes = _find_data.dwFileAttributes; diff --git a/src/path.cpp b/src/path.cpp index d5e982088..2b97a04df 100644 --- a/src/path.cpp +++ b/src/path.cpp @@ -130,7 +130,7 @@ gb_internal String directory_from_path(String const &s) { String16 wstr = string_to_string16(a, path); defer (gb_free(a, wstr.text)); - i32 attribs = GetFileAttributesW(wstr.text); + i32 attribs = GetFileAttributesW(cast(wchar_t *)wstr.text); if (attribs < 0) return false; return (attribs & FILE_ATTRIBUTE_DIRECTORY) != 0; @@ -360,7 +360,7 @@ gb_internal ReadDirectoryError read_directory(String path, Array<FileInfo> *fi) defer (gb_free(a, wstr.text)); WIN32_FIND_DATAW file_data = {}; - HANDLE find_file = FindFirstFileW(wstr.text, &file_data); + HANDLE find_file = FindFirstFileW(cast(wchar_t *)wstr.text, &file_data); if (find_file == INVALID_HANDLE_VALUE) { return ReadDirectory_Unknown; } @@ -372,7 +372,7 @@ gb_internal ReadDirectoryError read_directory(String path, Array<FileInfo> *fi) wchar_t *filename_w = file_data.cFileName; u64 size = cast(u64)file_data.nFileSizeLow; size |= (cast(u64)file_data.nFileSizeHigh) << 32; - String name = string16_to_string(a, make_string16_c(filename_w)); + String name = string16_to_string(a, make_string16_c(cast(u16 *)filename_w)); if (name == "." || name == "..") { gb_free(a, name.text); continue; @@ -494,7 +494,7 @@ gb_internal bool write_directory(String path) { #else gb_internal bool write_directory(String path) { String16 wstr = string_to_string16(heap_allocator(), path); - LPCWSTR wdirectory_name = wstr.text; + LPCWSTR wdirectory_name = cast(wchar_t *)wstr.text; HANDLE directory = CreateFileW(wdirectory_name, GENERIC_WRITE, diff --git a/src/string.cpp b/src/string.cpp index ae8d066b1..2087a5fee 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -26,15 +26,14 @@ struct String_Iterator { // NOTE(bill): String16 is only used for Windows due to its file directories struct String16 { - wchar_t *text; - isize len; - wchar_t const &operator[](isize i) const { + u16 * text; + isize len; + u16 const &operator[](isize i) const { GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i); return text[i]; } }; - gb_internal gb_inline String make_string(u8 const *text, isize len) { String s; s.text = cast(u8 *)text; @@ -45,19 +44,19 @@ gb_internal gb_inline String make_string(u8 const *text, isize len) { return s; } - -gb_internal gb_inline String16 make_string16(wchar_t const *text, isize len) { +gb_internal gb_inline String16 make_string16(u16 const *text, isize len) { String16 s; - s.text = cast(wchar_t *)text; + s.text = cast(u16 *)text; s.len = len; return s; } -gb_internal isize string16_len(wchar_t const *s) { + +gb_internal isize string16_len(u16 const *s) { if (s == nullptr) { return 0; } - wchar_t const *p = s; + u16 const *p = s; while (*p) { p++; } @@ -69,7 +68,7 @@ gb_internal gb_inline String make_string_c(char const *text) { return make_string(cast(u8 *)cast(void *)text, gb_strlen(text)); } -gb_internal gb_inline String16 make_string16_c(wchar_t const *text) { +gb_internal gb_inline String16 make_string16_c(u16 const *text) { return make_string16(text, string16_len(text)); } @@ -80,6 +79,13 @@ gb_internal String substring(String const &s, isize lo, isize hi) { return make_string(s.text+lo, hi-lo); } +gb_internal String16 substring(String16 const &s, isize lo, isize hi) { + isize max = s.len; + GB_ASSERT_MSG(lo <= hi && hi <= max, "%td..%td..%td", lo, hi, max); + + return make_string16(s.text+lo, hi-lo); +} + gb_internal char *alloc_cstring(gbAllocator a, String s) { char *c_str = gb_alloc_array(a, char, s.len+1); @@ -145,6 +151,27 @@ gb_internal int string_compare(String const &a, String const &b) { return res; } + +gb_internal int string16_compare(String16 const &a, String16 const &b) { + if (a.text == b.text) { + return cast(int)(a.len - b.len); + } + if (a.text == nullptr) { + return -1; + } + if (b.text == nullptr) { + return +1; + } + + uintptr n = gb_min(a.len, b.len); + int res = memcmp(a.text, b.text, n*gb_size_of(u16)); + if (res == 0) { + res = cast(int)(a.len - b.len); + } + return res; +} + + gb_internal isize string_index_byte(String const &s, u8 x) { for (isize i = 0; i < s.len; i++) { if (s.text[i] == x) { @@ -182,6 +209,26 @@ template <isize N> gb_internal bool operator >= (String const &a, char const (&b template <> bool operator == (String const &a, char const (&b)[1]) { return a.len == 0; } template <> bool operator != (String const &a, char const (&b)[1]) { return a.len != 0; } + +gb_internal gb_inline bool str_eq(String16 const &a, String16 const &b) { + if (a.len != b.len) return false; + if (a.len == 0) return true; + return memcmp(a.text, b.text, a.len) == 0; +} +gb_internal gb_inline bool str_ne(String16 const &a, String16 const &b) { return !str_eq(a, b); } +gb_internal gb_inline bool str_lt(String16 const &a, String16 const &b) { return string16_compare(a, b) < 0; } +gb_internal gb_inline bool str_gt(String16 const &a, String16 const &b) { return string16_compare(a, b) > 0; } +gb_internal gb_inline bool str_le(String16 const &a, String16 const &b) { return string16_compare(a, b) <= 0; } +gb_internal gb_inline bool str_ge(String16 const &a, String16 const &b) { return string16_compare(a, b) >= 0; } + +gb_internal gb_inline bool operator == (String16 const &a, String16 const &b) { return str_eq(a, b); } +gb_internal gb_inline bool operator != (String16 const &a, String16 const &b) { return str_ne(a, b); } +gb_internal gb_inline bool operator < (String16 const &a, String16 const &b) { return str_lt(a, b); } +gb_internal gb_inline bool operator > (String16 const &a, String16 const &b) { return str_gt(a, b); } +gb_internal gb_inline bool operator <= (String16 const &a, String16 const &b) { return str_le(a, b); } +gb_internal gb_inline bool operator >= (String16 const &a, String16 const &b) { return str_ge(a, b); } + + gb_internal gb_inline bool string_starts_with(String const &s, String const &prefix) { if (prefix.len > s.len) { return false; @@ -611,10 +658,9 @@ gb_internal String normalize_path(gbAllocator a, String const &path, String cons -// TODO(bill): Make this non-windows specific gb_internal String16 string_to_string16(gbAllocator a, String s) { int len, len1; - wchar_t *text; + u16 *text; if (s.len < 1) { return make_string16(nullptr, 0); @@ -625,15 +671,14 @@ gb_internal String16 string_to_string16(gbAllocator a, String s) { return make_string16(nullptr, 0); } - text = gb_alloc_array(a, wchar_t, len+1); + text = gb_alloc_array(a, u16, len+1); - len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, text, cast(int)len); + len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, cast(wchar_t *)text, cast(int)len); if (len1 == 0) { gb_free(a, text); return make_string16(nullptr, 0); } text[len] = 0; - return make_string16(text, len); } @@ -646,7 +691,7 @@ gb_internal String string16_to_string(gbAllocator a, String16 s) { return make_string(nullptr, 0); } - len = convert_widechar_to_multibyte(s.text, cast(int)s.len, nullptr, 0); + len = convert_widechar_to_multibyte(cast(wchar_t *)s.text, cast(int)s.len, nullptr, 0); if (len == 0) { return make_string(nullptr, 0); } @@ -654,7 +699,7 @@ gb_internal String string16_to_string(gbAllocator a, String16 s) { text = gb_alloc_array(a, u8, len+1); - len1 = convert_widechar_to_multibyte(s.text, cast(int)s.len, cast(char *)text, cast(int)len); + len1 = convert_widechar_to_multibyte(cast(wchar_t *)s.text, cast(int)s.len, cast(char *)text, cast(int)len); if (len1 == 0) { gb_free(a, text); return make_string(nullptr, 0); @@ -674,9 +719,9 @@ gb_internal String temporary_directory(gbAllocator allocator) { return String{0}; } DWORD len = gb_max(MAX_PATH, n); - wchar_t *b = gb_alloc_array(heap_allocator(), wchar_t, len+1); + u16 *b = gb_alloc_array(heap_allocator(), u16, len+1); defer (gb_free(heap_allocator(), b)); - n = GetTempPathW(len, b); + n = GetTempPathW(len, cast(wchar_t *)b); if (n == 3 && b[1] == ':' && b[2] == '\\') { } else if (n > 0 && b[n-1] == '\\') { @@ -791,6 +836,104 @@ gb_internal String quote_to_ascii(gbAllocator a, String str, u8 quote='"') { return res; } +gb_internal Rune decode_surrogate_pair(u16 r1, u16 r2) { + static Rune const _surr1 = 0xd800; + static Rune const _surr2 = 0xdc00; + static Rune const _surr3 = 0xe000; + static Rune const _surr_self = 0x10000; + + if (_surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3) { + return (((r1-_surr1)<<10) | (r2 - _surr2)) + _surr_self; + } + return GB_RUNE_INVALID; +} + +gb_internal String quote_to_ascii(gbAllocator a, String16 str, u8 quote='"') { + static Rune const _surr1 = 0xd800; + static Rune const _surr2 = 0xdc00; + static Rune const _surr3 = 0xe000; + static Rune const _surr_self = 0x10000; + + u16 *s = cast(u16 *)str.text; + isize n = str.len; + auto buf = array_make<u8>(a, 0, n*2); + array_add(&buf, quote); + for (isize width = 0; n > 0; s += width, n -= width) { + Rune r = cast(Rune)s[0]; + width = 1; + if (r < _surr1 || _surr3 <= r) { + r = cast(Rune)r; + } else if (_surr1 <= r && r < _surr2) { + if (n>1) { + r = decode_surrogate_pair(s[0], s[1]); + if (r != GB_RUNE_INVALID) { + width = 2; + } + } else { + r = GB_RUNE_INVALID; + } + } + if (width == 1 && r == GB_RUNE_INVALID) { + array_add(&buf, cast(u8)'\\'); + array_add(&buf, cast(u8)'x'); + array_add(&buf, cast(u8)lower_hex[s[0]>>4]); + array_add(&buf, cast(u8)lower_hex[s[0]&0xf]); + continue; + } + + if (r == quote || r == '\\') { + array_add(&buf, cast(u8)'\\'); + array_add(&buf, u8(r)); + continue; + } + if (r < 0x80 && is_printable(r)) { + array_add(&buf, u8(r)); + continue; + } + switch (r) { + case '\a': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + default: + if (r < ' ') { + u8 b = cast(u8)r; + array_add(&buf, cast(u8)'\\'); + array_add(&buf, cast(u8)'x'); + array_add(&buf, cast(u8)lower_hex[b>>4]); + array_add(&buf, cast(u8)lower_hex[b&0xf]); + } + if (r > GB_RUNE_MAX) { + r = 0XFFFD; + } + if (r < 0x10000) { + array_add(&buf, cast(u8)'\\'); + array_add(&buf, cast(u8)'u'); + for (isize i = 12; i >= 0; i -= 4) { + array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]); + } + } else { + array_add(&buf, cast(u8)'\\'); + array_add(&buf, cast(u8)'U'); + for (isize i = 28; i >= 0; i -= 4) { + array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]); + } + } + } + } + + + + array_add(&buf, quote); + String res = {}; + res.text = buf.data; + res.len = buf.count; + return res; +} + diff --git a/src/string16_map.cpp b/src/string16_map.cpp new file mode 100644 index 000000000..c9e2eb817 --- /dev/null +++ b/src/string16_map.cpp @@ -0,0 +1,538 @@ +GB_STATIC_ASSERT(sizeof(MapIndex) == sizeof(u32)); + + +struct String16HashKey { + String16 string; + u32 hash; + + operator String16() const noexcept { + return this->string; + } + operator String16 const &() const noexcept { + return this->string; + } +}; +gb_internal gb_inline u32 string_hash(String16 const &s) { + u32 res = fnv32a(s.text, s.len*gb_size_of(u16)) & 0x7fffffff; + return res | (res == 0); +} + +gb_internal gb_inline String16HashKey string_hash_string(String16 const &s) { + String16HashKey hash_key = {}; + hash_key.hash = string_hash(s); + hash_key.string = s; + return hash_key; +} + + +#if 1 /* old string map */ + +template <typename T> +struct String16MapEntry { + String16 key; + u32 hash; + MapIndex next; + T value; +}; + +template <typename T> +struct String16Map { + MapIndex * hashes; + usize hashes_count; + String16MapEntry<T> *entries; + u32 count; + u32 entries_capacity; +}; + + +template <typename T> gb_internal void string16_map_init (String16Map<T> *h, usize capacity = 16); +template <typename T> gb_internal void string16_map_destroy (String16Map<T> *h); + +template <typename T> gb_internal T * string16_map_get (String16Map<T> *h, String16HashKey const &key); +template <typename T> gb_internal T & string16_map_must_get(String16Map<T> *h, String16HashKey const &key); +template <typename T> gb_internal void string16_map_set (String16Map<T> *h, String16HashKey const &key, T const &value); + +// template <typename T> gb_internal void string16_map_remove (String16Map<T> *h, String16HashKey const &key); +template <typename T> gb_internal void string16_map_clear (String16Map<T> *h); +template <typename T> gb_internal void string16_map_grow (String16Map<T> *h); +template <typename T> gb_internal void string16_map_reserve (String16Map<T> *h, usize new_count); + +gb_internal gbAllocator string16_map_allocator(void) { + return heap_allocator(); +} + +template <typename T> +gb_internal gb_inline void string16_map_init(String16Map<T> *h, usize capacity) { + capacity = next_pow2_isize(capacity); + string16_map_reserve(h, capacity); +} + +template <typename T> +gb_internal gb_inline void string16_map_destroy(String16Map<T> *h) { + gb_free(string16_map_allocator(), h->hashes); + gb_free(string16_map_allocator(), h->entries); +} + + +template <typename T> +gb_internal void string16_map__resize_hashes(String16Map<T> *h, usize count) { + h->hashes_count = cast(u32)resize_array_raw(&h->hashes, string16_map_allocator(), h->hashes_count, count, MAP_CACHE_LINE_SIZE); +} + + +template <typename T> +gb_internal void string16_map__reserve_entries(String16Map<T> *h, usize capacity) { + h->entries_capacity = cast(u32)resize_array_raw(&h->entries, string16_map_allocator(), h->entries_capacity, capacity, MAP_CACHE_LINE_SIZE); +} + + +template <typename T> +gb_internal MapIndex string16_map__add_entry(String16Map<T> *h, u32 hash, String16 const &key) { + String16MapEntry<T> e = {}; + e.key = key; + e.hash = hash; + e.next = MAP_SENTINEL; + if (h->count+1 >= h->entries_capacity) { + string16_map__reserve_entries(h, gb_max(h->entries_capacity*2, 4)); + } + h->entries[h->count++] = e; + return cast(MapIndex)(h->count-1); +} + +template <typename T> +gb_internal MapFindResult string16_map__find(String16Map<T> *h, u32 hash, String16 const &key) { + MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL}; + if (h->hashes_count != 0) { + fr.hash_index = cast(MapIndex)(hash & (h->hashes_count-1)); + fr.entry_index = h->hashes[fr.hash_index]; + while (fr.entry_index != MAP_SENTINEL) { + auto *entry = &h->entries[fr.entry_index]; + if (entry->hash == hash && entry->key == key) { + return fr; + } + fr.entry_prev = fr.entry_index; + fr.entry_index = entry->next; + } + } + return fr; +} + +template <typename T> +gb_internal MapFindResult string16_map__find_from_entry(String16Map<T> *h, String16MapEntry<T> *e) { + MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL}; + if (h->hashes_count != 0) { + fr.hash_index = cast(MapIndex)(e->hash & (h->hashes_count-1)); + fr.entry_index = h->hashes[fr.hash_index]; + while (fr.entry_index != MAP_SENTINEL) { + auto *entry = &h->entries[fr.entry_index]; + if (entry == e) { + return fr; + } + fr.entry_prev = fr.entry_index; + fr.entry_index = entry->next; + } + } + return fr; +} + +template <typename T> +gb_internal b32 string16_map__full(String16Map<T> *h) { + return 0.75f * h->hashes_count <= h->count; +} + +template <typename T> +gb_inline void string16_map_grow(String16Map<T> *h) { + isize new_count = gb_max(h->hashes_count<<1, 16); + string16_map_reserve(h, new_count); +} + + +template <typename T> +gb_internal void string16_map_reset_entries(String16Map<T> *h) { + for (u32 i = 0; i < h->hashes_count; i++) { + h->hashes[i] = MAP_SENTINEL; + } + for (isize i = 0; i < h->count; i++) { + MapFindResult fr; + String16MapEntry<T> *e = &h->entries[i]; + e->next = MAP_SENTINEL; + fr = string16_map__find_from_entry(h, e); + if (fr.entry_prev == MAP_SENTINEL) { + h->hashes[fr.hash_index] = cast(MapIndex)i; + } else { + h->entries[fr.entry_prev].next = cast(MapIndex)i; + } + } +} + +template <typename T> +gb_internal void string16_map_reserve(String16Map<T> *h, usize cap) { + if (h->count*2 < h->hashes_count) { + return; + } + string16_map__reserve_entries(h, cap); + string16_map__resize_hashes(h, cap*2); + string16_map_reset_entries(h); +} + +template <typename T> +gb_internal T *string16_map_get(String16Map<T> *h, u32 hash, String16 const &key) { + MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL}; + if (h->hashes_count != 0) { + fr.hash_index = cast(MapIndex)(hash & (h->hashes_count-1)); + fr.entry_index = h->hashes[fr.hash_index]; + while (fr.entry_index != MAP_SENTINEL) { + auto *entry = &h->entries[fr.entry_index]; + if (entry->hash == hash && entry->key == key) { + return &entry->value; + } + fr.entry_prev = fr.entry_index; + fr.entry_index = entry->next; + } + } + return nullptr; +} + + +template <typename T> +gb_internal gb_inline T *string16_map_get(String16Map<T> *h, String16HashKey const &key) { + return string16_map_get(h, key.hash, key.string); +} +template <typename T> +gb_internal T &string16_map_must_get(String16Map<T> *h, u32 hash, String16 const &key) { + isize index = string16_map__find(h, hash, key).entry_index; + GB_ASSERT(index != MAP_SENTINEL); + return h->entries[index].value; +} + +template <typename T> +gb_internal T &string16_map_must_get(String16Map<T> *h, String16HashKey const &key) { + return string16_map_must_get(h, key.hash, key.string); +} + +template <typename T> +gb_internal void string16_map_set(String16Map<T> *h, u32 hash, String16 const &key, T const &value) { + MapIndex index; + MapFindResult fr; + if (h->hashes_count == 0) { + string16_map_grow(h); + } + fr = string16_map__find(h, hash, key); + if (fr.entry_index != MAP_SENTINEL) { + index = fr.entry_index; + } else { + index = string16_map__add_entry(h, hash, key); + if (fr.entry_prev != MAP_SENTINEL) { + h->entries[fr.entry_prev].next = index; + } else { + h->hashes[fr.hash_index] = index; + } + } + h->entries[index].value = value; + + if (string16_map__full(h)) { + string16_map_grow(h); + } +} + +template <typename T> +gb_internal gb_inline void string16_map_set(String16Map<T> *h, String16HashKey const &key, T const &value) { + string16_map_set(h, key.hash, key.string, value); +} + + +template <typename T> +gb_internal gb_inline void string16_map_clear(String16Map<T> *h) { + h->count = 0; + for (u32 i = 0; i < h->hashes_count; i++) { + h->hashes[i] = MAP_SENTINEL; + } +} + + + +template <typename T> +gb_internal String16MapEntry<T> *begin(String16Map<T> &m) noexcept { + return m.entries; +} +template <typename T> +gb_internal String16MapEntry<T> const *begin(String16Map<T> const &m) noexcept { + return m.entries; +} + + +template <typename T> +gb_internal String16MapEntry<T> *end(String16Map<T> &m) noexcept { + return m.entries + m.count; +} + +template <typename T> +gb_internal String16MapEntry<T> const *end(String16Map<T> const &m) noexcept { + return m.entries + m.count; +} + +#else /* new string map */ + +template <typename T> +struct StringMapEntry { + String key; + u32 hash; + T value; +}; + +template <typename T> +struct StringMap { + String16MapEntry<T> *entries; + u32 count; + u32 capacity; +}; + + +template <typename T> gb_internal void string16_map_init (String16Map<T> *h, usize capacity = 16); +template <typename T> gb_internal void string16_map_destroy (String16Map<T> *h); + +template <typename T> gb_internal T * string16_map_get (String16Map<T> *h, String16 const &key); +template <typename T> gb_internal T * string16_map_get (String16Map<T> *h, String16HashKey const &key); + +template <typename T> gb_internal T & string16_map_must_get(String16Map<T> *h, String16 const &key); +template <typename T> gb_internal T & string16_map_must_get(String16Map<T> *h, String16HashKey const &key); + +template <typename T> gb_internal void string16_map_set (String16Map<T> *h, String16 const &key, T const &value); +template <typename T> gb_internal void string16_map_set (String16Map<T> *h, String16HashKey const &key, T const &value); + +// template <typename T> gb_internal void string16_map_remove (String16Map<T> *h, String16HashKey const &key); +template <typename T> gb_internal void string16_map_clear (String16Map<T> *h); +template <typename T> gb_internal void string16_map_grow (String16Map<T> *h); +template <typename T> gb_internal void string16_map_reserve (String16Map<T> *h, usize new_count); + +gb_internal gbAllocator string16_map_allocator(void) { + return heap_allocator(); +} + +template <typename T> +gb_internal gb_inline void string16_map_init(String16Map<T> *h, usize capacity) { + capacity = next_pow2_isize(capacity); + string16_map_reserve(h, capacity); +} + +template <typename T> +gb_internal gb_inline void string16_map_destroy(String16Map<T> *h) { + gb_free(string16_map_allocator(), h->entries); +} + + +template <typename T> +gb_internal void string16_map__insert(String16Map<T> *h, u32 hash, String16 const &key, T const &value) { + if (h->count+1 >= h->capacity) { + string16_map_grow(h); + } + GB_ASSERT(h->count+1 < h->capacity); + + u32 mask = h->capacity-1; + MapIndex index = hash & mask; + MapIndex original_index = index; + do { + auto *entry = h->entries+index; + if (entry->hash == 0) { + entry->key = key; + entry->hash = hash; + entry->value = value; + + h->count += 1; + return; + } + index = (index+1)&mask; + } while (index != original_index); + + GB_PANIC("Full map"); +} + +template <typename T> +gb_internal b32 string16_map__full(String16Map<T> *h) { + return 0.75f * h->count <= h->capacity; +} + +template <typename T> +gb_inline void string16_map_grow(String16Map<T> *h) { + isize new_capacity = gb_max(h->capacity<<1, 16); + string16_map_reserve(h, new_capacity); +} + + +template <typename T> +gb_internal void string16_map_reserve(String16Map<T> *h, usize cap) { + if (cap < h->capacity) { + return; + } + cap = next_pow2_isize(cap); + + String16Map<T> new_h = {}; + new_h.count = 0; + new_h.capacity = cast(u32)cap; + new_h.entries = gb_alloc_array(string16_map_allocator(), String16MapEntry<T>, new_h.capacity); + + if (h->count) { + for (u32 i = 0; i < h->capacity; i++) { + auto *entry = h->entries+i; + if (entry->hash) { + string16_map__insert(&new_h, entry->hash, entry->key, entry->value); + } + } + } + string16_map_destroy(h); + *h = new_h; +} + +template <typename T> +gb_internal T *string16_map_get(String16Map<T> *h, u32 hash, String16 const &key) { + if (h->count == 0) { + return nullptr; + } + u32 mask = (h->capacity-1); + u32 index = hash & mask; + u32 original_index = index; + do { + auto *entry = h->entries+index; + u32 curr_hash = entry->hash; + if (curr_hash == 0) { + // NOTE(bill): no found, but there isn't any key removal for this hash map + return nullptr; + } else if (curr_hash == hash && entry->key == key) { + return &entry->value; + } + index = (index+1) & mask; + } while (original_index != index); + return nullptr; +} + + +template <typename T> +gb_internal gb_inline T *string16_map_get(String16Map<T> *h, String16HashKey const &key) { + return string16_map_get(h, key.hash, key.string); +} + +template <typename T> +gb_internal gb_inline T *string16_map_get(String16Map<T> *h, String16 const &key) { + return string16_map_get(h, string_hash(key), key); +} + +template <typename T> +gb_internal T &string16_map_must_get(String16Map<T> *h, u32 hash, String16 const &key) { + T *found = string16_map_get(h, hash, key); + GB_ASSERT(found != nullptr); + return *found; +} + +template <typename T> +gb_internal T &string16_map_must_get(String16Map<T> *h, String16HashKey const &key) { + return string16_map_must_get(h, key.hash, key.string); +} + +template <typename T> +gb_internal gb_inline T &string16_map_must_get(String16Map<T> *h, String16 const &key) { + return string16_map_must_get(h, string_hash(key), key); +} + +template <typename T> +gb_internal void string16_map_set(String16Map<T> *h, u32 hash, String16 const &key, T const &value) { + if (h->count == 0) { + string16_map_grow(h); + } + auto *found = string16_map_get(h, hash, key); + if (found) { + *found = value; + return; + } + string16_map__insert(h, hash, key, value); +} + +template <typename T> +gb_internal gb_inline void string16_map_set(String16Map<T> *h, String16 const &key, T const &value) { + string16_map_set(h, string_hash_string(key), value); +} + +template <typename T> +gb_internal gb_inline void string16_map_set(String16Map<T> *h, String16HashKey const &key, T const &value) { + string16_map_set(h, key.hash, key.string, value); +} + + +template <typename T> +gb_internal gb_inline void string16_map_clear(String16Map<T> *h) { + h->count = 0; + gb_zero_array(h->entries, h->capacity); +} + + +template <typename T> +struct StringMapIterator { + String16Map<T> *map; + MapIndex index; + + StringMapIterator<T> &operator++() noexcept { + for (;;) { + ++index; + if (map->capacity == index) { + return *this; + } + String16MapEntry<T> *entry = map->entries+index; + if (entry->hash != 0) { + return *this; + } + } + } + + bool operator==(StringMapIterator<T> const &other) const noexcept { + return this->map == other->map && this->index == other->index; + } + + operator String16MapEntry<T> *() const { + return map->entries+index; + } +}; + + +template <typename T> +gb_internal StringMapIterator<T> end(String16Map<T> &m) noexcept { + return StringMapIterator<T>{&m, m.capacity}; +} + +template <typename T> +gb_internal StringMapIterator<T> const end(String16Map<T> const &m) noexcept { + return StringMapIterator<T>{&m, m.capacity}; +} + + + +template <typename T> +gb_internal StringMapIterator<T> begin(String16Map<T> &m) noexcept { + if (m.count == 0) { + return end(m); + } + + MapIndex index = 0; + while (index < m.capacity) { + if (m.entries[index].hash) { + break; + } + index++; + } + return StringMapIterator<T>{&m, index}; +} +template <typename T> +gb_internal StringMapIterator<T> const begin(String16Map<T> const &m) noexcept { + if (m.count == 0) { + return end(m); + } + + MapIndex index = 0; + while (index < m.capacity) { + if (m.entries[index].hash) { + break; + } + index++; + } + return StringMapIterator<T>{&m, index}; +} + +#endif
\ No newline at end of file diff --git a/src/types.cpp b/src/types.cpp index 29412fa25..c465714db 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -41,8 +41,13 @@ enum BasicKind { Basic_uint, Basic_uintptr, Basic_rawptr, - Basic_string, // ^u8 + int - Basic_cstring, // ^u8 + + Basic_string, // [^]u8 + int + Basic_cstring, // [^]u8 + + Basic_string16, // [^]u16 + int + Basic_cstring16, // [^]u16 + int + Basic_any, // rawptr + ^Type_Info Basic_typeid, @@ -501,8 +506,14 @@ gb_global Type basic_types[] = { {Type_Basic, {Basic_uintptr, BasicFlag_Integer | BasicFlag_Unsigned, -1, STR_LIT("uintptr")}}, {Type_Basic, {Basic_rawptr, BasicFlag_Pointer, -1, STR_LIT("rawptr")}}, + {Type_Basic, {Basic_string, BasicFlag_String, -1, STR_LIT("string")}}, {Type_Basic, {Basic_cstring, BasicFlag_String, -1, STR_LIT("cstring")}}, + + {Type_Basic, {Basic_string16, BasicFlag_String, -1, STR_LIT("string16")}}, + {Type_Basic, {Basic_cstring16, BasicFlag_String, -1, STR_LIT("cstring16")}}, + + {Type_Basic, {Basic_any, 0, 16, STR_LIT("any")}}, {Type_Basic, {Basic_typeid, 0, 8, STR_LIT("typeid")}}, @@ -592,8 +603,12 @@ gb_global Type *t_uint = &basic_types[Basic_uint]; gb_global Type *t_uintptr = &basic_types[Basic_uintptr]; gb_global Type *t_rawptr = &basic_types[Basic_rawptr]; + gb_global Type *t_string = &basic_types[Basic_string]; gb_global Type *t_cstring = &basic_types[Basic_cstring]; +gb_global Type *t_string16 = &basic_types[Basic_string16]; +gb_global Type *t_cstring16 = &basic_types[Basic_cstring16]; + gb_global Type *t_any = &basic_types[Basic_any]; gb_global Type *t_typeid = &basic_types[Basic_typeid]; @@ -631,6 +646,8 @@ gb_global Type *t_untyped_uninit = &basic_types[Basic_UntypedUninit]; gb_global Type *t_u8_ptr = nullptr; gb_global Type *t_u8_multi_ptr = nullptr; +gb_global Type *t_u16_ptr = nullptr; +gb_global Type *t_u16_multi_ptr = nullptr; gb_global Type *t_int_ptr = nullptr; gb_global Type *t_i64_ptr = nullptr; gb_global Type *t_f64_ptr = nullptr; @@ -644,6 +661,8 @@ gb_global Type *t_type_info_enum_value = nullptr; gb_global Type *t_type_info_ptr = nullptr; gb_global Type *t_type_info_enum_value_ptr = nullptr; +gb_global Type *t_type_info_string_encoding_kind = nullptr; + gb_global Type *t_type_info_named = nullptr; gb_global Type *t_type_info_integer = nullptr; gb_global Type *t_type_info_rune = nullptr; @@ -1293,6 +1312,14 @@ gb_internal bool is_type_string(Type *t) { } return false; } +gb_internal bool is_type_string16(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Basic) { + return t->Basic.kind == Basic_string16; + } + return false; +} gb_internal bool is_type_cstring(Type *t) { t = base_type(t); if (t == nullptr) { return false; } @@ -1301,6 +1328,14 @@ gb_internal bool is_type_cstring(Type *t) { } return false; } +gb_internal bool is_type_cstring16(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Basic) { + return t->Basic.kind == Basic_cstring16; + } + return false; +} gb_internal bool is_type_typed(Type *t) { t = base_type(t); if (t == nullptr) { return false; } @@ -1430,6 +1465,12 @@ gb_internal bool is_type_u8(Type *t) { } return false; } +gb_internal bool is_type_u16(Type *t) { + if (t->kind == Type_Basic) { + return t->Basic.kind == Basic_u16; + } + return false; +} gb_internal bool is_type_array(Type *t) { t = base_type(t); if (t == nullptr) { return false; } @@ -1691,6 +1732,39 @@ gb_internal bool is_type_rune_array(Type *t) { return false; } +gb_internal bool is_type_u16_slice(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Slice) { + return is_type_u16(t->Slice.elem); + } + return false; +} +gb_internal bool is_type_u16_array(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Array) { + return is_type_u16(t->Array.elem); + } + return false; +} +gb_internal bool is_type_u16_ptr(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Pointer) { + return is_type_u16(t->Slice.elem); + } + return false; +} +gb_internal bool is_type_u16_multi_ptr(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_MultiPointer) { + return is_type_u16(t->Slice.elem); + } + return false; +} + gb_internal bool is_type_array_like(Type *t) { return is_type_array(t) || is_type_enumerated_array(t); @@ -2110,7 +2184,7 @@ gb_internal bool is_type_indexable(Type *t) { Type *bt = base_type(t); switch (bt->kind) { case Type_Basic: - return bt->Basic.kind == Basic_string; + return bt->Basic.kind == Basic_string || bt->Basic.kind == Basic_string16; case Type_Array: case Type_Slice: case Type_DynamicArray: @@ -2130,7 +2204,7 @@ gb_internal bool is_type_sliceable(Type *t) { Type *bt = base_type(t); switch (bt->kind) { case Type_Basic: - return bt->Basic.kind == Basic_string; + return bt->Basic.kind == Basic_string || bt->Basic.kind == Basic_string16; case Type_Array: case Type_Slice: case Type_DynamicArray: @@ -2377,6 +2451,7 @@ gb_internal bool type_has_nil(Type *t) { case Basic_any: return true; case Basic_cstring: + case Basic_cstring16: return true; case Basic_typeid: return true; @@ -2444,8 +2519,9 @@ gb_internal bool is_type_comparable(Type *t) { case Basic_rune: return true; case Basic_string: - return true; case Basic_cstring: + case Basic_string16: + case Basic_cstring16: return true; case Basic_typeid: return true; @@ -3831,10 +3907,12 @@ gb_internal i64 type_size_of(Type *t) { if (t->kind == Type_Basic) { GB_ASSERT_MSG(is_type_typed(t), "%s", type_to_string(t)); switch (t->Basic.kind) { - case Basic_string: size = 2*build_context.int_size; break; - case Basic_cstring: size = build_context.ptr_size; break; - case Basic_any: size = 16; break; - case Basic_typeid: size = 8; break; + case Basic_string: size = 2*build_context.int_size; break; + case Basic_cstring: size = build_context.ptr_size; break; + case Basic_string16: size = 2*build_context.int_size; break; + case Basic_cstring16: size = build_context.ptr_size; break; + case Basic_any: size = 16; break; + case Basic_typeid: size = 8; break; case Basic_int: case Basic_uint: size = build_context.int_size; @@ -3894,10 +3972,12 @@ gb_internal i64 type_align_of_internal(Type *t, TypePath *path) { case Type_Basic: { GB_ASSERT(is_type_typed(t)); switch (t->Basic.kind) { - case Basic_string: return build_context.int_size; - case Basic_cstring: return build_context.ptr_size; - case Basic_any: return 8; - case Basic_typeid: return 8; + case Basic_string: return build_context.int_size; + case Basic_cstring: return build_context.ptr_size; + case Basic_string16: return build_context.int_size; + case Basic_cstring16: return build_context.ptr_size; + case Basic_any: return 8; + case Basic_typeid: return 8; case Basic_int: case Basic_uint: return build_context.int_size; @@ -4145,10 +4225,12 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) { return size; } switch (kind) { - case Basic_string: return 2*build_context.int_size; - case Basic_cstring: return build_context.ptr_size; - case Basic_any: return 16; - case Basic_typeid: return 8; + case Basic_string: return 2*build_context.int_size; + case Basic_cstring: return build_context.ptr_size; + case Basic_string16: return 2*build_context.int_size; + case Basic_cstring16: return build_context.ptr_size; + case Basic_any: return 16; + case Basic_typeid: return 8; case Basic_int: case Basic_uint: return build_context.int_size; @@ -4380,6 +4462,15 @@ gb_internal i64 type_offset_of(Type *t, i64 index, Type **field_type_) { if (field_type_) *field_type_ = t_int; return build_context.int_size; // len } + } else if (t->Basic.kind == Basic_string16) { + switch (index) { + case 0: + if (field_type_) *field_type_ = t_u16_ptr; + return 0; // data + case 1: + if (field_type_) *field_type_ = t_int; + return build_context.int_size; // len + } } else if (t->Basic.kind == Basic_any) { switch (index) { case 0: @@ -4456,6 +4547,11 @@ gb_internal i64 type_offset_of_from_selection(Type *type, Selection sel) { case 0: t = t_rawptr; break; case 1: t = t_int; break; } + } else if (t->Basic.kind == Basic_string16) { + switch (index) { + case 0: t = t_rawptr; break; + case 1: t = t_int; break; + } } else if (t->Basic.kind == Basic_any) { switch (index) { case 0: t = t_rawptr; break; @@ -4697,6 +4793,11 @@ gb_internal Type *type_internal_index(Type *t, isize index) { GB_ASSERT(index == 0 || index == 1); return index == 0 ? t_u8_ptr : t_int; } + case Basic_string16: + { + GB_ASSERT(index == 0 || index == 1); + return index == 0 ? t_u16_ptr : t_int; + } case Basic_any: { GB_ASSERT(index == 0 || index == 1); diff --git a/tests/core/sys/windows/test_kernel32.odin b/tests/core/sys/windows/test_kernel32.odin index f6a88c769..15f3b5173 100644 --- a/tests/core/sys/windows/test_kernel32.odin +++ b/tests/core/sys/windows/test_kernel32.odin @@ -12,12 +12,12 @@ lcid_to_local :: proc(t: ^testing.T) { cc := win32.LCIDToLocaleName(lcid, &wname[0], len(wname) - 1, 0) testing.expectf(t, cc == 6, "%#x (should be: %#x)", u32(cc), 6) if cc == 0 {return} - str, err := win32.wstring_to_utf8(win32.wstring(&wname), int(cc)) + str, err := win32.wstring_to_utf8(win32.wstring(&wname[0]), int(cc)) testing.expectf(t, err == .None, "%v (should be: %x)", err, 0) exp :: "en-US" testing.expectf(t, str == exp, "%v (should be: %v)", str, exp) - cc2 := win32.LocaleNameToLCID(L(exp), 0) + cc2 := win32.LocaleNameToLCID(exp, 0) testing.expectf(t, cc2 == 0x0409, "%#x (should be: %#x)", u32(cc2), 0x0409) //fmt.printfln("%0X", lcid) diff --git a/tests/core/sys/windows/test_ole32.odin b/tests/core/sys/windows/test_ole32.odin index 8be231e1f..a0a2590b8 100644 --- a/tests/core/sys/windows/test_ole32.odin +++ b/tests/core/sys/windows/test_ole32.odin @@ -9,7 +9,7 @@ import "core:testing" string_from_clsid :: proc(t: ^testing.T) { p: win32.LPOLESTR hr := win32.StringFromCLSID(win32.CLSID_FileOpenDialog, &p) - defer if p != nil {win32.CoTaskMemFree(p)} + defer if p != nil {win32.CoTaskMemFree(rawptr(p))} testing.expectf(t, win32.SUCCEEDED(hr), "%x (should be: %x)", u32(hr), 0) testing.expectf(t, p != nil, "%v is nil", p) @@ -33,7 +33,7 @@ clsid_from_string :: proc(t: ^testing.T) { string_from_iid :: proc(t: ^testing.T) { p: win32.LPOLESTR hr := win32.StringFromIID(win32.IID_IFileDialog, &p) - defer if p != nil {win32.CoTaskMemFree(p)} + defer if p != nil {win32.CoTaskMemFree(rawptr(p))} testing.expectf(t, win32.SUCCEEDED(hr), "%x (should be: %x)", u32(hr), 0) testing.expectf(t, p != nil, "%v is nil", p) diff --git a/tests/core/sys/windows/util.odin b/tests/core/sys/windows/util.odin index 0201395f6..e2ab9cde0 100644 --- a/tests/core/sys/windows/util.odin +++ b/tests/core/sys/windows/util.odin @@ -12,11 +12,11 @@ UTF16_Vector :: struct { utf16_vectors := []UTF16_Vector{ { - intrinsics.constant_utf16_cstring("Hellope, World!"), + "Hellope, World!", "Hellope, World!", }, { - intrinsics.constant_utf16_cstring("Hellope\x00, World!"), + "Hellope\x00, World!", "Hellope", }, } @@ -27,7 +27,8 @@ utf16_to_utf8_buf_test :: proc(t: ^testing.T) { buf := make([]u8, len(test.ustr)) defer delete(buf) - res := win32.utf16_to_utf8_buf(buf[:], test.wstr[:len(test.ustr)]) + wstr := string16(test.wstr) + res := win32.utf16_to_utf8_buf(buf[:], transmute([]u16)wstr) testing.expect_value(t, res, test.ustr) } }
\ No newline at end of file |