aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: gingerBill <gingerBill@users.noreply.github.com> 2025-08-06 16:09:18 +0100
committer: GitHub <noreply@github.com> 2025-08-06 16:09:18 +0100
commit: 09a1e170bc92a0ea48a8ee67599c2936e924fe4d (patch)
tree: 92b44b34a1f2f0c4a8c96a49ab61bb5177432ed7 /src
parent: ec7509430369eb5d57a081507792dc03b1c05bab (diff)
parent: af3184adc96cef59fff986ea6400caa6dbdb56ae (diff)
Merge pull request #5530 from odin-lang/bill/utf16-strings
UTF-16 string types: `string16` & `cstring16`
Diffstat (limited to 'src')
-rw-r--r-- src/build_settings.cpp 8
-rw-r--r-- src/cached.cpp 2
-rw-r--r-- src/check_builtin.cpp 26
-rw-r--r-- src/check_decl.cpp 6
-rw-r--r-- src/check_expr.cpp 130
-rw-r--r-- src/check_stmt.cpp 26
-rw-r--r-- src/checker.cpp 19
-rw-r--r-- src/checker_builtin_procs.hpp 2
-rw-r--r-- src/common.cpp 3
-rw-r--r-- src/exact_value.cpp 52
-rw-r--r-- src/llvm_backend.cpp 6
-rw-r--r-- src/llvm_backend.hpp 3
-rw-r--r-- src/llvm_backend_const.cpp 88
-rw-r--r-- src/llvm_backend_debug.cpp 14
-rw-r--r-- src/llvm_backend_expr.cpp 142
-rw-r--r-- src/llvm_backend_general.cpp 137
-rw-r--r-- src/llvm_backend_proc.cpp 9
-rw-r--r-- src/llvm_backend_stmt.cpp 121
-rw-r--r-- src/llvm_backend_type.cpp 38
-rw-r--r-- src/llvm_backend_utility.cpp 48
-rw-r--r-- src/main.cpp 8
-rw-r--r-- src/microsoft_craziness.h 4
-rw-r--r-- src/path.cpp 8
-rw-r--r-- src/string.cpp 181
-rw-r--r-- src/string16_map.cpp 538
-rw-r--r-- src/types.cpp 135
26 files changed, 1666 insertions, 88 deletions
diff --git a/src/build_settings.cpp b/src/build_settings.cpp
index 46a4f9ae5..40bbe41e5 100644
--- a/src/build_settings.cpp
+++ b/src/build_settings.cpp
@@ -1089,7 +1089,7 @@ gb_internal String internal_odin_root_dir(void) {
text = gb_alloc_array(permanent_allocator(), wchar_t, len+1);
GetModuleFileNameW(nullptr, text, cast(int)len);
- path = string16_to_string(heap_allocator(), make_string16(text, len));
+ path = string16_to_string(heap_allocator(), make_string16(cast(u16 *)text, len));
for (i = path.len-1; i >= 0; i--) {
u8 c = path[i];
@@ -1387,14 +1387,14 @@ gb_internal String path_to_fullpath(gbAllocator a, String s, bool *ok_) {
mutex_lock(&fullpath_mutex);
- len = GetFullPathNameW(&string16[0], 0, nullptr, nullptr);
+ len = GetFullPathNameW(cast(wchar_t *)&string16[0], 0, nullptr, nullptr);
if (len != 0) {
wchar_t *text = gb_alloc_array(permanent_allocator(), wchar_t, len+1);
- GetFullPathNameW(&string16[0], len, text, nullptr);
+ GetFullPathNameW(cast(wchar_t *)&string16[0], len, text, nullptr);
mutex_unlock(&fullpath_mutex);
text[len] = 0;
- result = string16_to_string(a, make_string16(text, len));
+ result = string16_to_string(a, make_string16(cast(u16 *)text, len));
result = string_trim_whitespace(result);
// Replace Windows style separators
diff --git a/src/cached.cpp b/src/cached.cpp
index efdadce7b..61b5d01b4 100644
--- a/src/cached.cpp
+++ b/src/cached.cpp
@@ -231,7 +231,7 @@ Array<String> cache_gather_envs() {
wchar_t *curr_string = strings;
while (curr_string && *curr_string) {
- String16 wstr = make_string16_c(curr_string);
+ String16 wstr = make_string16_c(cast(u16 *)curr_string);
curr_string += wstr.len+1;
String str = string16_to_string(temporary_allocator(), wstr);
if (string_starts_with(str, str_lit("CURR_DATE_TIME="))) {
diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp
index 880de73f5..57413f519 100644
--- a/src/check_builtin.cpp
+++ b/src/check_builtin.cpp
@@ -19,6 +19,7 @@ gb_global BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_bool
is_type_complex,
is_type_quaternion,
is_type_string,
+ is_type_string16,
is_type_typeid,
is_type_any,
is_type_endian_platform,
@@ -2328,13 +2329,23 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
if (is_type_string(op_type) && id == BuiltinProc_len) {
if (operand->mode == Addressing_Constant) {
mode = Addressing_Constant;
- String str = operand->value.value_string;
- value = exact_value_i64(str.len);
+
+ if (operand->value.kind == ExactValue_String) {
+ String str = operand->value.value_string;
+ value = exact_value_i64(str.len);
+ } else if (operand->value.kind == ExactValue_String16) {
+ String16 str = operand->value.value_string16;
+ value = exact_value_i64(str.len);
+ } else {
+ GB_PANIC("Unhandled value kind: %d", operand->value.kind);
+ }
type = t_untyped_integer;
} else {
mode = Addressing_Value;
if (is_type_cstring(op_type)) {
add_package_dependency(c, "runtime", "cstring_len");
+ } else if (is_type_cstring16(op_type)) {
+ add_package_dependency(c, "runtime", "cstring16_len");
}
}
} else if (is_type_array(op_type)) {
@@ -4684,7 +4695,9 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
break;
case Type_Basic:
if (t->Basic.kind == Basic_string) {
- operand->type = alloc_type_multi_pointer(t_u8);
+ operand->type = t_u8_multi_ptr;
+ } else if (t->Basic.kind == Basic_string16) {
+ operand->type = t_u16_multi_ptr;
}
break;
case Type_Pointer:
@@ -6133,6 +6146,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
case BuiltinProc_type_is_complex:
case BuiltinProc_type_is_quaternion:
case BuiltinProc_type_is_string:
+ case BuiltinProc_type_is_string16:
case BuiltinProc_type_is_typeid:
case BuiltinProc_type_is_any:
case BuiltinProc_type_is_endian_platform:
@@ -7172,7 +7186,11 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
return false;
}
operand->mode = Addressing_Value;
- operand->type = alloc_type_multi_pointer(t_u16);
+ if (type_hint != nullptr && is_type_cstring16(type_hint)) {
+ operand->type = type_hint;
+ } else {
+ operand->type = alloc_type_multi_pointer(t_u16);
+ }
operand->value = {};
break;
}
diff --git a/src/check_decl.cpp b/src/check_decl.cpp
index dd4c09e85..af46ee40e 100644
--- a/src/check_decl.cpp
+++ b/src/check_decl.cpp
@@ -815,6 +815,12 @@ gb_internal bool signature_parameter_similar_enough(Type *x, Type *y) {
if (sig_compare(is_type_cstring, is_type_u8_multi_ptr, x, y)) {
return true;
}
+ if (sig_compare(is_type_cstring16, is_type_u16_ptr, x, y)) {
+ return true;
+ }
+ if (sig_compare(is_type_cstring16, is_type_u16_multi_ptr, x, y)) {
+ return true;
+ }
if (sig_compare(is_type_uintptr, is_type_rawptr, x, y)) {
return true;
diff --git a/src/check_expr.cpp b/src/check_expr.cpp
index 51fb5511b..faa338f36 100644
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -2106,6 +2106,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i
} else if (is_type_boolean(type)) {
return in_value.kind == ExactValue_Bool;
} else if (is_type_string(type)) {
+ if (in_value.kind == ExactValue_String16) {
+ return is_type_string16(type) || is_type_cstring16(type);
+ }
return in_value.kind == ExactValue_String;
} else if (is_type_integer(type) || is_type_rune(type)) {
if (in_value.kind == ExactValue_Bool) {
@@ -2320,6 +2323,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i
if (in_value.kind == ExactValue_String) {
return false;
}
+ if (in_value.kind == ExactValue_String16) {
+ return false;
+ }
if (out_value) *out_value = in_value;
} else if (is_type_bit_set(type)) {
if (in_value.kind == ExactValue_Integer) {
@@ -2862,6 +2868,14 @@ gb_internal void add_comparison_procedures_for_fields(CheckerContext *c, Type *t
add_package_dependency(c, "runtime", "string_eq");
add_package_dependency(c, "runtime", "string_ne");
break;
+ case Basic_cstring16:
+ add_package_dependency(c, "runtime", "cstring16_eq");
+ add_package_dependency(c, "runtime", "cstring16_ne");
+ break;
+ case Basic_string16:
+ add_package_dependency(c, "runtime", "string16_eq");
+ add_package_dependency(c, "runtime", "string16_ne");
+ break;
}
break;
case Type_Struct:
@@ -3035,6 +3049,24 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper
case Token_LtEq: add_package_dependency(c, "runtime", "cstring_le"); break;
case Token_GtEq: add_package_dependency(c, "runtime", "cstring_gt"); break;
}
+ } else if (is_type_cstring16(x->type) && is_type_cstring16(y->type)) {
+ switch (op) {
+ case Token_CmpEq: add_package_dependency(c, "runtime", "cstring16_eq"); break;
+ case Token_NotEq: add_package_dependency(c, "runtime", "cstring16_ne"); break;
+ case Token_Lt: add_package_dependency(c, "runtime", "cstring16_lt"); break;
+ case Token_Gt: add_package_dependency(c, "runtime", "cstring16_gt"); break;
+ case Token_LtEq: add_package_dependency(c, "runtime", "cstring16_le"); break;
+ case Token_GtEq: add_package_dependency(c, "runtime", "cstring16_gt"); break;
+ }
+ } else if (is_type_string16(x->type) || is_type_string16(y->type)) {
+ switch (op) {
+ case Token_CmpEq: add_package_dependency(c, "runtime", "string16_eq"); break;
+ case Token_NotEq: add_package_dependency(c, "runtime", "string16_ne"); break;
+ case Token_Lt: add_package_dependency(c, "runtime", "string16_lt"); break;
+ case Token_Gt: add_package_dependency(c, "runtime", "string16_gt"); break;
+ case Token_LtEq: add_package_dependency(c, "runtime", "string16_le"); break;
+ case Token_GtEq: add_package_dependency(c, "runtime", "string16_gt"); break;
+ }
} else if (is_type_string(x->type) || is_type_string(y->type)) {
switch (op) {
case Token_CmpEq: add_package_dependency(c, "runtime", "string_eq"); break;
@@ -3340,6 +3372,11 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type
return true;
}
+ // []u16 <-> string16 (not cstring16)
+ if (is_type_u16_slice(src) && (is_type_string16(dst) && !is_type_cstring16(dst))) {
+ return true;
+ }
+
// cstring -> string
if (are_types_identical(src, t_cstring) && are_types_identical(dst, t_string)) {
if (operand->mode != Addressing_Constant) {
@@ -3347,6 +3384,14 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type
}
return true;
}
+ // cstring16 -> string16
+ if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) {
+ if (operand->mode != Addressing_Constant) {
+ add_package_dependency(c, "runtime", "cstring16_to_string16");
+ }
+ return true;
+ }
+
// cstring -> ^u8
if (are_types_identical(src, t_cstring) && is_type_u8_ptr(dst)) {
return !is_constant;
@@ -3372,6 +3417,34 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type
if (is_type_rawptr(src) && are_types_identical(dst, t_cstring)) {
return !is_constant;
}
+
+ // cstring16 -> ^u16
+ if (are_types_identical(src, t_cstring16) && is_type_u16_ptr(dst)) {
+ return !is_constant;
+ }
+ // cstring16 -> [^]u16
+ if (are_types_identical(src, t_cstring16) && is_type_u16_multi_ptr(dst)) {
+ return !is_constant;
+ }
+ // cstring16 -> rawptr
+ if (are_types_identical(src, t_cstring16) && is_type_rawptr(dst)) {
+ return !is_constant;
+ }
+
+
+ // ^u16 -> cstring16
+ if (is_type_u16_ptr(src) && are_types_identical(dst, t_cstring16)) {
+ return !is_constant;
+ }
+ // [^]u16 -> cstring16
+ if (is_type_u16_multi_ptr(src) && are_types_identical(dst, t_cstring16)) {
+ return !is_constant;
+ }
+ // rawptr -> cstring16
+ if (is_type_rawptr(src) && are_types_identical(dst, t_cstring16)) {
+ return !is_constant;
+ }
+
// proc <-> proc
if (is_type_proc(src) && is_type_proc(dst)) {
if (is_type_polymorphic(dst)) {
@@ -4558,6 +4631,8 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar
// target_type = t_untyped_nil;
} else if (is_type_cstring(target_type)) {
// target_type = t_untyped_nil;
+ } else if (is_type_cstring16(target_type)) {
+ // target_type = t_untyped_nil;
} else if (!type_has_nil(target_type)) {
operand->mode = Addressing_Invalid;
convert_untyped_error(c, operand, target_type);
@@ -4585,6 +4660,13 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar
break;
}
}
+ } else if (operand->value.kind == ExactValue_String16) {
+ String16 s = operand->value.value_string16;
+ if (is_type_u16_array(t)) {
+ if (s.len == t->Array.count) {
+ break;
+ }
+ }
}
operand->mode = Addressing_Invalid;
convert_untyped_error(c, operand, target_type);
@@ -4914,6 +4996,12 @@ gb_internal ExactValue get_constant_field_single(CheckerContext *c, ExactValue v
if (success_) *success_ = true;
if (finish_) *finish_ = true;
return exact_value_u64(val);
+ } else if (value.kind == ExactValue_String16) { // constant UTF-16 string: indexing yields one u16 code unit
+ GB_ASSERT(0 <= index && index < value.value_string16.len); // bound by the String16 length, matching the access below
+ u16 val = value.value_string16[index];
+ if (success_) *success_ = true;
+ if (finish_) *finish_ = true;
+ return exact_value_u64(val);
}
if (value.kind != ExactValue_Compound) {
if (success_) *success_ = true;
@@ -8226,6 +8314,7 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64
case Type_Basic:
if (t->Basic.kind == Basic_string) {
if (o->mode == Addressing_Constant) {
+ GB_ASSERT(o->value.kind == ExactValue_String);
*max_count = o->value.value_string.len;
}
if (o->mode != Addressing_Constant) {
@@ -8233,6 +8322,16 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64
}
o->type = t_u8;
return true;
+ } else if (t->Basic.kind == Basic_string16) {
+ if (o->mode == Addressing_Constant) {
+ GB_ASSERT(o->value.kind == ExactValue_String16);
+ *max_count = o->value.value_string16.len;
+ }
+ if (o->mode != Addressing_Constant) {
+ o->mode = Addressing_Value;
+ }
+ o->type = t_u16;
+ return true;
} else if (t->Basic.kind == Basic_UntypedString) {
if (o->mode == Addressing_Constant) {
*max_count = o->value.value_string.len;
@@ -10879,9 +10978,17 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node,
if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) {
valid = true;
if (o->mode == Addressing_Constant) {
+ GB_ASSERT(o->value.kind == ExactValue_String);
max_count = o->value.value_string.len;
}
o->type = type_deref(o->type);
+ } else if (t->Basic.kind == Basic_string16) {
+ valid = true;
+ if (o->mode == Addressing_Constant) {
+ GB_ASSERT(o->value.kind == ExactValue_String16);
+ max_count = o->value.value_string16.len;
+ }
+ o->type = type_deref(o->type);
}
break;
@@ -11036,15 +11143,21 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node,
o->expr = node;
return kind;
}
-
- String s = {};
- if (o->value.kind == ExactValue_String) {
- s = o->value.value_string;
- }
-
o->mode = Addressing_Constant;
o->type = t;
- o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+
+ if (o->value.kind == ExactValue_String16) {
+ String16 s = o->value.value_string16;
+
+ o->value = exact_value_string16(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+ } else {
+ String s = {};
+ if (o->value.kind == ExactValue_String) {
+ s = o->value.value_string;
+ }
+
+ o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+ }
}
return kind;
}
@@ -11133,6 +11246,7 @@ gb_internal ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast
Type *t = t_invalid;
switch (node->tav.value.kind) {
case ExactValue_String: t = t_untyped_string; break;
+ case ExactValue_String16: t = t_string16; break; // TODO(bill): determine this correctly
case ExactValue_Float: t = t_untyped_float; break;
case ExactValue_Complex: t = t_untyped_complex; break;
case ExactValue_Quaternion: t = t_untyped_quaternion; break;
@@ -11569,6 +11683,8 @@ gb_internal bool is_exact_value_zero(ExactValue const &v) {
return !v.value_bool;
case ExactValue_String:
return v.value_string.len == 0;
+ case ExactValue_String16:
+ return v.value_string16.len == 0;
case ExactValue_Integer:
return big_int_is_zero(&v.value_integer);
case ExactValue_Float:
diff --git a/src/check_stmt.cpp b/src/check_stmt.cpp
index bc9b6c5dd..ae88ff333 100644
--- a/src/check_stmt.cpp
+++ b/src/check_stmt.cpp
@@ -974,7 +974,14 @@ gb_internal void check_unroll_range_stmt(CheckerContext *ctx, Ast *node, u32 mod
Type *t = base_type(operand.type);
switch (t->kind) {
case Type_Basic:
- if (is_type_string(t) && t->Basic.kind != Basic_cstring) {
+ if (is_type_string16(t) && t->Basic.kind != Basic_cstring16) { // 16-bit branch: exclude cstring16 (the old Basic_cstring test was copied from the 8-bit branch)
+ val0 = t_rune;
+ val1 = t_int;
+ inline_for_depth = exact_value_i64(operand.value.value_string.len); // NOTE(review): reads the 8-bit union member; confirm whether value_string16.len is intended here
+ if (unroll_count > 0) {
+ error(node, "#unroll(%lld) does not support strings", cast(long long)unroll_count);
+ }
+ } else if (is_type_string(t) && t->Basic.kind != Basic_cstring) {
val0 = t_rune;
val1 = t_int;
inline_for_depth = exact_value_i64(operand.value.value_string.len);
@@ -1236,7 +1243,11 @@ gb_internal void check_switch_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags
add_to_seen_map(ctx, &seen, upper_op, x, lhs, rhs);
- if (is_type_string(x.type)) {
+ if (is_type_string16(x.type)) {
+ // NOTE(bill): Force dependency for strings here
+ add_package_dependency(ctx, "runtime", "string16_le");
+ add_package_dependency(ctx, "runtime", "string16_lt");
+ } else if (is_type_string(x.type)) {
// NOTE(bill): Force dependency for strings here
add_package_dependency(ctx, "runtime", "string_le");
add_package_dependency(ctx, "runtime", "string_lt");
@@ -1770,7 +1781,16 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags)
switch (t->kind) {
case Type_Basic:
- if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) {
+ if (t->Basic.kind == Basic_string16) {
+ is_possibly_addressable = false;
+ array_add(&vals, t_rune);
+ array_add(&vals, t_int);
+ if (is_reverse) {
+ add_package_dependency(ctx, "runtime", "string16_decode_last_rune");
+ } else {
+ add_package_dependency(ctx, "runtime", "string16_decode_rune");
+ }
+ } else if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) {
is_possibly_addressable = false;
array_add(&vals, t_rune);
array_add(&vals, t_int);
diff --git a/src/checker.cpp b/src/checker.cpp
index dbe2af866..e72061f56 100644
--- a/src/checker.cpp
+++ b/src/checker.cpp
@@ -1363,13 +1363,15 @@ gb_internal void init_universal(void) {
}
- t_u8_ptr = alloc_type_pointer(t_u8);
- t_u8_multi_ptr = alloc_type_multi_pointer(t_u8);
- t_int_ptr = alloc_type_pointer(t_int);
- t_i64_ptr = alloc_type_pointer(t_i64);
- t_f64_ptr = alloc_type_pointer(t_f64);
- t_u8_slice = alloc_type_slice(t_u8);
- t_string_slice = alloc_type_slice(t_string);
+ t_u8_ptr = alloc_type_pointer(t_u8);
+ t_u8_multi_ptr = alloc_type_multi_pointer(t_u8);
+ t_u16_ptr = alloc_type_pointer(t_u16);
+ t_u16_multi_ptr = alloc_type_multi_pointer(t_u16);
+ t_int_ptr = alloc_type_pointer(t_int);
+ t_i64_ptr = alloc_type_pointer(t_i64);
+ t_f64_ptr = alloc_type_pointer(t_f64);
+ t_u8_slice = alloc_type_slice(t_u8);
+ t_string_slice = alloc_type_slice(t_string);
// intrinsics types for objective-c stuff
{
@@ -3099,6 +3101,9 @@ gb_internal void init_core_type_info(Checker *c) {
GB_ASSERT(tis->fields.count == 5);
+ Entity *type_info_string_encoding_kind = find_core_entity(c, str_lit("Type_Info_String_Encoding_Kind"));
+ t_type_info_string_encoding_kind = type_info_string_encoding_kind->type;
+
Entity *type_info_variant = tis->fields[4];
Type *tiv_type = type_info_variant->type;
GB_ASSERT(is_type_union(tiv_type));
diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp
index e9655e88a..b8b105fd2 100644
--- a/src/checker_builtin_procs.hpp
+++ b/src/checker_builtin_procs.hpp
@@ -250,6 +250,7 @@ BuiltinProc__type_simple_boolean_begin,
BuiltinProc_type_is_complex,
BuiltinProc_type_is_quaternion,
BuiltinProc_type_is_string,
+ BuiltinProc_type_is_string16,
BuiltinProc_type_is_typeid,
BuiltinProc_type_is_any,
@@ -608,6 +609,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
{STR_LIT("type_is_complex"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("type_is_quaternion"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("type_is_string"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
+ {STR_LIT("type_is_string16"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("type_is_typeid"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("type_is_any"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
diff --git a/src/common.cpp b/src/common.cpp
index ad1e5a851..53848cacf 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -350,6 +350,7 @@ gb_global bool global_module_path_set = false;
#include "ptr_map.cpp"
#include "ptr_set.cpp"
#include "string_map.cpp"
+#include "string16_map.cpp"
#include "string_set.cpp"
#include "priority_queue.cpp"
#include "thread_pool.cpp"
@@ -669,7 +670,7 @@ gb_internal gb_inline f64 gb_sqrt(f64 x) {
gb_internal wchar_t **command_line_to_wargv(wchar_t *cmd_line, int *_argc) {
u32 i, j;
- u32 len = cast(u32)string16_len(cmd_line);
+ u32 len = cast(u32)string16_len(cast(u16 *)cmd_line);
i = ((len+2)/2)*gb_size_of(void *) + gb_size_of(void *);
wchar_t **argv = cast(wchar_t **)GlobalAlloc(GMEM_FIXED, i + (len+2)*gb_size_of(wchar_t));
diff --git a/src/exact_value.cpp b/src/exact_value.cpp
index 37751c8f1..f2aed84c2 100644
--- a/src/exact_value.cpp
+++ b/src/exact_value.cpp
@@ -29,6 +29,7 @@ enum ExactValueKind {
ExactValue_Compound = 8,
ExactValue_Procedure = 9,
ExactValue_Typeid = 10,
+ ExactValue_String16 = 11,
ExactValue_Count,
};
@@ -46,6 +47,7 @@ struct ExactValue {
Ast * value_compound;
Ast * value_procedure;
Type * value_typeid;
+ String16 value_string16;
};
};
@@ -66,6 +68,9 @@ gb_internal uintptr hash_exact_value(ExactValue v) {
case ExactValue_String:
res = gb_fnv32a(v.value_string.text, v.value_string.len);
break;
+ case ExactValue_String16:
+ res = gb_fnv32a(v.value_string16.text, v.value_string16.len*gb_size_of(u16)); // hash the UTF-16 code units via the matching union member; len counts u16 units, so scale to bytes
+ break;
case ExactValue_Integer:
{
u32 key = gb_fnv32a(v.value_integer.dp, gb_size_of(*v.value_integer.dp) * v.value_integer.used);
@@ -118,6 +123,11 @@ gb_internal ExactValue exact_value_string(String string) {
result.value_string = string;
return result;
}
+gb_internal ExactValue exact_value_string16(String16 string) {
+ // Build a constant tagged ExactValue_String16 that carries the given String16 value.
+ ExactValue utf16_value = {ExactValue_String16};
+ utf16_value.value_string16 = string;
+ return utf16_value;
+}
gb_internal ExactValue exact_value_i64(i64 i) {
ExactValue result = {ExactValue_Integer};
@@ -656,6 +666,7 @@ gb_internal i32 exact_value_order(ExactValue const &v) {
return 0;
case ExactValue_Bool:
case ExactValue_String:
+ case ExactValue_String16:
return 1;
case ExactValue_Integer:
return 2;
@@ -689,6 +700,7 @@ gb_internal void match_exact_values(ExactValue *x, ExactValue *y) {
case ExactValue_Bool:
case ExactValue_String:
+ case ExactValue_String16:
case ExactValue_Quaternion:
case ExactValue_Pointer:
case ExactValue_Compound:
@@ -891,7 +903,18 @@ gb_internal ExactValue exact_binary_operator_value(TokenKind op, ExactValue x, E
gb_memmove(data, sx.text, sx.len);
gb_memmove(data+sx.len, sy.text, sy.len);
return exact_value_string(make_string(data, len));
- break;
+ }
+ case ExactValue_String16: {
+ if (op != Token_Add) goto error; // concatenation is the only binary operator handled for UTF-16 string constants
+
+ // NOTE(bill): How do you minimize this over allocation?
+ String16 sx = x.value_string16; // read operands through the String16 union member so lengths and pointers are in u16 units
+ String16 sy = y.value_string16;
+ isize len = sx.len+sy.len;
+ u16 *data = gb_alloc_array(permanent_allocator(), u16, len);
+ gb_memmove(data, sx.text, sx.len*gb_size_of(u16));
+ gb_memmove(data+sx.len, sy.text, sy.len*gb_size_of(u16));
+ return exact_value_string16(make_string16(data, len));
}
}
@@ -994,6 +1017,19 @@ gb_internal bool compare_exact_values(TokenKind op, ExactValue x, ExactValue y)
}
break;
}
+ case ExactValue_String16: {
+ String16 a = x.value_string16;
+ String16 b = y.value_string16;
+ switch (op) {
+ case Token_CmpEq: return a == b;
+ case Token_NotEq: return a != b;
+ case Token_Lt: return a < b;
+ case Token_LtEq: return a <= b;
+ case Token_Gt: return a > b;
+ case Token_GtEq: return a >= b;
+ }
+ break;
+ }
case ExactValue_Pointer: {
switch (op) {
@@ -1050,6 +1086,20 @@ gb_internal gbString write_exact_value_to_string(gbString str, ExactValue const
gb_free(heap_allocator(), s.text);
return str;
}
+ case ExactValue_String16: {
+ String s = quote_to_ascii(heap_allocator(), v.value_string16);
+ string_limit = gb_max(string_limit, 36);
+ if (s.len <= string_limit) {
+ str = gb_string_append_length(str, s.text, s.len);
+ } else {
+ isize n = string_limit/5;
+ str = gb_string_append_length(str, s.text, n);
+ str = gb_string_append_fmt(str, "\"..%lld chars..\"", s.len-(2*n));
+ str = gb_string_append_length(str, s.text+s.len-n, n);
+ }
+ gb_free(heap_allocator(), s.text);
+ return str;
+ }
case ExactValue_Integer: {
String s = big_int_to_string(heap_allocator(), &v.value_integer);
str = gb_string_append_length(str, s.text, s.len);
diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp
index 13a1d8cf3..f37415cc1 100644
--- a/src/llvm_backend.cpp
+++ b/src/llvm_backend.cpp
@@ -1264,7 +1264,13 @@ String lb_get_objc_type_encoding(Type *t, isize pointer_depth = 0) {
case Basic_string:
return build_context.metrics.int_size == 4 ? str_lit("{string=*i}") : str_lit("{string=*q}");
+ case Basic_string16:
+ return build_context.metrics.int_size == 4 ? str_lit("{string16=*i}") : str_lit("{string16=*q}");
+
case Basic_cstring: return str_lit("*");
+ case Basic_cstring16: return str_lit("*");
+
+
case Basic_any: return str_lit("{any=^v^v}"); // rawptr + ^Type_Info
case Basic_typeid:
diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp
index fef6e754d..648e8a732 100644
--- a/src/llvm_backend.hpp
+++ b/src/llvm_backend.hpp
@@ -173,7 +173,8 @@ struct lbModule {
PtrMap<LLVMValueRef, Entity *> procedure_values;
Array<lbProcedure *> missing_procedures_to_check;
- StringMap<LLVMValueRef> const_strings;
+ StringMap<LLVMValueRef> const_strings;
+ String16Map<LLVMValueRef> const_string16s;
PtrMap<u64/*type hash*/, struct lbFunctionType *> function_type_map;
diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp
index c3112934e..e64be49f2 100644
--- a/src/llvm_backend_const.cpp
+++ b/src/llvm_backend_const.cpp
@@ -122,6 +122,25 @@ gb_internal lbValue lb_const_ptr_cast(lbModule *m, lbValue value, Type *t) {
gb_internal LLVMValueRef llvm_const_string_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) {
+ // Builds the constant struct for a non-UTF-16 string type `t` from its data pointer and length.
+ GB_ASSERT(!is_type_string16(t));
+ bool wide_int = build_context.metrics.ptr_size < build_context.metrics.int_size;
+ if (!wide_int) {
+ LLVMValueRef fields[2] = {data, len};
+ return llvm_const_named_struct_internal(lb_type(m, t), fields, 2);
+ }
+ // Pointer narrower than int: a zero i32 is placed between the data pointer and the length.
+ LLVMValueRef wide_fields[3] = {data, LLVMConstNull(lb_type(m, t_i32)), len};
+ return llvm_const_named_struct_internal(lb_type(m, t), wide_fields, 3);
+}
+
+gb_internal LLVMValueRef llvm_const_string16_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) {
+ GB_ASSERT(is_type_string16(t));
if (build_context.metrics.ptr_size < build_context.metrics.int_size) {
LLVMValueRef values[3] = {
data,
@@ -238,6 +257,10 @@ gb_internal lbValue lb_const_string(lbModule *m, String const &value) {
return lb_const_value(m, t_string, exact_value_string(value));
}
+gb_internal lbValue lb_const_string(lbModule *m, String16 const &value) {
+ // String16 overload: wrap the text as an ExactValue_String16 constant and emit it with type string16.
+ ExactValue utf16_constant = exact_value_string16(value);
+ return lb_const_value(m, t_string16, utf16_constant);
+}
+
gb_internal lbValue lb_const_bool(lbModule *m, Type *type, bool value) {
lbValue res = {};
@@ -569,7 +592,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb
GB_ASSERT(is_type_slice(type));
res.value = lb_find_or_add_entity_string_byte_slice_with_type(m, value.value_string, original_type).value;
return res;
- } else {
+ } else if (value.kind == ExactValue_String16) {
+ GB_ASSERT(is_type_slice(type));
+ res.value = lb_find_or_add_entity_string16_slice_with_type(m, value.value_string16, original_type).value;
+ return res;
+ }else {
ast_node(cl, CompoundLit, value.value_compound);
isize count = cl->elems.count;
@@ -751,29 +778,78 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb
{
bool custom_link_section = cc.link_section.len > 0;
- LLVMValueRef ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section);
+ LLVMValueRef ptr = nullptr;
lbValue res = {};
res.type = default_type(original_type);
+ isize len = value.value_string.len;
+
+ if (is_type_string16(res.type) || is_type_cstring16(res.type)) {
+ TEMPORARY_ALLOCATOR_GUARD();
+ String16 s16 = string_to_string16(temporary_allocator(), value.value_string);
+ len = s16.len;
+ ptr = lb_find_or_add_entity_string16_ptr(m, s16, custom_link_section);
+ } else {
+ ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section);
+ }
+
if (custom_link_section) {
LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section));
}
- if (is_type_cstring(res.type)) {
+ if (is_type_cstring(res.type) || is_type_cstring16(res.type)) {
res.value = ptr;
} else {
- if (value.value_string.len == 0) {
+ if (len == 0) {
+ if (is_type_string16(res.type)) {
+ ptr = LLVMConstNull(lb_type(m, t_u16_ptr));
+ } else {
+ ptr = LLVMConstNull(lb_type(m, t_u8_ptr));
+ }
+ }
+ LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), len, true);
+ GB_ASSERT(is_type_string(original_type));
+
+ if (is_type_string16(res.type)) {
+ res.value = llvm_const_string16_internal(m, original_type, ptr, str_len);
+ } else {
+ res.value = llvm_const_string_internal(m, original_type, ptr, str_len);
+ }
+ }
+
+ return res;
+ }
+
+ case ExactValue_String16:
+ {
+ GB_ASSERT(is_type_string16(res.type) || is_type_cstring16(res.type));
+
+ bool custom_link_section = cc.link_section.len > 0;
+
+ LLVMValueRef ptr = lb_find_or_add_entity_string16_ptr(m, value.value_string16, custom_link_section);
+ lbValue res = {};
+ res.type = default_type(original_type);
+
+ if (custom_link_section) {
+ LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section));
+ }
+
+ if (is_type_cstring16(res.type)) {
+ res.value = ptr;
+ } else {
+ if (value.value_string16.len == 0) {
- ptr = LLVMConstNull(lb_type(m, t_u8_ptr));
+ ptr = LLVMConstNull(lb_type(m, t_u16_ptr)); // empty string16: null data pointer typed as a u16 pointer, as in the ExactValue_String path for string16
}
- LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string.len, true);
+ LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string16.len, true);
GB_ASSERT(is_type_string(original_type));
- res.value = llvm_const_string_internal(m, original_type, ptr, str_len);
+ res.value = llvm_const_string16_internal(m, original_type, ptr, str_len);
}
return res;
}
+
case ExactValue_Integer:
if (is_type_pointer(type) || is_type_multi_pointer(type) || is_type_proc(type)) {
LLVMTypeRef t = lb_type(m, original_type);
diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp
index 024c5564e..182920fc7 100644
--- a/src/llvm_backend_debug.cpp
+++ b/src/llvm_backend_debug.cpp
@@ -802,6 +802,20 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) {
LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("char"), 8, LLVMDWARFTypeEncoding_Unsigned);
return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring", 7);
}
+
+ case Basic_string16:
+ {
+ LLVMMetadataRef elements[2] = {};
+ elements[0] = lb_debug_struct_field(m, str_lit("data"), t_u16_ptr, 0);
+ elements[1] = lb_debug_struct_field(m, str_lit("len"), t_int, int_bits);
+ return lb_debug_basic_struct(m, str_lit("string16"), 2*int_bits, int_bits, elements, gb_count_of(elements));
+ }
+ case Basic_cstring16:
+ {
+ LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("wchar_t"), 16, LLVMDWARFTypeEncoding_Unsigned);
+ return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring16", 9); // final argument is the name length: "cstring16" is 9 characters (7 truncated it to "cstring")
+ }
+
case Basic_any:
{
LLVMMetadataRef elements[2] = {};
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp
index 74aea82f1..5425572c7 100644
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -1559,16 +1559,24 @@ gb_internal lbValue lb_build_binary_expr(lbProcedure *p, Ast *expr) {
return lb_emit_conv(p, cmp, type);
} else if (lb_is_empty_string_constant(be->right) && !is_type_union(be->left->tav.type)) {
// `x == ""` or `x != ""`
+ Type *str_type = t_string;
+ if (is_type_string16(be->left->tav.type) || is_type_cstring16(be->left->tav.type)) {
+ str_type = t_string16;
+ }
lbValue s = lb_build_expr(p, be->left);
- s = lb_emit_conv(p, s, t_string);
+ s = lb_emit_conv(p, s, str_type);
lbValue len = lb_string_len(p, s);
lbValue cmp = lb_emit_comp(p, be->op.kind, len, lb_const_int(p->module, t_int, 0));
Type *type = default_type(tv.type);
return lb_emit_conv(p, cmp, type);
} else if (lb_is_empty_string_constant(be->left) && !is_type_union(be->right->tav.type)) {
// `"" == x` or `"" != x`
+ Type *str_type = t_string;
+ if (is_type_string16(be->right->tav.type) || is_type_cstring16(be->right->tav.type)) {
+ str_type = t_string16;
+ }
lbValue s = lb_build_expr(p, be->right);
- s = lb_emit_conv(p, s, t_string);
+ s = lb_emit_conv(p, s, str_type);
lbValue len = lb_string_len(p, s);
lbValue cmp = lb_emit_comp(p, be->op.kind, len, lb_const_int(p->module, t_int, 0));
Type *type = default_type(tv.type);
@@ -1656,6 +1664,8 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
res.type = t;
res.value = llvm_cstring(m, str);
return res;
+ } else if (src->kind == Type_Basic && src->Basic.kind == Basic_string16 && dst->Basic.kind == Basic_cstring16) {
+ GB_PANIC("TODO(bill): UTF-16 string");
}
// if (is_type_float(dst)) {
// return value;
@@ -1795,6 +1805,38 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
}
+
+	// cstring16 <-> ^u16: same representation, so a transmute is sufficient
+	if (is_type_cstring16(src) && is_type_u16_ptr(dst)) {
+		return lb_emit_transmute(p, value, dst);
+	}
+	if (is_type_u16_ptr(src) && is_type_cstring16(dst)) {
+		return lb_emit_transmute(p, value, dst);
+	}
+	// cstring16 <-> [^]u16
+	if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) {
+		return lb_emit_transmute(p, value, dst);
+	}
+	// NOTE: the source must be a [^]u16 here (this previously tested for [^]u8,
+	// which is the element type of `cstring`, not `cstring16`)
+	if (is_type_u16_multi_ptr(src) && is_type_cstring16(dst)) {
+		return lb_emit_transmute(p, value, dst);
+	}
+	// cstring16 <-> rawptr
+	if (is_type_cstring16(src) && is_type_rawptr(dst)) {
+		return lb_emit_transmute(p, value, dst);
+	}
+	if (is_type_rawptr(src) && is_type_cstring16(dst)) {
+		return lb_emit_transmute(p, value, dst);
+	}
+
+ if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) {
+ TEMPORARY_ALLOCATOR_GUARD();
+
+ lbValue c = lb_emit_conv(p, value, t_cstring16);
+ auto args = array_make<lbValue>(temporary_allocator(), 1);
+ args[0] = c;
+ lbValue s = lb_emit_runtime_call(p, "cstring16_to_string16", args);
+ return lb_emit_conv(p, s, dst);
+ }
+
+
+
// integer -> boolean
if (is_type_integer(src) && is_type_boolean(dst)) {
lbValue res = {};
@@ -2296,6 +2338,29 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
return res;
}
+ // [^]u16 <-> cstring16
+ if (is_type_u16_multi_ptr(src) && is_type_cstring16(dst)) {
+ return lb_emit_transmute(p, value, t);
+ }
+ if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) {
+ return lb_emit_transmute(p, value, t);
+ }
+ if (is_type_u16_ptr(src) && is_type_cstring16(dst)) {
+ return lb_emit_transmute(p, value, t);
+ }
+ if (is_type_cstring16(src) && is_type_u16_ptr(dst)) {
+ return lb_emit_transmute(p, value, t);
+ }
+
+
+ // []u16 <-> string16
+ if (is_type_u16_slice(src) && is_type_string16(dst)) {
+ return lb_emit_transmute(p, value, t);
+ }
+ if (is_type_string16(src) && is_type_u16_slice(dst)) {
+ return lb_emit_transmute(p, value, t);
+ }
+
// []byte/[]u8 <-> string
if (is_type_u8_slice(src) && is_type_string(dst)) {
return lb_emit_transmute(p, value, t);
@@ -2304,6 +2369,7 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
return lb_emit_transmute(p, value, t);
}
+
if (is_type_array_like(dst)) {
Type *elem = base_array_type(dst);
isize index_count = cast(isize)get_array_type_count(dst);
@@ -2710,7 +2776,53 @@ gb_internal lbValue lb_emit_comp(lbProcedure *p, TokenKind op_kind, lbValue left
return lb_compare_records(p, op_kind, left, right, b);
}
+
+ if (is_type_string16(a) || is_type_cstring16(a)) {
+ if (is_type_cstring16(a) && is_type_cstring16(b)) {
+ left = lb_emit_conv(p, left, t_cstring16);
+ right = lb_emit_conv(p, right, t_cstring16);
+ char const *runtime_procedure = nullptr;
+ switch (op_kind) {
+ case Token_CmpEq: runtime_procedure = "cstring16_eq"; break;
+ case Token_NotEq: runtime_procedure = "cstring16_ne"; break;
+ case Token_Lt: runtime_procedure = "cstring16_lt"; break;
+ case Token_Gt: runtime_procedure = "cstring16_gt"; break;
+ case Token_LtEq: runtime_procedure = "cstring16_le"; break;
+ case Token_GtEq: runtime_procedure = "cstring16_ge"; break;
+ }
+ GB_ASSERT(runtime_procedure != nullptr);
+
+ auto args = array_make<lbValue>(permanent_allocator(), 2);
+ args[0] = left;
+ args[1] = right;
+ return lb_emit_runtime_call(p, runtime_procedure, args);
+ }
+
+
+ if (is_type_cstring16(a) ^ is_type_cstring16(b)) {
+ left = lb_emit_conv(p, left, t_string16);
+ right = lb_emit_conv(p, right, t_string16);
+ }
+
+ char const *runtime_procedure = nullptr;
+ switch (op_kind) {
+ case Token_CmpEq: runtime_procedure = "string16_eq"; break;
+ case Token_NotEq: runtime_procedure = "string16_ne"; break;
+ case Token_Lt: runtime_procedure = "string16_lt"; break;
+ case Token_Gt: runtime_procedure = "string16_gt"; break;
+ case Token_LtEq: runtime_procedure = "string16_le"; break;
+ case Token_GtEq: runtime_procedure = "string16_ge"; break;
+ }
+ GB_ASSERT(runtime_procedure != nullptr);
+
+ auto args = array_make<lbValue>(permanent_allocator(), 2);
+ args[0] = left;
+ args[1] = right;
+ return lb_emit_runtime_call(p, runtime_procedure, args);
+ }
+
if (is_type_string(a)) {
+
if (is_type_cstring(a) && is_type_cstring(b)) {
left = lb_emit_conv(p, left, t_cstring);
right = lb_emit_conv(p, right, t_cstring);
@@ -3056,6 +3168,13 @@ gb_internal lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind,
res.value = LLVMBuildIsNotNull(p->builder, x.value, "");
}
return res;
+ case Basic_cstring16:
+ if (op_kind == Token_CmpEq) {
+ res.value = LLVMBuildIsNull(p->builder, x.value, "");
+ } else if (op_kind == Token_NotEq) {
+ res.value = LLVMBuildIsNotNull(p->builder, x.value, "");
+ }
+ return res;
case Basic_any:
{
// TODO(bill): is this correct behaviour for nil comparison for any?
@@ -4298,12 +4417,13 @@ gb_internal lbAddr lb_build_addr_index_expr(lbProcedure *p, Ast *expr) {
}
- case Type_Basic: { // Basic_string
+ case Type_Basic: { // Basic_string/Basic_string16
lbValue str;
lbValue elem;
lbValue len;
lbValue index;
+
str = lb_build_expr(p, ie->expr);
if (deref) {
str = lb_emit_load(p, str);
@@ -4432,6 +4552,22 @@ gb_internal lbAddr lb_build_addr_slice_expr(lbProcedure *p, Ast *expr) {
}
case Type_Basic: {
+ if (is_type_string16(type)) {
+ GB_ASSERT_MSG(are_types_identical(type, t_string16), "got %s", type_to_string(type));
+ lbValue len = lb_string_len(p, base);
+ if (high.value == nullptr) high = len;
+
+ if (!no_indices) {
+ lb_emit_slice_bounds_check(p, se->open, low, high, len, se->low != nullptr);
+ }
+
+ lbValue elem = lb_emit_ptr_offset(p, lb_string_elem(p, base), low);
+ lbValue new_len = lb_emit_arith(p, Token_Sub, high, low, t_int);
+
+ lbAddr str = lb_add_local_generated(p, t_string16, false);
+ lb_fill_string(p, str, elem, new_len);
+ return str;
+ }
GB_ASSERT_MSG(are_types_identical(type, t_string), "got %s", type_to_string(type));
lbValue len = lb_string_len(p, base);
if (high.value == nullptr) high = len;
diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp
index 3ce0c725f..d84b8302b 100644
--- a/src/llvm_backend_general.cpp
+++ b/src/llvm_backend_general.cpp
@@ -85,6 +85,7 @@ gb_internal void lb_init_module(lbModule *m, Checker *c) {
string_map_init(&m->members);
string_map_init(&m->procedures);
string_map_init(&m->const_strings);
+ string16_map_init(&m->const_string16s);
map_init(&m->function_type_map);
string_map_init(&m->gen_procs);
if (USE_SEPARATE_MODULES) {
@@ -1812,6 +1813,37 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) {
return type;
}
case Basic_cstring: return LLVMPointerType(LLVMInt8TypeInContext(ctx), 0);
+
+
+ case Basic_string16:
+ {
+ char const *name = "..string16";
+ LLVMTypeRef type = LLVMGetTypeByName(m->mod, name);
+ if (type != nullptr) {
+ return type;
+ }
+ type = LLVMStructCreateNamed(ctx, name);
+
+ if (build_context.metrics.ptr_size < build_context.metrics.int_size) {
+ GB_ASSERT(build_context.metrics.ptr_size == 4);
+ GB_ASSERT(build_context.metrics.int_size == 8);
+ LLVMTypeRef fields[3] = {
+ LLVMPointerType(lb_type(m, t_u16), 0),
+ lb_type(m, t_i32),
+ lb_type(m, t_int),
+ };
+ LLVMStructSetBody(type, fields, 3, false);
+ } else {
+ LLVMTypeRef fields[2] = {
+ LLVMPointerType(lb_type(m, t_u16), 0),
+ lb_type(m, t_int),
+ };
+ LLVMStructSetBody(type, fields, 2, false);
+ }
+ return type;
+ }
+ case Basic_cstring16: return LLVMPointerType(LLVMInt16TypeInContext(ctx), 0);
+
case Basic_any:
{
char const *name = "..any";
@@ -2684,6 +2716,57 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string_ptr(lbModule *m, String co
}
}
+// Returns a constant pointer to the first code unit of a private, NUL-terminated
+// global array holding the UTF-16 contents of `str`.
+// Unless `custom_link_section` is set, results are cached in `m->const_string16s`
+// so identical literals share one global.
+gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String16 const &str, bool custom_link_section) {
+	bool const use_cache = !custom_link_section;
+
+	String16HashKey key = {};
+	if (use_cache) {
+		key = string_hash_string(str);
+		LLVMValueRef *cached = string16_map_get(&m->const_string16s, key);
+		if (cached != nullptr) {
+			return *cached;
+		}
+	}
+
+	// Build the constant initializer: every code unit followed by a zero terminator
+	LLVMValueRef initializer = nullptr;
+	{
+		LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx);
+
+		TEMPORARY_ALLOCATOR_GUARD();
+
+		LLVMValueRef *units = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1);
+
+		isize unit_index = 0;
+		while (unit_index < str.len) {
+			units[unit_index] = LLVMConstInt(llvm_u16, str.text[unit_index], false);
+			unit_index += 1;
+		}
+		units[str.len] = LLVMConstInt(llvm_u16, 0, false);
+
+		initializer = LLVMConstArray(llvm_u16, units, cast(unsigned)(str.len+1));
+	}
+
+	// Unique global name: "csbs$<module>$<hex id>"
+	u32 id = m->global_array_index.fetch_add(1);
+	gbString global_name = gb_string_make(temporary_allocator(), "csbs$");
+	global_name = gb_string_appendc(global_name, m->module_name);
+	global_name = gb_string_append_fmt(global_name, "$%x", id);
+
+	LLVMTypeRef array_type = LLVMTypeOf(initializer);
+	LLVMValueRef global = LLVMAddGlobal(m->mod, array_type, global_name);
+	LLVMSetInitializer(global, initializer);
+	lb_make_global_private_const(global);
+	LLVMSetAlignment(global, 2);
+
+	// GEP [0, 0] yields the address of the first element of the array
+	LLVMValueRef zero_indices[2] = {llvm_zero(m), llvm_zero(m)};
+	LLVMValueRef first_unit = LLVMConstInBoundsGEP2(array_type, global, zero_indices, 2);
+	if (use_cache) {
+		string16_map_set(&m->const_string16s, key, first_unit);
+	}
+	return first_unit;
+}
+
gb_internal lbValue lb_find_or_add_entity_string(lbModule *m, String const &str, bool custom_link_section) {
LLVMValueRef ptr = nullptr;
if (str.len != 0) {
@@ -2744,6 +2827,60 @@ gb_internal lbValue lb_find_or_add_entity_string_byte_slice_with_type(lbModule *
return res;
}
+// Builds a constant slice value of type `slice_type` that views a private,
+// NUL-terminated global array holding the UTF-16 code units of `str`.
+// For an empty string the data pointer is null. When `slice_type` is not a
+// []u16, the pointer is cast to the slice's element pointer type and the
+// length is rescaled from the byte size of the data.
+gb_internal lbValue lb_find_or_add_entity_string16_slice_with_type(lbModule *m, String16 const &str, Type *slice_type) {
+	GB_ASSERT(is_type_slice(slice_type));
+	LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)};
+	LLVMValueRef data = nullptr;
+	{
+		LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx);
+
+		TEMPORARY_ALLOCATOR_GUARD();
+
+		LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1);
+
+		for (isize i = 0; i < str.len; i++) {
+			values[i] = LLVMConstInt(llvm_u16, str.text[i], false);
+		}
+		values[str.len] = LLVMConstInt(llvm_u16, 0, false);
+
+		data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1));
+	}
+
+	// Unique global name: "csba$<module>$<hex id>"
+	u32 id = m->global_array_index.fetch_add(1);
+	gbString name = gb_string_make(temporary_allocator(), "csba$");
+	name = gb_string_appendc(name, m->module_name);
+	name = gb_string_append_fmt(name, "$%x", id);
+
+	LLVMTypeRef type = LLVMTypeOf(data);
+	LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name);
+	LLVMSetInitializer(global_data, data);
+	lb_make_global_private_const(global_data);
+	LLVMSetAlignment(global_data, 2);
+
+	// `str.len` counts 16-bit code units, not bytes
+	i64 data_len = str.len;
+	LLVMValueRef ptr = nullptr;
+	if (data_len != 0) {
+		ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2);
+	} else {
+		// The element type of this data is u16, so the null pointer is a ^u16 (was ^u8)
+		ptr = LLVMConstNull(lb_type(m, t_u16_ptr));
+	}
+	if (!is_type_u16_slice(slice_type)) {
+		Type *bt = base_type(slice_type);
+		Type *elem = bt->Slice.elem;
+		i64 sz = type_size_of(elem);
+		GB_ASSERT(sz > 0);
+		ptr = LLVMConstPointerCast(ptr, lb_type(m, alloc_type_pointer(elem)));
+		// Convert the code-unit count to bytes before dividing by the element size;
+		// dividing the unit count directly would under-report the length.
+		i64 byte_len = data_len * cast(i64)gb_size_of(u16);
+		data_len = byte_len / sz;
+	}
+
+	LLVMValueRef len = LLVMConstInt(lb_type(m, t_int), data_len, true);
+	LLVMValueRef values[2] = {ptr, len};
+
+	lbValue res = {};
+	res.value = llvm_const_named_struct(m, slice_type, values, 2);
+	res.type = slice_type;
+	return res;
+}
gb_internal lbValue lb_find_ident(lbProcedure *p, lbModule *m, Entity *e, Ast *expr) {
diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp
index e63c92f6f..8f306b771 100644
--- a/src/llvm_backend_proc.cpp
+++ b/src/llvm_backend_proc.cpp
@@ -2289,6 +2289,10 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
}
if (is_type_cstring(t)) {
return lb_cstring_len(p, v);
+ } else if (is_type_cstring16(t)) {
+ return lb_cstring16_len(p, v);
+ } else if (is_type_string16(t)) {
+ return lb_string_len(p, v);
} else if (is_type_string(t)) {
return lb_string_len(p, v);
} else if (is_type_array(t)) {
@@ -2728,6 +2732,11 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
res = lb_emit_conv(p, res, tv.type);
} else if (t->Basic.kind == Basic_cstring) {
res = lb_emit_conv(p, x, tv.type);
+ } else if (t->Basic.kind == Basic_string16) {
+ res = lb_string_elem(p, x);
+ res = lb_emit_conv(p, res, tv.type);
+ } else if (t->Basic.kind == Basic_cstring16) {
+ res = lb_emit_conv(p, x, tv.type);
}
break;
case Type_Pointer:
diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp
index 027837f3f..5481ca447 100644
--- a/src/llvm_backend_stmt.cpp
+++ b/src/llvm_backend_stmt.cpp
@@ -622,6 +622,121 @@ gb_internal void lb_build_range_string(lbProcedure *p, lbValue expr, Type *val_t
if (done_) *done_ = done;
}
+// Emits the loop header for a `for c, offset in str` statement over a string16 value.
+// Creates the "for.string16.loop"/"body"/"done" blocks, keeps the running offset in a
+// generated local, and decodes one rune per iteration through the runtime procedures
+// `string16_decode_rune` (forward) or `string16_decode_last_rune` (`is_reverse`).
+// Outputs (each optional): `val_` receives the decoded rune (only extracted when
+// `val_type` is non-null), `idx_` the offset for the current iteration, and
+// `loop_`/`done_` the loop and exit blocks. On return the `body` block is the one started.
+gb_internal void lb_build_range_string16(lbProcedure *p, lbValue expr, Type *val_type,
+ lbValue *val_, lbValue *idx_, lbBlock **loop_, lbBlock **done_,
+ bool is_reverse) {
+
+ lbModule *m = p->module;
+ lbValue count = lb_const_int(m, t_int, 0);
+ Type *expr_type = base_type(expr.type);
+ // Only basic (string) types are accepted; anything else is a compiler bug
+ switch (expr_type->kind) {
+ case Type_Basic:
+ count = lb_string_len(p, expr);
+ break;
+ default:
+ GB_PANIC("Cannot do range_string of %s", type_to_string(expr_type));
+ break;
+ }
+
+ lbValue val = {};
+ lbValue idx = {};
+ lbBlock *loop = nullptr;
+ lbBlock *done = nullptr;
+ lbBlock *body = nullptr;
+
+ loop = lb_create_block(p, "for.string16.loop");
+ body = lb_create_block(p, "for.string16.body");
+ done = lb_create_block(p, "for.string16.done");
+
+ // Running offset into the string, stored in a local so it survives across iterations
+ lbAddr offset_ = lb_add_local_generated(p, t_int, false);
+ lbValue offset = {};
+ lbValue cond = {};
+
+ if (!is_reverse) {
+ /*
+ for c, offset in str {
+ ...
+ }
+
+ offset := 0
+ for offset < len(str) {
+ c, _w := string16_decode_rune(str[offset:])
+ ...
+ offset += _w
+ }
+ */
+ lb_addr_store(p, offset_, lb_const_int(m, t_int, 0));
+
+ lb_emit_jump(p, loop);
+ lb_start_block(p, loop);
+
+
+ offset = lb_addr_load(p, offset_);
+ cond = lb_emit_comp(p, Token_Lt, offset, count);
+ } else {
+ // NOTE(bill): REVERSED LOGIC
+ /*
+ #reverse for c, offset in str {
+ ...
+ }
+
+ offset := len(str)
+ for offset > 0 {
+ c, _w := string16_decode_last_rune(str[:offset])
+ offset -= _w
+ ...
+ }
+ */
+ lb_addr_store(p, offset_, count);
+
+ lb_emit_jump(p, loop);
+ lb_start_block(p, loop);
+
+ offset = lb_addr_load(p, offset_);
+ cond = lb_emit_comp(p, Token_Gt, offset, lb_const_int(m, t_int, 0));
+ }
+ lb_emit_if(p, cond, body, done);
+ lb_start_block(p, body);
+
+
+ // The runtime call's result is read as (rune, width): field 0 and field 1 below
+ lbValue rune_and_len = {};
+ if (!is_reverse) {
+ // Decode from str[offset:] and advance the stored offset by the decoded width
+ lbValue str_elem = lb_emit_ptr_offset(p, lb_string_elem(p, expr), offset);
+ lbValue str_len = lb_emit_arith(p, Token_Sub, count, offset, t_int);
+ auto args = array_make<lbValue>(permanent_allocator(), 1);
+ args[0] = lb_emit_string16(p, str_elem, str_len);
+
+ rune_and_len = lb_emit_runtime_call(p, "string16_decode_rune", args);
+ lbValue len = lb_emit_struct_ev(p, rune_and_len, 1);
+ lb_addr_store(p, offset_, lb_emit_arith(p, Token_Add, offset, len, t_int));
+
+ // The index reported for this iteration is the offset before advancing
+ idx = offset;
+ } else {
+ // NOTE(bill): REVERSED LOGIC
+ lbValue str_elem = lb_string_elem(p, expr);
+ lbValue str_len = offset;
+ auto args = array_make<lbValue>(permanent_allocator(), 1);
+ args[0] = lb_emit_string16(p, str_elem, str_len);
+
+ rune_and_len = lb_emit_runtime_call(p, "string16_decode_last_rune", args);
+ lbValue len = lb_emit_struct_ev(p, rune_and_len, 1);
+ lb_addr_store(p, offset_, lb_emit_arith(p, Token_Sub, offset, len, t_int));
+
+ // The index reported for this iteration is the offset after stepping back
+ idx = lb_addr_load(p, offset_);
+ }
+
+
+ if (val_type != nullptr) {
+ val = lb_emit_struct_ev(p, rune_and_len, 0);
+ }
+
+ if (val_) *val_ = val;
+ if (idx_) *idx_ = idx;
+ if (loop_) *loop_ = loop;
+ if (done_) *done_ = done;
+}
+
+
gb_internal Ast *lb_strip_and_prefix(Ast *ident) {
if (ident != nullptr) {
@@ -1138,7 +1253,11 @@ gb_internal void lb_build_range_stmt(lbProcedure *p, AstRangeStmt *rs, Scope *sc
}
Type *t = base_type(string.type);
GB_ASSERT(!is_type_cstring(t));
- lb_build_range_string(p, string, val0_type, &val, &key, &loop, &done, rs->reverse);
+ if (is_type_string16(t)) {
+ lb_build_range_string16(p, string, val0_type, &val, &key, &loop, &done, rs->reverse);
+ } else {
+ lb_build_range_string(p, string, val0_type, &val, &key, &loop, &done, rs->reverse);
+ }
break;
}
case Type_Tuple:
diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp
index 43c5f0b40..d1e7c0559 100644
--- a/src/llvm_backend_type.cpp
+++ b/src/llvm_backend_type.cpp
@@ -525,14 +525,48 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ
break;
case Basic_string:
- tag_type = t_type_info_string;
+ {
+ tag_type = t_type_info_string;
+ LLVMValueRef vals[2] = {
+ lb_const_bool(m, t_bool, false).value,
+ lb_const_int(m, t_type_info_string_encoding_kind, 0).value,
+ };
+
+ variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals));
+ }
break;
case Basic_cstring:
{
tag_type = t_type_info_string;
- LLVMValueRef vals[1] = {
+ LLVMValueRef vals[2] = {
+ lb_const_bool(m, t_bool, true).value,
+ lb_const_int(m, t_type_info_string_encoding_kind, 0).value,
+ };
+
+ variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals));
+ }
+ break;
+
+ case Basic_string16:
+ {
+ tag_type = t_type_info_string;
+ LLVMValueRef vals[2] = {
+ lb_const_bool(m, t_bool, false).value,
+ lb_const_int(m, t_type_info_string_encoding_kind, 1).value,
+ };
+
+ variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals));
+ }
+ break;
+
+
+ case Basic_cstring16:
+ {
+ tag_type = t_type_info_string;
+ LLVMValueRef vals[2] = {
lb_const_bool(m, t_bool, true).value,
+ lb_const_int(m, t_type_info_string_encoding_kind, 1).value,
};
variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals));
diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp
index 521553147..dcb95a9a2 100644
--- a/src/llvm_backend_utility.cpp
+++ b/src/llvm_backend_utility.cpp
@@ -6,6 +6,7 @@ gb_internal bool lb_is_type_aggregate(Type *t) {
case Type_Basic:
switch (t->Basic.kind) {
case Basic_string:
+ case Basic_string16:
case Basic_any:
return true;
@@ -190,6 +191,23 @@ gb_internal lbValue lb_emit_clamp(lbProcedure *p, Type *t, lbValue x, lbValue mi
return z;
}
+// Assembles a string16 value from a data pointer (`str_elem`) and a length (`str_len`).
+// The value is built in a generated local (field 0 = pointer, field 1 = length) and loaded back.
+gb_internal lbValue lb_emit_string16(lbProcedure *p, lbValue str_elem, lbValue str_len) {
+	// NOTE: the constant-struct fast path is deliberately switched off; the
+	// short-circuit means `lb_is_const` is never evaluated while it is disabled.
+	bool const allow_const_path = false;
+
+	if (!(allow_const_path && lb_is_const(str_elem) && lb_is_const(str_len))) {
+		lbAddr slot = lb_add_local_generated(p, t_string16, false);
+		lb_emit_store(p, lb_emit_struct_ep(p, slot.addr, 0), str_elem);
+		lb_emit_store(p, lb_emit_struct_ep(p, slot.addr, 1), str_len);
+		return lb_addr_load(p, slot);
+	}
+
+	LLVMValueRef fields[2] = {
+		str_elem.value,
+		str_len.value,
+	};
+	lbValue folded = {};
+	folded.type = t_string16;
+	folded.value = llvm_const_named_struct(p->module, t_string16, fields, gb_count_of(fields));
+	return folded;
+}
gb_internal lbValue lb_emit_string(lbProcedure *p, lbValue str_elem, lbValue str_len) {
@@ -981,7 +999,8 @@ gb_internal i32 lb_convert_struct_index(lbModule *m, Type *t, i32 index) {
} else if (build_context.ptr_size != build_context.int_size) {
switch (t->kind) {
case Type_Basic:
- if (t->Basic.kind != Basic_string) {
+ if (t->Basic.kind != Basic_string &&
+ t->Basic.kind != Basic_string16) {
break;
}
/*fallthrough*/
@@ -1160,6 +1179,11 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) {
case 0: result_type = alloc_type_pointer(t->Slice.elem); break;
case 1: result_type = t_int; break;
}
+ } else if (is_type_string16(t)) {
+ switch (index) {
+ case 0: result_type = t_u16_ptr; break;
+ case 1: result_type = t_int; break;
+ }
} else if (is_type_string(t)) {
switch (index) {
case 0: result_type = t_u8_ptr; break;
@@ -1273,6 +1297,12 @@ gb_internal lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) {
switch (t->kind) {
case Type_Basic:
switch (t->Basic.kind) {
+ case Basic_string16:
+ switch (index) {
+ case 0: result_type = t_u16_ptr; break;
+ case 1: result_type = t_int; break;
+ }
+ break;
case Basic_string:
switch (index) {
case 0: result_type = t_u8_ptr; break;
@@ -1440,6 +1470,10 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection
e = lb_emit_struct_ep(p, e, index);
break;
+ case Basic_string16:
+ e = lb_emit_struct_ep(p, e, index);
+ break;
+
default:
GB_PANIC("un-gep-able type %s", type_to_string(type));
break;
@@ -1626,11 +1660,17 @@ gb_internal void lb_fill_string(lbProcedure *p, lbAddr const &string, lbValue ba
+// Extracts struct field 0 (the data pointer) from a `string` or `string16` value.
+// string16 is accepted first; any other type must be Basic_string or the assert fires.
gb_internal lbValue lb_string_elem(lbProcedure *p, lbValue string) {
Type *t = base_type(string.type);
+ if (t->kind == Type_Basic && t->Basic.kind == Basic_string16) {
+ return lb_emit_struct_ev(p, string, 0);
+ }
GB_ASSERT(t->kind == Type_Basic && t->Basic.kind == Basic_string);
return lb_emit_struct_ev(p, string, 0);
}
+// Extracts struct field 1 (the length) from a `string` or `string16` value.
+// string16 is accepted first; any other type must be Basic_string or the assert fires
+// with the offending type's name.
gb_internal lbValue lb_string_len(lbProcedure *p, lbValue string) {
Type *t = base_type(string.type);
+ if (t->kind == Type_Basic && t->Basic.kind == Basic_string16) {
+ return lb_emit_struct_ev(p, string, 1);
+ }
GB_ASSERT_MSG(t->kind == Type_Basic && t->Basic.kind == Basic_string, "%s", type_to_string(t));
return lb_emit_struct_ev(p, string, 1);
}
@@ -1641,6 +1681,12 @@ gb_internal lbValue lb_cstring_len(lbProcedure *p, lbValue value) {
args[0] = lb_emit_conv(p, value, t_cstring);
return lb_emit_runtime_call(p, "cstring_len", args);
}
+// Emits a call to the runtime procedure `cstring16_len` for a cstring16 value.
+// Asserts that `value` is a cstring16; it is converted to `t_cstring16` before the call.
+gb_internal lbValue lb_cstring16_len(lbProcedure *p, lbValue value) {
+	GB_ASSERT(is_type_cstring16(value.type));
+
+	auto call_args = array_make<lbValue>(permanent_allocator(), 1);
+	lbValue as_cstring16 = lb_emit_conv(p, value, t_cstring16);
+	call_args[0] = as_cstring16;
+
+	lbValue length = lb_emit_runtime_call(p, "cstring16_len", call_args);
+	return length;
+}
gb_internal lbValue lb_array_elem(lbProcedure *p, lbValue array_ptr) {
diff --git a/src/main.cpp b/src/main.cpp
index 112d1208a..5a43e3c02 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -142,9 +142,9 @@ gb_internal i32 system_exec_command_line_app_internal(bool exit_on_err, char con
}
wcmd = string_to_string16(permanent_allocator(), make_string(cast(u8 *)cmd_line, cmd_len-1));
- if (CreateProcessW(nullptr, wcmd.text,
- nullptr, nullptr, true, 0, nullptr, nullptr,
- &start_info, &pi)) {
+ if (CreateProcessW(nullptr, cast(wchar_t *)wcmd.text,
+ nullptr, nullptr, true, 0, nullptr, nullptr,
+ &start_info, &pi)) {
WaitForSingleObject(pi.hProcess, INFINITE);
GetExitCodeProcess(pi.hProcess, cast(DWORD *)&exit_code);
@@ -232,7 +232,7 @@ gb_internal Array<String> setup_args(int argc, char const **argv) {
wchar_t **wargv = command_line_to_wargv(GetCommandLineW(), &wargc);
auto args = array_make<String>(a, 0, wargc);
for (isize i = 0; i < wargc; i++) {
- wchar_t *warg = wargv[i];
+ u16 *warg = cast(u16 *)wargv[i];
isize wlen = string16_len(warg);
String16 wstr = make_string16(warg, wlen);
String arg = string16_to_string(a, wstr);
diff --git a/src/microsoft_craziness.h b/src/microsoft_craziness.h
index b0fd22a23..933607a2a 100644
--- a/src/microsoft_craziness.h
+++ b/src/microsoft_craziness.h
@@ -59,7 +59,7 @@ struct Find_Result {
};
gb_internal String mc_wstring_to_string(wchar_t const *str) {
- return string16_to_string(mc_allocator, make_string16_c(str));
+ return string16_to_string(mc_allocator, make_string16_c(cast(u16 *)str));
}
gb_internal String16 mc_string_to_wstring(String str) {
@@ -103,7 +103,7 @@ gb_internal HANDLE mc_find_first(String wildcard, MC_Find_Data *find_data) {
String16 wildcard_wide = mc_string_to_wstring(wildcard);
defer (mc_free(wildcard_wide));
- HANDLE handle = FindFirstFileW(wildcard_wide.text, &_find_data);
+ HANDLE handle = FindFirstFileW(cast(wchar_t *)wildcard_wide.text, &_find_data);
if (handle == INVALID_HANDLE_VALUE) return INVALID_HANDLE_VALUE;
find_data->file_attributes = _find_data.dwFileAttributes;
diff --git a/src/path.cpp b/src/path.cpp
index d5e982088..2b97a04df 100644
--- a/src/path.cpp
+++ b/src/path.cpp
@@ -130,7 +130,7 @@ gb_internal String directory_from_path(String const &s) {
String16 wstr = string_to_string16(a, path);
defer (gb_free(a, wstr.text));
- i32 attribs = GetFileAttributesW(wstr.text);
+ i32 attribs = GetFileAttributesW(cast(wchar_t *)wstr.text);
if (attribs < 0) return false;
return (attribs & FILE_ATTRIBUTE_DIRECTORY) != 0;
@@ -360,7 +360,7 @@ gb_internal ReadDirectoryError read_directory(String path, Array<FileInfo> *fi)
defer (gb_free(a, wstr.text));
WIN32_FIND_DATAW file_data = {};
- HANDLE find_file = FindFirstFileW(wstr.text, &file_data);
+ HANDLE find_file = FindFirstFileW(cast(wchar_t *)wstr.text, &file_data);
if (find_file == INVALID_HANDLE_VALUE) {
return ReadDirectory_Unknown;
}
@@ -372,7 +372,7 @@ gb_internal ReadDirectoryError read_directory(String path, Array<FileInfo> *fi)
wchar_t *filename_w = file_data.cFileName;
u64 size = cast(u64)file_data.nFileSizeLow;
size |= (cast(u64)file_data.nFileSizeHigh) << 32;
- String name = string16_to_string(a, make_string16_c(filename_w));
+ String name = string16_to_string(a, make_string16_c(cast(u16 *)filename_w));
if (name == "." || name == "..") {
gb_free(a, name.text);
continue;
@@ -494,7 +494,7 @@ gb_internal bool write_directory(String path) {
#else
gb_internal bool write_directory(String path) {
String16 wstr = string_to_string16(heap_allocator(), path);
- LPCWSTR wdirectory_name = wstr.text;
+ LPCWSTR wdirectory_name = cast(wchar_t *)wstr.text;
HANDLE directory = CreateFileW(wdirectory_name,
GENERIC_WRITE,
diff --git a/src/string.cpp b/src/string.cpp
index ae8d066b1..2087a5fee 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -26,15 +26,14 @@ struct String_Iterator {
// NOTE(bill): String16 is only used for Windows due to its file directories
struct String16 {
- wchar_t *text;
- isize len;
- wchar_t const &operator[](isize i) const {
+ u16 * text;
+ isize len;
+ u16 const &operator[](isize i) const {
GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
return text[i];
}
};
-
gb_internal gb_inline String make_string(u8 const *text, isize len) {
String s;
s.text = cast(u8 *)text;
@@ -45,19 +44,19 @@ gb_internal gb_inline String make_string(u8 const *text, isize len) {
return s;
}
-
-gb_internal gb_inline String16 make_string16(wchar_t const *text, isize len) {
+gb_internal gb_inline String16 make_string16(u16 const *text, isize len) {
String16 s;
- s.text = cast(wchar_t *)text;
+ s.text = cast(u16 *)text;
s.len = len;
return s;
}
-gb_internal isize string16_len(wchar_t const *s) {
+
+gb_internal isize string16_len(u16 const *s) {
if (s == nullptr) {
return 0;
}
- wchar_t const *p = s;
+ u16 const *p = s;
while (*p) {
p++;
}
@@ -69,7 +68,7 @@ gb_internal gb_inline String make_string_c(char const *text) {
return make_string(cast(u8 *)cast(void *)text, gb_strlen(text));
}
-gb_internal gb_inline String16 make_string16_c(wchar_t const *text) {
+gb_internal gb_inline String16 make_string16_c(u16 const *text) {
return make_string16(text, string16_len(text));
}
@@ -80,6 +79,13 @@ gb_internal String substring(String const &s, isize lo, isize hi) {
return make_string(s.text+lo, hi-lo);
}
+// Returns the view s[lo:hi] of a String16 (indices are in 16-bit code units).
+// No data is copied; asserts that lo <= hi <= s.len.
+gb_internal String16 substring(String16 const &s, isize lo, isize hi) {
+	isize const limit = s.len;
+	GB_ASSERT_MSG(lo <= hi && hi <= limit, "%td..%td..%td", lo, hi, limit);
+
+	u16 *first = s.text + lo;
+	isize count = hi - lo;
+	return make_string16(first, count);
+}
+
gb_internal char *alloc_cstring(gbAllocator a, String s) {
char *c_str = gb_alloc_array(a, char, s.len+1);
@@ -145,6 +151,27 @@ gb_internal int string_compare(String const &a, String const &b) {
return res;
}
+
+// Three-way comparison of two String16 values: negative, zero or positive.
+// Strings sharing the same data pointer are ordered by length alone; a null
+// data pointer sorts before a non-null one. Otherwise the common prefix is
+// compared with memcmp and ties are broken by length.
+// NOTE(review): memcmp orders by bytes, so on a little-endian host this is
+// presumably not numeric code-unit order - confirm that callers only rely on
+// a consistent ordering.
+gb_internal int string16_compare(String16 const &a, String16 const &b) {
+	int const len_diff = cast(int)(a.len - b.len);
+
+	if (a.text == b.text) {
+		return len_diff;
+	}
+	if (a.text == nullptr) {
+		return -1;
+	}
+	if (b.text == nullptr) {
+		return +1;
+	}
+
+	uintptr shared_units = gb_min(a.len, b.len);
+	int order = memcmp(a.text, b.text, shared_units*gb_size_of(u16));
+	return (order != 0) ? order : len_diff;
+}
+
+
gb_internal isize string_index_byte(String const &s, u8 x) {
for (isize i = 0; i < s.len; i++) {
if (s.text[i] == x) {
@@ -182,6 +209,26 @@ template <isize N> gb_internal bool operator >= (String const &a, char const (&b
template <> bool operator == (String const &a, char const (&b)[1]) { return a.len == 0; }
template <> bool operator != (String const &a, char const (&b)[1]) { return a.len != 0; }
+
+// Equality of two String16 values: same length and identical code units.
+// Two empty strings are equal regardless of their data pointers.
+gb_internal gb_inline bool str_eq(String16 const &a, String16 const &b) {
+	if (a.len != b.len) return false;
+	if (a.len == 0) return true;
+	// memcmp takes a byte count and `len` counts 16-bit code units, so scale it;
+	// passing `a.len` directly would only compare the first half of the data.
+	return memcmp(a.text, b.text, a.len*gb_size_of(u16)) == 0;
+}
+// Remaining String16 relational helpers: str_ne negates str_eq; the ordering
+// helpers test the sign of string16_compare(a, b).
+gb_internal gb_inline bool str_ne(String16 const &a, String16 const &b) { return !str_eq(a, b); }
+gb_internal gb_inline bool str_lt(String16 const &a, String16 const &b) { return string16_compare(a, b) < 0; }
+gb_internal gb_inline bool str_gt(String16 const &a, String16 const &b) { return string16_compare(a, b) > 0; }
+gb_internal gb_inline bool str_le(String16 const &a, String16 const &b) { return string16_compare(a, b) <= 0; }
+gb_internal gb_inline bool str_ge(String16 const &a, String16 const &b) { return string16_compare(a, b) >= 0; }
+
+// Comparison operators for String16; each forwards to the str_* helper of the same meaning.
+gb_internal gb_inline bool operator == (String16 const &a, String16 const &b) { return str_eq(a, b); }
+gb_internal gb_inline bool operator != (String16 const &a, String16 const &b) { return str_ne(a, b); }
+gb_internal gb_inline bool operator < (String16 const &a, String16 const &b) { return str_lt(a, b); }
+gb_internal gb_inline bool operator > (String16 const &a, String16 const &b) { return str_gt(a, b); }
+gb_internal gb_inline bool operator <= (String16 const &a, String16 const &b) { return str_le(a, b); }
+gb_internal gb_inline bool operator >= (String16 const &a, String16 const &b) { return str_ge(a, b); }
+
+
gb_internal gb_inline bool string_starts_with(String const &s, String const &prefix) {
if (prefix.len > s.len) {
return false;
@@ -611,10 +658,9 @@ gb_internal String normalize_path(gbAllocator a, String const &path, String cons
-// TODO(bill): Make this non-windows specific
gb_internal String16 string_to_string16(gbAllocator a, String s) {
int len, len1;
- wchar_t *text;
+ u16 *text;
if (s.len < 1) {
return make_string16(nullptr, 0);
@@ -625,15 +671,14 @@ gb_internal String16 string_to_string16(gbAllocator a, String s) {
return make_string16(nullptr, 0);
}
- text = gb_alloc_array(a, wchar_t, len+1);
+ text = gb_alloc_array(a, u16, len+1);
- len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, text, cast(int)len);
+ len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, cast(wchar_t *)text, cast(int)len);
if (len1 == 0) {
gb_free(a, text);
return make_string16(nullptr, 0);
}
text[len] = 0;
-
return make_string16(text, len);
}
@@ -646,7 +691,7 @@ gb_internal String string16_to_string(gbAllocator a, String16 s) {
return make_string(nullptr, 0);
}
- len = convert_widechar_to_multibyte(s.text, cast(int)s.len, nullptr, 0);
+ len = convert_widechar_to_multibyte(cast(wchar_t *)s.text, cast(int)s.len, nullptr, 0);
if (len == 0) {
return make_string(nullptr, 0);
}
@@ -654,7 +699,7 @@ gb_internal String string16_to_string(gbAllocator a, String16 s) {
text = gb_alloc_array(a, u8, len+1);
- len1 = convert_widechar_to_multibyte(s.text, cast(int)s.len, cast(char *)text, cast(int)len);
+ len1 = convert_widechar_to_multibyte(cast(wchar_t *)s.text, cast(int)s.len, cast(char *)text, cast(int)len);
if (len1 == 0) {
gb_free(a, text);
return make_string(nullptr, 0);
@@ -674,9 +719,9 @@ gb_internal String temporary_directory(gbAllocator allocator) {
return String{0};
}
DWORD len = gb_max(MAX_PATH, n);
- wchar_t *b = gb_alloc_array(heap_allocator(), wchar_t, len+1);
+ u16 *b = gb_alloc_array(heap_allocator(), u16, len+1);
defer (gb_free(heap_allocator(), b));
- n = GetTempPathW(len, b);
+ n = GetTempPathW(len, cast(wchar_t *)b);
if (n == 3 && b[1] == ':' && b[2] == '\\') {
} else if (n > 0 && b[n-1] == '\\') {
@@ -791,6 +836,104 @@ gb_internal String quote_to_ascii(gbAllocator a, String str, u8 quote='"') {
return res;
}
+// Combines a UTF-16 high surrogate `r1` and low surrogate `r2` into the code
+// point they encode. Returns GB_RUNE_INVALID unless `r1` is in
+// [0xd800, 0xdc00) and `r2` is in [0xdc00, 0xe000).
+gb_internal Rune decode_surrogate_pair(u16 r1, u16 r2) {
+	Rune const high_begin         = 0xd800;
+	Rune const low_begin          = 0xdc00;
+	Rune const low_end            = 0xe000;
+	Rune const supplementary_base = 0x10000;
+
+	bool is_high = high_begin <= r1 && r1 < low_begin;
+	bool is_low  = low_begin  <= r2 && r2 < low_end;
+	if (!is_high || !is_low) {
+		return GB_RUNE_INVALID;
+	}
+
+	Rune high_bits = (r1 - high_begin) << 10;
+	Rune low_bits  = r2 - low_begin;
+	return (high_bits | low_bits) + supplementary_base;
+}
+
+// Produces a quoted, ASCII-only rendering of the UTF-16 string `str`. The
+// bytes are accumulated in an array created with allocator `a`, and the
+// result begins and ends with `quote`.
+//
+//  - `quote` and backslash are written with a preceding backslash
+//  - printable ASCII is copied through unchanged
+//  - control characters below ' ' become `\xNN`
+//  - every other code point becomes `\uNNNN` (below 0x10000) or `\UNNNNNNNN`
+//  - a code unit that cannot be decoded (e.g. a high surrogate without a
+//    matching low surrogate) becomes `\uNNNN` holding the raw 16-bit value
+gb_internal String quote_to_ascii(gbAllocator a, String16 str, u8 quote='"') {
+	static Rune const _surr1 = 0xd800;
+	static Rune const _surr2 = 0xdc00;
+
+	u16 *s = cast(u16 *)str.text;
+	isize n = str.len;
+	auto buf = array_make<u8>(a, 0, n*2);
+	array_add(&buf, quote);
+	for (isize width = 0; n > 0; s += width, n -= width) {
+		Rune r = cast(Rune)s[0];
+		width = 1;
+		if (_surr1 <= r && r < _surr2) {
+			// High surrogate: only meaningful together with the following unit.
+			r = GB_RUNE_INVALID;
+			if (n > 1) {
+				r = decode_surrogate_pair(s[0], s[1]);
+				if (r != GB_RUNE_INVALID) {
+					width = 2;
+				}
+			}
+		}
+		if (width == 1 && r == GB_RUNE_INVALID) {
+			// NOTE: a code unit is 16 bits wide, so all four nibbles are written.
+			// Indexing `lower_hex` with `s[0]>>4` directly would read far past
+			// the table for any unit above 0xff.
+			u16 unit = s[0];
+			array_add(&buf, cast(u8)'\\');
+			array_add(&buf, cast(u8)'u');
+			for (isize i = 12; i >= 0; i -= 4) {
+				array_add(&buf, cast(u8)lower_hex[(unit>>i)&0xf]);
+			}
+			continue;
+		}
+
+		if (r == quote || r == '\\') {
+			array_add(&buf, cast(u8)'\\');
+			array_add(&buf, u8(r));
+			continue;
+		}
+		if (r < 0x80 && is_printable(r)) {
+			array_add(&buf, u8(r));
+			continue;
+		}
+		if (r < ' ') {
+			u8 b = cast(u8)r;
+			array_add(&buf, cast(u8)'\\');
+			array_add(&buf, cast(u8)'x');
+			array_add(&buf, cast(u8)lower_hex[b>>4]);
+			array_add(&buf, cast(u8)lower_hex[b&0xf]);
+			// Without this the character would be written a second time below
+			// as `\uNNNN`.
+			continue;
+		}
+
+		if (r > GB_RUNE_MAX) {
+			r = 0XFFFD;
+		}
+		if (r < 0x10000) {
+			array_add(&buf, cast(u8)'\\');
+			array_add(&buf, cast(u8)'u');
+			for (isize i = 12; i >= 0; i -= 4) {
+				array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]);
+			}
+		} else {
+			array_add(&buf, cast(u8)'\\');
+			array_add(&buf, cast(u8)'U');
+			for (isize i = 28; i >= 0; i -= 4) {
+				array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]);
+			}
+		}
+	}
+
+	array_add(&buf, quote);
+	String res = {};
+	res.text = buf.data;
+	res.len = buf.count;
+	return res;
+}
+
diff --git a/src/string16_map.cpp b/src/string16_map.cpp
new file mode 100644
index 000000000..c9e2eb817
--- /dev/null
+++ b/src/string16_map.cpp
@@ -0,0 +1,538 @@
+GB_STATIC_ASSERT(sizeof(MapIndex) == sizeof(u32));
+
+
+// A String16 bundled with its hash, so that the String16Map functions taking
+// a String16HashKey can forward `hash` and `string` without hashing again.
+// Built by string_hash_string().
+struct String16HashKey {
+ String16 string;
+ u32 hash;
+
+ // Implicit conversions that yield the wrapped string.
+ operator String16() const noexcept {
+ return this->string;
+ }
+ operator String16 const &() const noexcept {
+ return this->string;
+ }
+};
+// Hashes the raw UTF-16 code units of `s` with fnv32a. The result is masked
+// to 31 bits and is never zero: a zero hash is reported as 1.
+gb_internal gb_inline u32 string_hash(String16 const &s) {
+	u32 hash = fnv32a(s.text, s.len*gb_size_of(u16)) & 0x7fffffff;
+	if (hash == 0) {
+		hash = 1;
+	}
+	return hash;
+}
+
+// Wraps `s` together with its string_hash() value.
+gb_internal gb_inline String16HashKey string_hash_string(String16 const &s) {
+	String16HashKey key = {};
+	key.string = s;
+	key.hash   = string_hash(s);
+	return key;
+}
+
+
+#if 1 /* old string map */
+
+// One key/value record of a String16Map. Records that land in the same
+// bucket are chained through `next`.
+template <typename T>
+struct String16MapEntry {
+ String16 key;
+ u32 hash; // hash supplied alongside `key` when the entry was added
+ MapIndex next; // index of the next entry in this bucket's chain, or MAP_SENTINEL
+ T value;
+};
+
+// Chained hash map keyed by String16. `hashes` holds, per bucket, the index
+// of the first entry of that bucket's chain; the entries themselves are
+// stored densely in `entries[0 .. count)`.
+template <typename T>
+struct String16Map {
+ MapIndex * hashes; // bucket heads; MAP_SENTINEL marks an empty bucket
+ usize hashes_count; // number of buckets; lookups mask the hash with hashes_count-1
+ String16MapEntry<T> *entries;
+ u32 count; // number of entries in use
+ u32 entries_capacity; // number of entries `entries` has room for
+};
+
+
+template <typename T> gb_internal void string16_map_init (String16Map<T> *h, usize capacity = 16);
+template <typename T> gb_internal void string16_map_destroy (String16Map<T> *h);
+
+template <typename T> gb_internal T * string16_map_get (String16Map<T> *h, String16HashKey const &key);
+template <typename T> gb_internal T & string16_map_must_get(String16Map<T> *h, String16HashKey const &key);
+template <typename T> gb_internal void string16_map_set (String16Map<T> *h, String16HashKey const &key, T const &value);
+
+// template <typename T> gb_internal void string16_map_remove (String16Map<T> *h, String16HashKey const &key);
+template <typename T> gb_internal void string16_map_clear (String16Map<T> *h);
+template <typename T> gb_internal void string16_map_grow (String16Map<T> *h);
+template <typename T> gb_internal void string16_map_reserve (String16Map<T> *h, usize new_count);
+
+// Allocator used for the bucket and entry arrays of every String16Map.
+gb_internal gbAllocator string16_map_allocator(void) {
+	gbAllocator allocator = heap_allocator();
+	return allocator;
+}
+
+// Reserves storage in `h` for `capacity` rounded with next_pow2_isize().
+template <typename T>
+gb_internal gb_inline void string16_map_init(String16Map<T> *h, usize capacity) {
+	usize rounded = next_pow2_isize(capacity);
+	string16_map_reserve(h, rounded);
+}
+
+// Frees the bucket and entry arrays of `h`. The fields of `h` are left as is.
+template <typename T>
+gb_internal gb_inline void string16_map_destroy(String16Map<T> *h) {
+	gbAllocator allocator = string16_map_allocator();
+	gb_free(allocator, h->hashes);
+	gb_free(allocator, h->entries);
+}
+
+
+// Resizes the bucket array to `count` slots and records the resulting size.
+template <typename T>
+gb_internal void string16_map__resize_hashes(String16Map<T> *h, usize count) {
+	auto resized = resize_array_raw(&h->hashes, string16_map_allocator(), h->hashes_count, count, MAP_CACHE_LINE_SIZE);
+	h->hashes_count = cast(u32)resized;
+}
+
+
+// Resizes the entry array to `capacity` slots and records the resulting size.
+template <typename T>
+gb_internal void string16_map__reserve_entries(String16Map<T> *h, usize capacity) {
+	auto resized = resize_array_raw(&h->entries, string16_map_allocator(), h->entries_capacity, capacity, MAP_CACHE_LINE_SIZE);
+	h->entries_capacity = cast(u32)resized;
+}
+
+
+// Appends an unlinked entry for `key`/`hash` (value zero-initialised) to the
+// entry array, growing it when needed, and returns the new entry's index.
+// The caller is responsible for linking it into a bucket chain.
+template <typename T>
+gb_internal MapIndex string16_map__add_entry(String16Map<T> *h, u32 hash, String16 const &key) {
+	// Build the entry before any reallocation so `key` is copied first.
+	String16MapEntry<T> fresh = {};
+	fresh.key  = key;
+	fresh.hash = hash;
+	fresh.next = MAP_SENTINEL;
+
+	if (h->count+1 >= h->entries_capacity) {
+		string16_map__reserve_entries(h, gb_max(h->entries_capacity*2, 4));
+	}
+
+	MapIndex slot = cast(MapIndex)h->count;
+	h->entries[slot] = fresh;
+	h->count += 1;
+	return slot;
+}
+
+// Looks up `key` in its bucket chain. On a hit `entry_index` is the matching
+// entry; on a miss it is MAP_SENTINEL and `entry_prev` is the last entry of
+// the chain (or MAP_SENTINEL when the bucket is empty). With no buckets
+// allocated all three fields stay MAP_SENTINEL.
+template <typename T>
+gb_internal MapFindResult string16_map__find(String16Map<T> *h, u32 hash, String16 const &key) {
+	MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL};
+	if (h->hashes_count == 0) {
+		return fr;
+	}
+
+	fr.hash_index = cast(MapIndex)(hash & (h->hashes_count-1));
+	for (fr.entry_index = h->hashes[fr.hash_index]; fr.entry_index != MAP_SENTINEL; ) {
+		String16MapEntry<T> *entry = &h->entries[fr.entry_index];
+		if (entry->hash == hash && entry->key == key) {
+			break;
+		}
+		fr.entry_prev  = fr.entry_index;
+		fr.entry_index = entry->next;
+	}
+	return fr;
+}
+
+// Same walk as string16_map__find, but matches by entry address: follows the
+// chain of the bucket `e->hash` maps to until it reaches `e` itself.
+template <typename T>
+gb_internal MapFindResult string16_map__find_from_entry(String16Map<T> *h, String16MapEntry<T> *e) {
+	MapFindResult fr = {MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL};
+	if (h->hashes_count == 0) {
+		return fr;
+	}
+
+	fr.hash_index = cast(MapIndex)(e->hash & (h->hashes_count-1));
+	for (fr.entry_index = h->hashes[fr.hash_index]; fr.entry_index != MAP_SENTINEL; ) {
+		String16MapEntry<T> *entry = &h->entries[fr.entry_index];
+		if (entry == e) {
+			break;
+		}
+		fr.entry_prev  = fr.entry_index;
+		fr.entry_index = entry->next;
+	}
+	return fr;
+}
+
+// True once the entry count has reached 75% of the bucket count.
+template <typename T>
+gb_internal b32 string16_map__full(String16Map<T> *h) {
+	return h->count >= 0.75f * h->hashes_count;
+}
+
+// Requests a reserve of twice the current bucket count, but at least 16.
+template <typename T>
+gb_inline void string16_map_grow(String16Map<T> *h) {
+	usize doubled = h->hashes_count<<1;
+	isize new_count = gb_max(doubled, 16);
+	string16_map_reserve(h, new_count);
+}
+
+
+// Rebuilds every bucket chain from scratch: empties all buckets, then
+// re-links the entries in index order, appending each to the tail of the
+// chain its hash maps to.
+template <typename T>
+gb_internal void string16_map_reset_entries(String16Map<T> *h) {
+	for (u32 bucket = 0; bucket < h->hashes_count; bucket++) {
+		h->hashes[bucket] = MAP_SENTINEL;
+	}
+	for (isize i = 0; i < h->count; i++) {
+		String16MapEntry<T> *e = &h->entries[i];
+		e->next = MAP_SENTINEL;
+
+		// `e` is not linked yet, so the walk ends at the chain's current tail.
+		MapFindResult fr = string16_map__find_from_entry(h, e);
+		MapIndex *link = (fr.entry_prev == MAP_SENTINEL)
+			? &h->hashes[fr.hash_index]
+			: &h->entries[fr.entry_prev].next;
+		*link = cast(MapIndex)i;
+	}
+}
+
+// Resizes the entry array to `cap` and the bucket array to `cap*2`, then
+// rebuilds the chains. Does nothing while the bucket count is still more
+// than twice the entry count.
+template <typename T>
+gb_internal void string16_map_reserve(String16Map<T> *h, usize cap) {
+	bool has_room = h->count*2 < h->hashes_count;
+	if (!has_room) {
+		string16_map__reserve_entries(h, cap);
+		string16_map__resize_hashes(h, cap*2);
+		string16_map_reset_entries(h);
+	}
+}
+
+// Returns a pointer to the value stored under `key` (with precomputed
+// `hash`), or nullptr when the key is absent or no buckets are allocated.
+template <typename T>
+gb_internal T *string16_map_get(String16Map<T> *h, u32 hash, String16 const &key) {
+	if (h->hashes_count == 0) {
+		return nullptr;
+	}
+
+	MapIndex bucket = cast(MapIndex)(hash & (h->hashes_count-1));
+	for (MapIndex at = h->hashes[bucket]; at != MAP_SENTINEL; at = h->entries[at].next) {
+		String16MapEntry<T> *entry = &h->entries[at];
+		if (entry->hash == hash && entry->key == key) {
+			return &entry->value;
+		}
+	}
+	return nullptr;
+}
+
+
+// Convenience overload: looks up using the hash cached in `key`.
+template <typename T>
+gb_internal gb_inline T *string16_map_get(String16Map<T> *h, String16HashKey const &key) {
+	u32 hash = key.hash;
+	String16 const &string = key.string;
+	return string16_map_get(h, hash, string);
+}
+// Returns a reference to the value stored under `key`; asserts that the key
+// is present.
+template <typename T>
+gb_internal T &string16_map_must_get(String16Map<T> *h, u32 hash, String16 const &key) {
+	MapFindResult fr = string16_map__find(h, hash, key);
+	isize index = fr.entry_index;
+	GB_ASSERT(index != MAP_SENTINEL);
+	T &value = h->entries[index].value;
+	return value;
+}
+
+// Convenience overload: asserting lookup using the hash cached in `key`.
+template <typename T>
+gb_internal T &string16_map_must_get(String16Map<T> *h, String16HashKey const &key) {
+	u32 hash = key.hash;
+	String16 const &string = key.string;
+	return string16_map_must_get(h, hash, string);
+}
+
+// Stores `value` under `key`, overwriting an existing value or appending a
+// new entry to the key's bucket chain. Grows the map first when it has no
+// buckets, and afterwards when string16_map__full() reports it as full.
+template <typename T>
+gb_internal void string16_map_set(String16Map<T> *h, u32 hash, String16 const &key, T const &value) {
+	if (h->hashes_count == 0) {
+		string16_map_grow(h);
+	}
+
+	MapFindResult fr = string16_map__find(h, hash, key);
+	MapIndex index = fr.entry_index;
+	if (index == MAP_SENTINEL) {
+		index = string16_map__add_entry(h, hash, key);
+		if (fr.entry_prev == MAP_SENTINEL) {
+			// Empty bucket: the new entry becomes its head.
+			h->hashes[fr.hash_index] = index;
+		} else {
+			h->entries[fr.entry_prev].next = index;
+		}
+	}
+	h->entries[index].value = value;
+
+	if (string16_map__full(h)) {
+		string16_map_grow(h);
+	}
+}
+
+// Convenience overload: stores `value` using the hash cached in `key`.
+template <typename T>
+gb_internal gb_inline void string16_map_set(String16Map<T> *h, String16HashKey const &key, T const &value) {
+	u32 hash = key.hash;
+	String16 const &string = key.string;
+	string16_map_set(h, hash, string, value);
+}
+
+
+// Empties the map while keeping its allocations: every bucket is reset to
+// MAP_SENTINEL and the entry count drops to zero.
+template <typename T>
+gb_internal gb_inline void string16_map_clear(String16Map<T> *h) {
+	for (u32 bucket = 0; bucket < h->hashes_count; bucket++) {
+		h->hashes[bucket] = MAP_SENTINEL;
+	}
+	h->count = 0;
+}
+
+
+
+// Range-for support: iteration starts at the first stored entry.
+template <typename T>
+gb_internal String16MapEntry<T> *begin(String16Map<T> &m) noexcept {
+	String16MapEntry<T> *first = m.entries;
+	return first;
+}
+template <typename T>
+gb_internal String16MapEntry<T> const *begin(String16Map<T> const &m) noexcept {
+	String16MapEntry<T> const *first = m.entries;
+	return first;
+}
+
+
+// Range-for support: iteration stops one past the last stored entry.
+template <typename T>
+gb_internal String16MapEntry<T> *end(String16Map<T> &m) noexcept {
+	String16MapEntry<T> *first = m.entries;
+	return first + m.count;
+}
+
+template <typename T>
+gb_internal String16MapEntry<T> const *end(String16Map<T> const &m) noexcept {
+	String16MapEntry<T> const *first = m.entries;
+	return first + m.count;
+}
+
+#else /* new string map */
+
+// Slot of the open-addressed String16Map used by this (currently disabled)
+// branch; `hash == 0` marks an empty slot.
+// NOTE: these were declared as StringMapEntry/StringMap with a `String` key,
+// while every function in this branch operates on String16MapEntry<T> and
+// String16Map<T> with String16 keys.
+template <typename T>
+struct String16MapEntry {
+	String16 key;
+	u32      hash;
+	T        value;
+};
+
+template <typename T>
+struct String16Map {
+	String16MapEntry<T> *entries;
+	u32                  count;
+	u32                  capacity;
+};
+
+
+template <typename T> gb_internal void string16_map_init (String16Map<T> *h, usize capacity = 16);
+template <typename T> gb_internal void string16_map_destroy (String16Map<T> *h);
+
+template <typename T> gb_internal T * string16_map_get (String16Map<T> *h, String16 const &key);
+template <typename T> gb_internal T * string16_map_get (String16Map<T> *h, String16HashKey const &key);
+
+template <typename T> gb_internal T & string16_map_must_get(String16Map<T> *h, String16 const &key);
+template <typename T> gb_internal T & string16_map_must_get(String16Map<T> *h, String16HashKey const &key);
+
+template <typename T> gb_internal void string16_map_set (String16Map<T> *h, String16 const &key, T const &value);
+template <typename T> gb_internal void string16_map_set (String16Map<T> *h, String16HashKey const &key, T const &value);
+
+// template <typename T> gb_internal void string16_map_remove (String16Map<T> *h, String16HashKey const &key);
+template <typename T> gb_internal void string16_map_clear (String16Map<T> *h);
+template <typename T> gb_internal void string16_map_grow (String16Map<T> *h);
+template <typename T> gb_internal void string16_map_reserve (String16Map<T> *h, usize new_count);
+
+gb_internal gbAllocator string16_map_allocator(void) {
+ return heap_allocator();
+}
+
+template <typename T>
+gb_internal gb_inline void string16_map_init(String16Map<T> *h, usize capacity) {
+ capacity = next_pow2_isize(capacity);
+ string16_map_reserve(h, capacity);
+}
+
+template <typename T>
+gb_internal gb_inline void string16_map_destroy(String16Map<T> *h) {
+ gb_free(string16_map_allocator(), h->entries);
+}
+
+
+template <typename T>
+gb_internal void string16_map__insert(String16Map<T> *h, u32 hash, String16 const &key, T const &value) {
+ if (h->count+1 >= h->capacity) {
+ string16_map_grow(h);
+ }
+ GB_ASSERT(h->count+1 < h->capacity);
+
+ u32 mask = h->capacity-1;
+ MapIndex index = hash & mask;
+ MapIndex original_index = index;
+ do {
+ auto *entry = h->entries+index;
+ if (entry->hash == 0) {
+ entry->key = key;
+ entry->hash = hash;
+ entry->value = value;
+
+ h->count += 1;
+ return;
+ }
+ index = (index+1)&mask;
+ } while (index != original_index);
+
+ GB_PANIC("Full map");
+}
+
+// True once the entry count has reached 75% of the slot capacity.
+// NOTE: the operands were previously swapped (`0.75f * count <= capacity`),
+// which is true for nearly every map regardless of how full it is.
+template <typename T>
+gb_internal b32 string16_map__full(String16Map<T> *h) {
+	return 0.75f * h->capacity <= h->count;
+}
+
+template <typename T>
+gb_inline void string16_map_grow(String16Map<T> *h) {
+ isize new_capacity = gb_max(h->capacity<<1, 16);
+ string16_map_reserve(h, new_capacity);
+}
+
+
+template <typename T>
+gb_internal void string16_map_reserve(String16Map<T> *h, usize cap) {
+ if (cap < h->capacity) {
+ return;
+ }
+ cap = next_pow2_isize(cap);
+
+ String16Map<T> new_h = {};
+ new_h.count = 0;
+ new_h.capacity = cast(u32)cap;
+ new_h.entries = gb_alloc_array(string16_map_allocator(), String16MapEntry<T>, new_h.capacity);
+
+ if (h->count) {
+ for (u32 i = 0; i < h->capacity; i++) {
+ auto *entry = h->entries+i;
+ if (entry->hash) {
+ string16_map__insert(&new_h, entry->hash, entry->key, entry->value);
+ }
+ }
+ }
+ string16_map_destroy(h);
+ *h = new_h;
+}
+
+template <typename T>
+gb_internal T *string16_map_get(String16Map<T> *h, u32 hash, String16 const &key) {
+ if (h->count == 0) {
+ return nullptr;
+ }
+ u32 mask = (h->capacity-1);
+ u32 index = hash & mask;
+ u32 original_index = index;
+ do {
+ auto *entry = h->entries+index;
+ u32 curr_hash = entry->hash;
+ if (curr_hash == 0) {
+ // NOTE(bill): no found, but there isn't any key removal for this hash map
+ return nullptr;
+ } else if (curr_hash == hash && entry->key == key) {
+ return &entry->value;
+ }
+ index = (index+1) & mask;
+ } while (original_index != index);
+ return nullptr;
+}
+
+
+template <typename T>
+gb_internal gb_inline T *string16_map_get(String16Map<T> *h, String16HashKey const &key) {
+ return string16_map_get(h, key.hash, key.string);
+}
+
+template <typename T>
+gb_internal gb_inline T *string16_map_get(String16Map<T> *h, String16 const &key) {
+ return string16_map_get(h, string_hash(key), key);
+}
+
+template <typename T>
+gb_internal T &string16_map_must_get(String16Map<T> *h, u32 hash, String16 const &key) {
+ T *found = string16_map_get(h, hash, key);
+ GB_ASSERT(found != nullptr);
+ return *found;
+}
+
+template <typename T>
+gb_internal T &string16_map_must_get(String16Map<T> *h, String16HashKey const &key) {
+ return string16_map_must_get(h, key.hash, key.string);
+}
+
+template <typename T>
+gb_internal gb_inline T &string16_map_must_get(String16Map<T> *h, String16 const &key) {
+ return string16_map_must_get(h, string_hash(key), key);
+}
+
+template <typename T>
+gb_internal void string16_map_set(String16Map<T> *h, u32 hash, String16 const &key, T const &value) {
+ if (h->count == 0) {
+ string16_map_grow(h);
+ }
+ auto *found = string16_map_get(h, hash, key);
+ if (found) {
+ *found = value;
+ return;
+ }
+ string16_map__insert(h, hash, key, value);
+}
+
+template <typename T>
+gb_internal gb_inline void string16_map_set(String16Map<T> *h, String16 const &key, T const &value) {
+ string16_map_set(h, string_hash_string(key), value);
+}
+
+template <typename T>
+gb_internal gb_inline void string16_map_set(String16Map<T> *h, String16HashKey const &key, T const &value) {
+ string16_map_set(h, key.hash, key.string, value);
+}
+
+
+template <typename T>
+gb_internal gb_inline void string16_map_clear(String16Map<T> *h) {
+ h->count = 0;
+ gb_zero_array(h->entries, h->capacity);
+}
+
+
+// Forward iterator over the occupied slots (`hash != 0`) of the
+// open-addressed String16Map. An iterator whose `index` equals the map's
+// capacity is the end iterator.
+template <typename T>
+struct StringMapIterator {
+	String16Map<T> *map;
+	MapIndex index;
+
+	// Advances to the next occupied slot, or to the end position.
+	StringMapIterator<T> &operator++() noexcept {
+		for (;;) {
+			++index;
+			if (map->capacity == index) {
+				return *this;
+			}
+			String16MapEntry<T> *entry = map->entries+index;
+			if (entry->hash != 0) {
+				return *this;
+			}
+		}
+	}
+
+	// `other` is a reference, so its members are reached with `.` rather than `->`.
+	bool operator==(StringMapIterator<T> const &other) const noexcept {
+		return this->map == other.map && this->index == other.index;
+	}
+
+	// Range-based for compares iterators with `!=`.
+	bool operator!=(StringMapIterator<T> const &other) const noexcept {
+		return !(*this == other);
+	}
+
+	operator String16MapEntry<T> *() const {
+		return map->entries+index;
+	}
+};
+
+
+template <typename T>
+gb_internal StringMapIterator<T> end(String16Map<T> &m) noexcept {
+ return StringMapIterator<T>{&m, m.capacity};
+}
+
+template <typename T>
+gb_internal StringMapIterator<T> const end(String16Map<T> const &m) noexcept {
+ return StringMapIterator<T>{&m, m.capacity};
+}
+
+
+
+template <typename T>
+gb_internal StringMapIterator<T> begin(String16Map<T> &m) noexcept {
+ if (m.count == 0) {
+ return end(m);
+ }
+
+ MapIndex index = 0;
+ while (index < m.capacity) {
+ if (m.entries[index].hash) {
+ break;
+ }
+ index++;
+ }
+ return StringMapIterator<T>{&m, index};
+}
+template <typename T>
+gb_internal StringMapIterator<T> const begin(String16Map<T> const &m) noexcept {
+ if (m.count == 0) {
+ return end(m);
+ }
+
+ MapIndex index = 0;
+ while (index < m.capacity) {
+ if (m.entries[index].hash) {
+ break;
+ }
+ index++;
+ }
+ return StringMapIterator<T>{&m, index};
+}
+
+#endif \ No newline at end of file
diff --git a/src/types.cpp b/src/types.cpp
index 29412fa25..c465714db 100644
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -41,8 +41,13 @@ enum BasicKind {
Basic_uint,
Basic_uintptr,
Basic_rawptr,
- Basic_string, // ^u8 + int
- Basic_cstring, // ^u8
+
+ Basic_string, // [^]u8 + int
+ Basic_cstring, // [^]u8
+
+ Basic_string16, // [^]u16 + int
+ Basic_cstring16, // [^]u16
+
Basic_any, // rawptr + ^Type_Info
Basic_typeid,
@@ -501,8 +506,14 @@ gb_global Type basic_types[] = {
{Type_Basic, {Basic_uintptr, BasicFlag_Integer | BasicFlag_Unsigned, -1, STR_LIT("uintptr")}},
{Type_Basic, {Basic_rawptr, BasicFlag_Pointer, -1, STR_LIT("rawptr")}},
+
{Type_Basic, {Basic_string, BasicFlag_String, -1, STR_LIT("string")}},
{Type_Basic, {Basic_cstring, BasicFlag_String, -1, STR_LIT("cstring")}},
+
+ {Type_Basic, {Basic_string16, BasicFlag_String, -1, STR_LIT("string16")}},
+ {Type_Basic, {Basic_cstring16, BasicFlag_String, -1, STR_LIT("cstring16")}},
+
+
{Type_Basic, {Basic_any, 0, 16, STR_LIT("any")}},
{Type_Basic, {Basic_typeid, 0, 8, STR_LIT("typeid")}},
@@ -592,8 +603,12 @@ gb_global Type *t_uint = &basic_types[Basic_uint];
gb_global Type *t_uintptr = &basic_types[Basic_uintptr];
gb_global Type *t_rawptr = &basic_types[Basic_rawptr];
+
gb_global Type *t_string = &basic_types[Basic_string];
gb_global Type *t_cstring = &basic_types[Basic_cstring];
+gb_global Type *t_string16 = &basic_types[Basic_string16];
+gb_global Type *t_cstring16 = &basic_types[Basic_cstring16];
+
gb_global Type *t_any = &basic_types[Basic_any];
gb_global Type *t_typeid = &basic_types[Basic_typeid];
@@ -631,6 +646,8 @@ gb_global Type *t_untyped_uninit = &basic_types[Basic_UntypedUninit];
gb_global Type *t_u8_ptr = nullptr;
gb_global Type *t_u8_multi_ptr = nullptr;
+gb_global Type *t_u16_ptr = nullptr;
+gb_global Type *t_u16_multi_ptr = nullptr;
gb_global Type *t_int_ptr = nullptr;
gb_global Type *t_i64_ptr = nullptr;
gb_global Type *t_f64_ptr = nullptr;
@@ -644,6 +661,8 @@ gb_global Type *t_type_info_enum_value = nullptr;
gb_global Type *t_type_info_ptr = nullptr;
gb_global Type *t_type_info_enum_value_ptr = nullptr;
+gb_global Type *t_type_info_string_encoding_kind = nullptr;
+
gb_global Type *t_type_info_named = nullptr;
gb_global Type *t_type_info_integer = nullptr;
gb_global Type *t_type_info_rune = nullptr;
@@ -1293,6 +1312,14 @@ gb_internal bool is_type_string(Type *t) {
}
return false;
}
+// True when the base type of `t` is the basic type `string16`.
+gb_internal bool is_type_string16(Type *t) {
+	Type *bt = base_type(t);
+	return bt != nullptr
+	    && bt->kind == Type_Basic
+	    && bt->Basic.kind == Basic_string16;
+}
gb_internal bool is_type_cstring(Type *t) {
t = base_type(t);
if (t == nullptr) { return false; }
@@ -1301,6 +1328,14 @@ gb_internal bool is_type_cstring(Type *t) {
}
return false;
}
+// True when the base type of `t` is the basic type `cstring16`.
+gb_internal bool is_type_cstring16(Type *t) {
+	Type *bt = base_type(t);
+	return bt != nullptr
+	    && bt->kind == Type_Basic
+	    && bt->Basic.kind == Basic_cstring16;
+}
gb_internal bool is_type_typed(Type *t) {
t = base_type(t);
if (t == nullptr) { return false; }
@@ -1430,6 +1465,12 @@ gb_internal bool is_type_u8(Type *t) {
}
return false;
}
+// True when `t` itself is the basic type u16. Unlike is_type_string16 and
+// friends this does not go through base_type(), so `t` is inspected exactly
+// as given. A nullptr `t` yields false, matching the sibling predicates.
+gb_internal bool is_type_u16(Type *t) {
+	if (t == nullptr) { return false; }
+	if (t->kind == Type_Basic) {
+		return t->Basic.kind == Basic_u16;
+	}
+	return false;
+}
gb_internal bool is_type_array(Type *t) {
t = base_type(t);
if (t == nullptr) { return false; }
@@ -1691,6 +1732,39 @@ gb_internal bool is_type_rune_array(Type *t) {
return false;
}
+// True when the base type of `t` is a slice whose element type is u16.
+gb_internal bool is_type_u16_slice(Type *t) {
+	Type *bt = base_type(t);
+	if (bt == nullptr || bt->kind != Type_Slice) {
+		return false;
+	}
+	return is_type_u16(bt->Slice.elem);
+}
+// True when the base type of `t` is a fixed array whose element type is u16.
+gb_internal bool is_type_u16_array(Type *t) {
+	Type *bt = base_type(t);
+	if (bt == nullptr || bt->kind != Type_Array) {
+		return false;
+	}
+	return is_type_u16(bt->Array.elem);
+}
+// True when the base type of `t` is a pointer whose element type is u16.
+gb_internal bool is_type_u16_ptr(Type *t) {
+	t = base_type(t);
+	if (t == nullptr) { return false; }
+	if (t->kind == Type_Pointer) {
+		// Read the member that matches the checked kind rather than `Slice`.
+		return is_type_u16(t->Pointer.elem);
+	}
+	return false;
+}
+// True when the base type of `t` is a multi-pointer whose element type is u16.
+gb_internal bool is_type_u16_multi_ptr(Type *t) {
+	t = base_type(t);
+	if (t == nullptr) { return false; }
+	if (t->kind == Type_MultiPointer) {
+		// Read the member that matches the checked kind rather than `Slice`.
+		return is_type_u16(t->MultiPointer.elem);
+	}
+	return false;
+}
+
gb_internal bool is_type_array_like(Type *t) {
return is_type_array(t) || is_type_enumerated_array(t);
@@ -2110,7 +2184,7 @@ gb_internal bool is_type_indexable(Type *t) {
Type *bt = base_type(t);
switch (bt->kind) {
case Type_Basic:
- return bt->Basic.kind == Basic_string;
+ return bt->Basic.kind == Basic_string || bt->Basic.kind == Basic_string16;
case Type_Array:
case Type_Slice:
case Type_DynamicArray:
@@ -2130,7 +2204,7 @@ gb_internal bool is_type_sliceable(Type *t) {
Type *bt = base_type(t);
switch (bt->kind) {
case Type_Basic:
- return bt->Basic.kind == Basic_string;
+ return bt->Basic.kind == Basic_string || bt->Basic.kind == Basic_string16;
case Type_Array:
case Type_Slice:
case Type_DynamicArray:
@@ -2377,6 +2451,7 @@ gb_internal bool type_has_nil(Type *t) {
case Basic_any:
return true;
case Basic_cstring:
+ case Basic_cstring16:
return true;
case Basic_typeid:
return true;
@@ -2444,8 +2519,9 @@ gb_internal bool is_type_comparable(Type *t) {
case Basic_rune:
return true;
case Basic_string:
- return true;
case Basic_cstring:
+ case Basic_string16:
+ case Basic_cstring16:
return true;
case Basic_typeid:
return true;
@@ -3831,10 +3907,12 @@ gb_internal i64 type_size_of(Type *t) {
if (t->kind == Type_Basic) {
GB_ASSERT_MSG(is_type_typed(t), "%s", type_to_string(t));
switch (t->Basic.kind) {
- case Basic_string: size = 2*build_context.int_size; break;
- case Basic_cstring: size = build_context.ptr_size; break;
- case Basic_any: size = 16; break;
- case Basic_typeid: size = 8; break;
+ case Basic_string: size = 2*build_context.int_size; break;
+ case Basic_cstring: size = build_context.ptr_size; break;
+ case Basic_string16: size = 2*build_context.int_size; break;
+ case Basic_cstring16: size = build_context.ptr_size; break;
+ case Basic_any: size = 16; break;
+ case Basic_typeid: size = 8; break;
case Basic_int: case Basic_uint:
size = build_context.int_size;
@@ -3894,10 +3972,12 @@ gb_internal i64 type_align_of_internal(Type *t, TypePath *path) {
case Type_Basic: {
GB_ASSERT(is_type_typed(t));
switch (t->Basic.kind) {
- case Basic_string: return build_context.int_size;
- case Basic_cstring: return build_context.ptr_size;
- case Basic_any: return 8;
- case Basic_typeid: return 8;
+ case Basic_string: return build_context.int_size;
+ case Basic_cstring: return build_context.ptr_size;
+ case Basic_string16: return build_context.int_size;
+ case Basic_cstring16: return build_context.ptr_size;
+ case Basic_any: return 8;
+ case Basic_typeid: return 8;
case Basic_int: case Basic_uint:
return build_context.int_size;
@@ -4145,10 +4225,12 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
return size;
}
switch (kind) {
- case Basic_string: return 2*build_context.int_size;
- case Basic_cstring: return build_context.ptr_size;
- case Basic_any: return 16;
- case Basic_typeid: return 8;
+ case Basic_string: return 2*build_context.int_size;
+ case Basic_cstring: return build_context.ptr_size;
+ case Basic_string16: return 2*build_context.int_size;
+ case Basic_cstring16: return build_context.ptr_size;
+ case Basic_any: return 16;
+ case Basic_typeid: return 8;
case Basic_int: case Basic_uint:
return build_context.int_size;
@@ -4380,6 +4462,15 @@ gb_internal i64 type_offset_of(Type *t, i64 index, Type **field_type_) {
if (field_type_) *field_type_ = t_int;
return build_context.int_size; // len
}
+ } else if (t->Basic.kind == Basic_string16) {
+ switch (index) {
+ case 0:
+ if (field_type_) *field_type_ = t_u16_ptr;
+ return 0; // data
+ case 1:
+ if (field_type_) *field_type_ = t_int;
+ return build_context.int_size; // len
+ }
} else if (t->Basic.kind == Basic_any) {
switch (index) {
case 0:
@@ -4456,6 +4547,11 @@ gb_internal i64 type_offset_of_from_selection(Type *type, Selection sel) {
case 0: t = t_rawptr; break;
case 1: t = t_int; break;
}
+ } else if (t->Basic.kind == Basic_string16) {
+ switch (index) {
+ case 0: t = t_rawptr; break;
+ case 1: t = t_int; break;
+ }
} else if (t->Basic.kind == Basic_any) {
switch (index) {
case 0: t = t_rawptr; break;
@@ -4697,6 +4793,11 @@ gb_internal Type *type_internal_index(Type *t, isize index) {
GB_ASSERT(index == 0 || index == 1);
return index == 0 ? t_u8_ptr : t_int;
}
+ case Basic_string16:
+ {
+ GB_ASSERT(index == 0 || index == 1);
+ return index == 0 ? t_u16_ptr : t_int;
+ }
case Basic_any:
{
GB_ASSERT(index == 0 || index == 1);