aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorgingerBill <gingerBill@users.noreply.github.com>2025-08-02 11:55:16 +0100
committergingerBill <gingerBill@users.noreply.github.com>2025-08-02 11:55:16 +0100
commitae02d3d02d2eb5132fa7c6573ed7db20d7e18f3e (patch)
tree0f9f591df4a9862013ff79ef2e5ea3f8c050c393 /src
parent2561427dd396a69cd49eb02c0814c4e8e8b3a08f (diff)
Begin supporting `string16` across the core library
Diffstat (limited to 'src')
-rw-r--r--src/check_builtin.cpp2
-rw-r--r--src/check_expr.cpp42
-rw-r--r--src/checker_builtin_procs.hpp2
-rw-r--r--src/llvm_backend.cpp6
-rw-r--r--src/llvm_backend_const.cpp77
-rw-r--r--src/llvm_backend_debug.cpp14
-rw-r--r--src/llvm_backend_expr.cpp3
-rw-r--r--src/llvm_backend_general.cpp37
-rw-r--r--src/llvm_backend_utility.cpp19
-rw-r--r--src/string.cpp7
10 files changed, 196 insertions, 13 deletions
diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp
index d36cf4520..4abace637 100644
--- a/src/check_builtin.cpp
+++ b/src/check_builtin.cpp
@@ -19,6 +19,7 @@ gb_global BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_bool
is_type_complex,
is_type_quaternion,
is_type_string,
+ is_type_string16,
is_type_typeid,
is_type_any,
is_type_endian_platform,
@@ -6139,6 +6140,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
case BuiltinProc_type_is_complex:
case BuiltinProc_type_is_quaternion:
case BuiltinProc_type_is_string:
+ case BuiltinProc_type_is_string16:
case BuiltinProc_type_is_typeid:
case BuiltinProc_type_is_any:
case BuiltinProc_type_is_endian_platform:
diff --git a/src/check_expr.cpp b/src/check_expr.cpp
index 57073e22f..8d2e4d637 100644
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -2106,6 +2106,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i
} else if (is_type_boolean(type)) {
return in_value.kind == ExactValue_Bool;
} else if (is_type_string(type)) {
+ if (in_value.kind == ExactValue_String16) {
+ return is_type_string16(type) || is_type_cstring16(type);
+ }
return in_value.kind == ExactValue_String;
} else if (is_type_integer(type) || is_type_rune(type)) {
if (in_value.kind == ExactValue_Bool) {
@@ -2320,6 +2323,9 @@ gb_internal bool check_representable_as_constant(CheckerContext *c, ExactValue i
if (in_value.kind == ExactValue_String) {
return false;
}
+ if (in_value.kind == ExactValue_String16) {
+ return false;
+ }
if (out_value) *out_value = in_value;
} else if (is_type_bit_set(type)) {
if (in_value.kind == ExactValue_Integer) {
@@ -4654,6 +4660,13 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar
break;
}
}
+ } else if (operand->value.kind == ExactValue_String16) {
+ String16 s = operand->value.value_string16;
+ if (is_type_u16_array(t)) {
+ if (s.len == t->Array.count) {
+ break;
+ }
+ }
}
operand->mode = Addressing_Invalid;
convert_untyped_error(c, operand, target_type);
@@ -4983,6 +4996,12 @@ gb_internal ExactValue get_constant_field_single(CheckerContext *c, ExactValue v
if (success_) *success_ = true;
if (finish_) *finish_ = true;
return exact_value_u64(val);
+	} else if (value.kind == ExactValue_String16) {
+		GB_ASSERT(0 <= index && index < value.value_string16.len); // bound must come from value_string16, the same member indexed on the next line
+		u16 val = value.value_string16[index];
+		if (success_) *success_ = true;
+		if (finish_) *finish_ = true;
+		return exact_value_u64(val);
}
if (value.kind != ExactValue_Compound) {
if (success_) *success_ = true;
@@ -11124,15 +11143,21 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node,
o->expr = node;
return kind;
}
-
- String s = {};
- if (o->value.kind == ExactValue_String) {
- s = o->value.value_string;
- }
-
o->mode = Addressing_Constant;
o->type = t;
- o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+
+ if (o->value.kind == ExactValue_String16) {
+ String16 s = o->value.value_string16;
+
+ o->value = exact_value_string16(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+ } else {
+ String s = {};
+ if (o->value.kind == ExactValue_String) {
+ s = o->value.value_string;
+ }
+
+ o->value = exact_value_string(substring(s, cast(isize)indices[0], cast(isize)indices[1]));
+ }
}
return kind;
}
@@ -11221,6 +11246,7 @@ gb_internal ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast
Type *t = t_invalid;
switch (node->tav.value.kind) {
case ExactValue_String: t = t_untyped_string; break;
+ case ExactValue_String16: t = t_string16; break; // TODO(bill): determine this correctly
case ExactValue_Float: t = t_untyped_float; break;
case ExactValue_Complex: t = t_untyped_complex; break;
case ExactValue_Quaternion: t = t_untyped_quaternion; break;
@@ -11657,6 +11683,8 @@ gb_internal bool is_exact_value_zero(ExactValue const &v) {
return !v.value_bool;
case ExactValue_String:
return v.value_string.len == 0;
+ case ExactValue_String16:
+ return v.value_string16.len == 0;
case ExactValue_Integer:
return big_int_is_zero(&v.value_integer);
case ExactValue_Float:
diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp
index 8e135ab10..bff887d9e 100644
--- a/src/checker_builtin_procs.hpp
+++ b/src/checker_builtin_procs.hpp
@@ -250,6 +250,7 @@ BuiltinProc__type_simple_boolean_begin,
BuiltinProc_type_is_complex,
BuiltinProc_type_is_quaternion,
BuiltinProc_type_is_string,
+ BuiltinProc_type_is_string16,
BuiltinProc_type_is_typeid,
BuiltinProc_type_is_any,
@@ -607,6 +608,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
{STR_LIT("type_is_complex"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("type_is_quaternion"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("type_is_string"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
+ {STR_LIT("type_is_string16"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("type_is_typeid"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
{STR_LIT("type_is_any"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp
index 13a1d8cf3..f37415cc1 100644
--- a/src/llvm_backend.cpp
+++ b/src/llvm_backend.cpp
@@ -1264,7 +1264,13 @@ String lb_get_objc_type_encoding(Type *t, isize pointer_depth = 0) {
case Basic_string:
return build_context.metrics.int_size == 4 ? str_lit("{string=*i}") : str_lit("{string=*q}");
+ case Basic_string16:
+ return build_context.metrics.int_size == 4 ? str_lit("{string16=*i}") : str_lit("{string16=*q}");
+
case Basic_cstring: return str_lit("*");
+ case Basic_cstring16: return str_lit("*");
+
+
case Basic_any: return str_lit("{any=^v^v}"); // rawptr + ^Type_Info
case Basic_typeid:
diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp
index c3112934e..8c05ed4a2 100644
--- a/src/llvm_backend_const.cpp
+++ b/src/llvm_backend_const.cpp
@@ -122,6 +122,25 @@ gb_internal lbValue lb_const_ptr_cast(lbModule *m, lbValue value, Type *t) {
gb_internal LLVMValueRef llvm_const_string_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) {
+	// Builds the named-struct constant for a non-string16 string type `t` from `data` and `len`.
+	GB_ASSERT(!is_type_string16(t));
+	bool const ptr_narrower_than_int = build_context.metrics.ptr_size < build_context.metrics.int_size;
+	if (!ptr_narrower_than_int) {
+		// Two-field layout: {data, len}.
+		LLVMValueRef fields[2] = {data, len};
+		return llvm_const_named_struct_internal(lb_type(m, t), fields, 2);
+	}
+	// Three-field layout: a zero i32 constant is placed between data and len.
+	LLVMValueRef padded_fields[3] = {
+		data,
+		LLVMConstNull(lb_type(m, t_i32)),
+		len,
+	};
+	return llvm_const_named_struct_internal(lb_type(m, t), padded_fields, 3);
+}
+
+gb_internal LLVMValueRef llvm_const_string16_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) {
+ GB_ASSERT(is_type_string16(t));
if (build_context.metrics.ptr_size < build_context.metrics.int_size) {
LLVMValueRef values[3] = {
data,
@@ -238,6 +257,10 @@ gb_internal lbValue lb_const_string(lbModule *m, String const &value) {
return lb_const_value(m, t_string, exact_value_string(value));
}
+gb_internal lbValue lb_const_string(lbModule *m, String16 const &value) { // String16 overload: wraps `value` via exact_value_string16 and requests the constant with type t_string16
+ return lb_const_value(m, t_string16, exact_value_string16(value));
+}
+
gb_internal lbValue lb_const_bool(lbModule *m, Type *type, bool value) {
lbValue res = {};
@@ -569,7 +592,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb
GB_ASSERT(is_type_slice(type));
res.value = lb_find_or_add_entity_string_byte_slice_with_type(m, value.value_string, original_type).value;
return res;
- } else {
+ } else if (value.kind == ExactValue_String16) {
+ GB_ASSERT(is_type_slice(type));
+ GB_PANIC("TODO(bill): UTF-16 String");
+ return res;
+ }else {
ast_node(cl, CompoundLit, value.value_compound);
isize count = cl->elems.count;
@@ -751,15 +778,23 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb
{
bool custom_link_section = cc.link_section.len > 0;
- LLVMValueRef ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section);
+ LLVMValueRef ptr = nullptr;
lbValue res = {};
res.type = default_type(original_type);
+ if (is_type_string16(res.type) || is_type_cstring16(res.type)) {
+ TEMPORARY_ALLOCATOR_GUARD();
+ String16 s16 = string_to_string16(temporary_allocator(), value.value_string);
+ ptr = lb_find_or_add_entity_string16_ptr(m, s16, custom_link_section);
+ } else {
+ ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section);
+ }
+
if (custom_link_section) {
LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section));
}
- if (is_type_cstring(res.type)) {
+ if (is_type_cstring(res.type) || is_type_cstring16(res.type)) {
res.value = ptr;
} else {
if (value.value_string.len == 0) {
@@ -768,12 +803,46 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb
LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string.len, true);
GB_ASSERT(is_type_string(original_type));
- res.value = llvm_const_string_internal(m, original_type, ptr, str_len);
+ if (is_type_string16(res.type)) {
+ res.value = llvm_const_string16_internal(m, original_type, ptr, str_len);
+ } else {
+ res.value = llvm_const_string_internal(m, original_type, ptr, str_len);
+ }
+ }
+
+ return res;
+ }
+
+ case ExactValue_String16:
+ {
+ GB_ASSERT(is_type_string16(res.type) || is_type_cstring16(res.type));
+
+ bool custom_link_section = cc.link_section.len > 0;
+
+ LLVMValueRef ptr = lb_find_or_add_entity_string16_ptr(m, value.value_string16, custom_link_section);
+ lbValue res = {};
+ res.type = default_type(original_type);
+
+ if (custom_link_section) {
+ LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section));
+ }
+
+ if (is_type_cstring16(res.type)) {
+ res.value = ptr;
+ } else {
+			if (value.value_string16.len == 0) {
+				ptr = LLVMConstNull(lb_type(m, t_u16_ptr)); // empty string16: null data pointer, typed as u16 pointer to match field 0 of string16 (see lb_emit_struct_ep)
+			}
+ LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string16.len, true);
+ GB_ASSERT(is_type_string(original_type));
+
+ res.value = llvm_const_string16_internal(m, original_type, ptr, str_len);
}
return res;
}
+
case ExactValue_Integer:
if (is_type_pointer(type) || is_type_multi_pointer(type) || is_type_proc(type)) {
LLVMTypeRef t = lb_type(m, original_type);
diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp
index 024c5564e..182920fc7 100644
--- a/src/llvm_backend_debug.cpp
+++ b/src/llvm_backend_debug.cpp
@@ -802,6 +802,20 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) {
LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("char"), 8, LLVMDWARFTypeEncoding_Unsigned);
return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring", 7);
}
+
+ case Basic_string16:
+ {
+ LLVMMetadataRef elements[2] = {};
+ elements[0] = lb_debug_struct_field(m, str_lit("data"), t_u16_ptr, 0);
+ elements[1] = lb_debug_struct_field(m, str_lit("len"), t_int, int_bits);
+ return lb_debug_basic_struct(m, str_lit("string16"), 2*int_bits, int_bits, elements, gb_count_of(elements));
+ }
+	case Basic_cstring16:
+		{
+			LLVMMetadataRef char_type = lb_debug_type_basic_type(m, str_lit("wchar_t"), 16, LLVMDWARFTypeEncoding_Unsigned);
+			return LLVMDIBuilderCreatePointerType(m->debug_builder, char_type, ptr_bits, ptr_bits, 0, "cstring16", 9); // NameLen covers all 9 characters of "cstring16"; 7 would truncate the name to "cstring"
+		}
+
case Basic_any:
{
LLVMMetadataRef elements[2] = {};
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp
index fbf0dea11..3463b6083 100644
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -4354,12 +4354,13 @@ gb_internal lbAddr lb_build_addr_index_expr(lbProcedure *p, Ast *expr) {
}
- case Type_Basic: { // Basic_string
+ case Type_Basic: { // Basic_string/Basic_string16
lbValue str;
lbValue elem;
lbValue len;
lbValue index;
+
str = lb_build_expr(p, ie->expr);
if (deref) {
str = lb_emit_load(p, str);
diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp
index d9771a75b..9ef1c23c0 100644
--- a/src/llvm_backend_general.cpp
+++ b/src/llvm_backend_general.cpp
@@ -2715,6 +2715,43 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string_ptr(lbModule *m, String co
}
}
+gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String16 const &str, bool custom_link_section) {
+	// Emits the code units of `str` plus one trailing zero as an i16 array stored in a new
+	// module global (marked via lb_make_global_private_const) and returns a constant GEP to element 0.
+	// NOTE: `custom_link_section` is not consulted anywhere in this body.
+	// TODO(bill): caching for UTF-16 strings
+
+	LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)};
+
+	LLVMValueRef data = nullptr;
+	{
+		LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx);
+
+		TEMPORARY_ALLOCATOR_GUARD();
+		LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1);
+		for (isize i = 0; i < str.len; i++) {
+			values[i] = LLVMConstInt(llvm_u16, str.text[i], false);
+		}
+		values[str.len] = LLVMConstInt(llvm_u16, 0, false); // trailing zero code unit
+
+		data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1));
+	}
+
+	// Every call creates a fresh global; the name is made unique with a per-module counter.
+	u32 id = m->global_array_index.fetch_add(1);
+	gbString name = gb_string_make(temporary_allocator(), "csbs$");
+	name = gb_string_appendc(name, m->module_name);
+	name = gb_string_append_fmt(name, "$%x", id);
+
+	LLVMTypeRef type = LLVMTypeOf(data);
+	LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name);
+	LLVMSetInitializer(global_data, data);
+	lb_make_global_private_const(global_data);
+	LLVMSetAlignment(global_data, 2); // elements are i16: alignment 1 would permit odd addresses and misaligned 16-bit loads
+
+	LLVMValueRef ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2);
+	return ptr;
+}
+
gb_internal lbValue lb_find_or_add_entity_string(lbModule *m, String const &str, bool custom_link_section) {
LLVMValueRef ptr = nullptr;
if (str.len != 0) {
diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp
index d4117b7ff..ea1bae4e9 100644
--- a/src/llvm_backend_utility.cpp
+++ b/src/llvm_backend_utility.cpp
@@ -6,6 +6,7 @@ gb_internal bool lb_is_type_aggregate(Type *t) {
case Type_Basic:
switch (t->Basic.kind) {
case Basic_string:
+ case Basic_string16:
case Basic_any:
return true;
@@ -981,7 +982,8 @@ gb_internal i32 lb_convert_struct_index(lbModule *m, Type *t, i32 index) {
} else if (build_context.ptr_size != build_context.int_size) {
switch (t->kind) {
case Type_Basic:
- if (t->Basic.kind != Basic_string) {
+ if (t->Basic.kind != Basic_string &&
+ t->Basic.kind != Basic_string16) {
break;
}
/*fallthrough*/
@@ -1160,6 +1162,11 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) {
case 0: result_type = alloc_type_pointer(t->Slice.elem); break;
case 1: result_type = t_int; break;
}
+ } else if (is_type_string16(t)) {
+ switch (index) {
+ case 0: result_type = t_u16_ptr; break;
+ case 1: result_type = t_int; break;
+ }
} else if (is_type_string(t)) {
switch (index) {
case 0: result_type = t_u8_ptr; break;
@@ -1273,6 +1280,12 @@ gb_internal lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) {
switch (t->kind) {
case Type_Basic:
switch (t->Basic.kind) {
+ case Basic_string16:
+ switch (index) {
+ case 0: result_type = t_u16_ptr; break;
+ case 1: result_type = t_int; break;
+ }
+ break;
case Basic_string:
switch (index) {
case 0: result_type = t_u8_ptr; break;
@@ -1440,6 +1453,10 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection
e = lb_emit_struct_ep(p, e, index);
break;
+ case Basic_string16:
+ e = lb_emit_struct_ep(p, e, index);
+ break;
+
default:
GB_PANIC("un-gep-able type %s", type_to_string(type));
break;
diff --git a/src/string.cpp b/src/string.cpp
index 8405938f4..8cc0e93f3 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -79,6 +79,13 @@ gb_internal String substring(String const &s, isize lo, isize hi) {
return make_string(s.text+lo, hi-lo);
}
+gb_internal String16 substring(String16 const &s, isize lo, isize hi) {
+	// String16 result built by make_string16 from s.text+lo with length hi-lo; asserts lo <= hi <= s.len.
+	isize const limit = s.len;
+	GB_ASSERT_MSG(lo <= hi && hi <= limit, "%td..%td..%td", lo, hi, limit);
+	return make_string16(s.text+lo, hi-lo);
+}
+
gb_internal char *alloc_cstring(gbAllocator a, String s) {
char *c_str = gb_alloc_array(a, char, s.len+1);