From ae02d3d02d2eb5132fa7c6573ed7db20d7e18f3e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 2 Aug 2025 11:55:16 +0100 Subject: Begin supporting `string16` across the core library --- src/llvm_backend_const.cpp | 77 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) (limited to 'src/llvm_backend_const.cpp') diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index c3112934e..8c05ed4a2 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -122,6 +122,25 @@ gb_internal lbValue lb_const_ptr_cast(lbModule *m, lbValue value, Type *t) { gb_internal LLVMValueRef llvm_const_string_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) { + GB_ASSERT(!is_type_string16(t)); + if (build_context.metrics.ptr_size < build_context.metrics.int_size) { + LLVMValueRef values[3] = { + data, + LLVMConstNull(lb_type(m, t_i32)), + len, + }; + return llvm_const_named_struct_internal(lb_type(m, t), values, 3); + } else { + LLVMValueRef values[2] = { + data, + len, + }; + return llvm_const_named_struct_internal(lb_type(m, t), values, 2); + } +} + +gb_internal LLVMValueRef llvm_const_string16_internal(lbModule *m, Type *t, LLVMValueRef data, LLVMValueRef len) { + GB_ASSERT(is_type_string16(t)); if (build_context.metrics.ptr_size < build_context.metrics.int_size) { LLVMValueRef values[3] = { data, @@ -238,6 +257,10 @@ gb_internal lbValue lb_const_string(lbModule *m, String const &value) { return lb_const_value(m, t_string, exact_value_string(value)); } +gb_internal lbValue lb_const_string(lbModule *m, String16 const &value) { + return lb_const_value(m, t_string16, exact_value_string16(value)); +} + gb_internal lbValue lb_const_bool(lbModule *m, Type *type, bool value) { lbValue res = {}; @@ -569,7 +592,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb GB_ASSERT(is_type_slice(type)); res.value = lb_find_or_add_entity_string_byte_slice_with_type(m, value.value_string, original_type).value; return res; - } else { + } else if (value.kind == ExactValue_String16) { + GB_ASSERT(is_type_slice(type)); + GB_PANIC("TODO(bill): UTF-16 String"); + return res; + }else { ast_node(cl, CompoundLit, value.value_compound); isize count = cl->elems.count; @@ -751,15 +778,23 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb { bool custom_link_section = cc.link_section.len > 0; - LLVMValueRef ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); + LLVMValueRef ptr = nullptr; lbValue res = {}; res.type = default_type(original_type); + if (is_type_string16(res.type) || is_type_cstring16(res.type)) { + TEMPORARY_ALLOCATOR_GUARD(); + String16 s16 = string_to_string16(temporary_allocator(), value.value_string); + ptr = lb_find_or_add_entity_string16_ptr(m, s16, custom_link_section); + } else { + ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); + } + if (custom_link_section) { LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section)); } - if (is_type_cstring(res.type)) { + if (is_type_cstring(res.type) || is_type_cstring16(res.type)) { res.value = ptr; } else { if (value.value_string.len == 0) { @@ -768,12 +803,46 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string.len, true); GB_ASSERT(is_type_string(original_type)); - res.value = llvm_const_string_internal(m, original_type, ptr, str_len); + if (is_type_string16(res.type)) { + res.value = llvm_const_string16_internal(m, original_type, ptr, str_len); + } else { + res.value = llvm_const_string_internal(m, original_type, ptr, str_len); + } + } + + return res; + } + + case ExactValue_String16: + { + GB_ASSERT(is_type_string16(res.type) || is_type_cstring16(res.type)); + + bool custom_link_section = cc.link_section.len > 0; + + LLVMValueRef ptr = lb_find_or_add_entity_string16_ptr(m, value.value_string16, custom_link_section); + lbValue res = {}; + res.type = default_type(original_type); + + if (custom_link_section) { + LLVMSetSection(ptr, alloc_cstring(permanent_allocator(), cc.link_section)); + } + + if (is_type_cstring16(res.type)) { + res.value = ptr; + } else { + if (value.value_string16.len == 0) { + ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); + } + LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string16.len, true); + GB_ASSERT(is_type_string(original_type)); + + res.value = llvm_const_string16_internal(m, original_type, ptr, str_len); } return res; } + case ExactValue_Integer: if (is_type_pointer(type) || is_type_multi_pointer(type) || is_type_proc(type)) { LLVMTypeRef t = lb_type(m, original_type); -- cgit v1.2.3 From 620bf162a048fdf29fdfcedc12abae79cffeedf4 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 2 Aug 2025 12:32:18 +0100 Subject: Cache const `string16` in LLVM --- src/check_builtin.cpp | 13 +++++--- src/common.cpp | 1 + src/llvm_backend.hpp | 3 +- src/llvm_backend_const.cpp | 2 +- src/llvm_backend_expr.cpp | 6 ---- src/llvm_backend_general.cpp | 71 +++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 83 insertions(+), 13 deletions(-) (limited to 'src/llvm_backend_const.cpp') diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 66ea0cfbd..da5eb8977 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2329,10 +2329,15 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As if (operand->mode == Addressing_Constant) { mode = Addressing_Constant; - GB_ASSERT_MSG(!is_type_string16(op_type), "TODO(bill): constant utf-16 string len"); - - String str = operand->value.value_string; - value = exact_value_i64(str.len); + if (operand->value.kind == ExactValue_String) { + String str = operand->value.value_string; + value = exact_value_i64(str.len); + } else if (operand->value.kind == ExactValue_String16) { + String16 str = operand->value.value_string16; + value = exact_value_i64(str.len); + } else { + GB_PANIC("Unhandled value kind: %d", operand->value.kind); + } type = t_untyped_integer; } else { mode = Addressing_Value; diff --git a/src/common.cpp b/src/common.cpp index b3761fc36..53848cacf 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -350,6 +350,7 @@ gb_global bool global_module_path_set = false; #include "ptr_map.cpp" #include "ptr_set.cpp" #include "string_map.cpp" +#include "string16_map.cpp" #include "string_set.cpp" #include "priority_queue.cpp" #include "thread_pool.cpp" diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index fef6e754d..648e8a732 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -173,7 +173,8 @@ struct lbModule { PtrMap procedure_values; Array missing_procedures_to_check; - StringMap const_strings; + StringMap const_strings; + String16Map const_string16s; PtrMap function_type_map; diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index 8c05ed4a2..cba0000cd 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -594,7 +594,7 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb return res; } else if (value.kind == ExactValue_String16) { GB_ASSERT(is_type_slice(type)); - GB_PANIC("TODO(bill): UTF-16 String"); + res.value = lb_find_or_add_entity_string16_slice_with_type(m, value.value_string16, original_type).value; return res; }else { ast_node(cl, CompoundLit, value.value_compound); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 3463b6083..8ad6a5a1c 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -2526,12 +2526,6 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { if (is_type_untyped(src)) { - if (is_type_string(src) && is_type_string16(dst)) { - GB_PANIC("TODO(bill): UTF-16 string"); - lbAddr result = lb_add_local_generated(p, t, false); - lb_addr_store(p, result, value); - return lb_addr_load(p, result); - } if (is_type_string(src) && is_type_string(dst)) { lbAddr result = lb_add_local_generated(p, t, false); lb_addr_store(p, result, value); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 9ef1c23c0..064d0ef39 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -85,6 +85,7 @@ gb_internal void lb_init_module(lbModule *m, Checker *c) { string_map_init(&m->members); string_map_init(&m->procedures); string_map_init(&m->const_strings); + string16_map_init(&m->const_string16s); map_init(&m->function_type_map); string_map_init(&m->gen_procs); if (USE_SEPARATE_MODULES) { @@ -2716,7 +2717,18 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string_ptr(lbModule *m, String co } gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String16 const &str, bool custom_link_section) { - // TODO(bill): caching for UTF-16 strings + String16HashKey key = {}; + LLVMValueRef *found = nullptr; + + if (!custom_link_section) { + key = string_hash_string(str); + found = string16_map_get(&m->const_string16s, key); + } + if (found != nullptr) { + return *found; + } + + LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)}; @@ -2749,6 +2761,9 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String1 LLVMSetAlignment(global_data, 1); LLVMValueRef ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2); + if (!custom_link_section) { + string16_map_set(&m->const_string16s, key, ptr); + } return ptr; } @@ -2812,6 +2827,60 @@ gb_internal lbValue lb_find_or_add_entity_string_byte_slice_with_type(lbModule * return res; } +gb_internal lbValue lb_find_or_add_entity_string16_slice_with_type(lbModule *m, String16 const &str, Type *slice_type) { + GB_ASSERT(is_type_slice(slice_type)); + LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)}; + LLVMValueRef data = nullptr; + { + LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx); + + TEMPORARY_ALLOCATOR_GUARD(); + + LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1); + + for (isize i = 0; i < str.len; i++) { + values[i] = LLVMConstInt(llvm_u16, str.text[i], false); + } + values[str.len] = LLVMConstInt(llvm_u16, 0, false); + + data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1)); + } + + u32 id = m->global_array_index.fetch_add(1); + gbString name = gb_string_make(temporary_allocator(), "csba$"); + name = gb_string_appendc(name, m->module_name); + name = gb_string_append_fmt(name, "$%x", id); + + LLVMTypeRef type = LLVMTypeOf(data); + LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name); + LLVMSetInitializer(global_data, data); + lb_make_global_private_const(global_data); + LLVMSetAlignment(global_data, 1); + + i64 data_len = str.len; + LLVMValueRef ptr = nullptr; + if (data_len != 0) { + ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2); + } else { + ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); + } + if (!is_type_u16_slice(slice_type)) { + Type *bt = base_type(slice_type); + Type *elem = bt->Slice.elem; + i64 sz = type_size_of(elem); + GB_ASSERT(sz > 0); + ptr = LLVMConstPointerCast(ptr, lb_type(m, alloc_type_pointer(elem))); + data_len /= sz; + } + + LLVMValueRef len = LLVMConstInt(lb_type(m, t_int), data_len, true); + LLVMValueRef values[2] = {ptr, len}; + + lbValue res = {}; + res.value = llvm_const_named_struct(m, slice_type, values, 2); + res.type = slice_type; + return res; +} gb_internal lbValue lb_find_ident(lbProcedure *p, lbModule *m, Entity *e, Ast *expr) { -- cgit v1.2.3 From dca9bf0b0c8a3ad2ac6854c901d99868d3a296d5 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 2 Aug 2025 13:11:34 +0100 Subject: Fix string16 literal length set in LLVM --- src/llvm_backend_const.cpp | 13 ++++++++++--- src/llvm_backend_general.cpp | 4 ++-- src/string.cpp | 2 -- 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'src/llvm_backend_const.cpp') diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index cba0000cd..e64be49f2 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -782,9 +782,12 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb lbValue res = {}; res.type = default_type(original_type); + isize len = value.value_string.len; + if (is_type_string16(res.type) || is_type_cstring16(res.type)) { TEMPORARY_ALLOCATOR_GUARD(); String16 s16 = string_to_string16(temporary_allocator(), value.value_string); + len = s16.len; ptr = lb_find_or_add_entity_string16_ptr(m, s16, custom_link_section); } else { ptr = lb_find_or_add_entity_string_ptr(m, value.value_string, custom_link_section); @@ -797,10 +800,14 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb if (is_type_cstring(res.type) || is_type_cstring16(res.type)) { res.value = ptr; } else { - if (value.value_string.len == 0) { - ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); + if (len == 0) { + if (is_type_string16(res.type)) { + ptr = LLVMConstNull(lb_type(m, t_u16_ptr)); + } else { + ptr = LLVMConstNull(lb_type(m, t_u8_ptr)); + } } - LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), value.value_string.len, true); + LLVMValueRef str_len = LLVMConstInt(lb_type(m, t_int), len, true); GB_ASSERT(is_type_string(original_type)); if (is_type_string16(res.type)) { diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 064d0ef39..d84b8302b 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -2758,7 +2758,7 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String1 LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name); LLVMSetInitializer(global_data, data); lb_make_global_private_const(global_data); - LLVMSetAlignment(global_data, 1); + LLVMSetAlignment(global_data, 2); LLVMValueRef ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2); if (!custom_link_section) { @@ -2855,7 +2855,7 @@ gb_internal lbValue lb_find_or_add_entity_string16_slice_with_type(lbModule *m, LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name); LLVMSetInitializer(global_data, data); lb_make_global_private_const(global_data); - LLVMSetAlignment(global_data, 1); + LLVMSetAlignment(global_data, 2); i64 data_len = str.len; LLVMValueRef ptr = nullptr; diff --git a/src/string.cpp b/src/string.cpp index 8cc0e93f3..2087a5fee 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -658,7 +658,6 @@ gb_internal String normalize_path(gbAllocator a, String const &path, String cons -// TODO(bill): Make this non-windows specific gb_internal String16 string_to_string16(gbAllocator a, String s) { int len, len1; u16 *text; @@ -680,7 +679,6 @@ gb_internal String16 string_to_string16(gbAllocator a, String s) { return make_string16(nullptr, 0); } text[len] = 0; - return make_string16(text, len); } -- cgit v1.2.3