diff options
| author | gingerBill <gingerBill@users.noreply.github.com> | 2025-08-06 16:09:18 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-06 16:09:18 +0100 |
| commit | 09a1e170bc92a0ea48a8ee67599c2936e924fe4d (patch) | |
| tree | 92b44b34a1f2f0c4a8c96a49ab61bb5177432ed7 /src/llvm_backend_expr.cpp | |
| parent | ec7509430369eb5d57a081507792dc03b1c05bab (diff) | |
| parent | af3184adc96cef59fff986ea6400caa6dbdb56ae (diff) | |
Merge pull request #5530 from odin-lang/bill/utf16-strings
UTF-16 string types: `string16` & `cstring16`
Diffstat (limited to 'src/llvm_backend_expr.cpp')
| -rw-r--r-- | src/llvm_backend_expr.cpp | 142 |
1 files changed, 139 insertions, 3 deletions
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 74aea82f1..5425572c7 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -1559,16 +1559,24 @@ gb_internal lbValue lb_build_binary_expr(lbProcedure *p, Ast *expr) { return lb_emit_conv(p, cmp, type); } else if (lb_is_empty_string_constant(be->right) && !is_type_union(be->left->tav.type)) { // `x == ""` or `x != ""` + Type *str_type = t_string; + if (is_type_string16(be->left->tav.type) || is_type_cstring16(be->left->tav.type)) { + str_type = t_string16; + } lbValue s = lb_build_expr(p, be->left); - s = lb_emit_conv(p, s, t_string); + s = lb_emit_conv(p, s, str_type); lbValue len = lb_string_len(p, s); lbValue cmp = lb_emit_comp(p, be->op.kind, len, lb_const_int(p->module, t_int, 0)); Type *type = default_type(tv.type); return lb_emit_conv(p, cmp, type); } else if (lb_is_empty_string_constant(be->left) && !is_type_union(be->right->tav.type)) { // `"" == x` or `"" != x` + Type *str_type = t_string; + if (is_type_string16(be->right->tav.type) || is_type_cstring16(be->right->tav.type)) { + str_type = t_string16; + } lbValue s = lb_build_expr(p, be->right); - s = lb_emit_conv(p, s, t_string); + s = lb_emit_conv(p, s, str_type); lbValue len = lb_string_len(p, s); lbValue cmp = lb_emit_comp(p, be->op.kind, len, lb_const_int(p->module, t_int, 0)); Type *type = default_type(tv.type); @@ -1656,6 +1664,8 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { res.type = t; res.value = llvm_cstring(m, str); return res; + } else if (src->kind == Type_Basic && src->Basic.kind == Basic_string16 && dst->Basic.kind == Basic_cstring16) { + GB_PANIC("TODO(bill): UTF-16 string"); } // if (is_type_float(dst)) { // return value; @@ -1795,6 +1805,38 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { } + + if (is_type_cstring16(src) && is_type_u16_ptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_u16_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_u8_multi_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_cstring16(src) && is_type_rawptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_rawptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + + if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) { + TEMPORARY_ALLOCATOR_GUARD(); + + lbValue c = lb_emit_conv(p, value, t_cstring16); + auto args = array_make<lbValue>(temporary_allocator(), 1); + args[0] = c; + lbValue s = lb_emit_runtime_call(p, "cstring16_to_string16", args); + return lb_emit_conv(p, s, dst); + } + + + // integer -> boolean if (is_type_integer(src) && is_type_boolean(dst)) { lbValue res = {}; @@ -2296,6 +2338,29 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return res; } + // [^]u16 <-> cstring16 + if (is_type_u16_multi_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_u16_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_cstring16(src) && is_type_u16_ptr(dst)) { + return lb_emit_transmute(p, value, t); + } + + + // []u16 <-> string16 + if (is_type_u16_slice(src) && is_type_string16(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_string16(src) && is_type_u16_slice(dst)) { + return lb_emit_transmute(p, value, t); + } + // []byte/[]u8 <-> string if (is_type_u8_slice(src) && is_type_string(dst)) { return lb_emit_transmute(p, value, t); @@ -2304,6 +2369,7 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return lb_emit_transmute(p, value, t); } + if (is_type_array_like(dst)) { Type *elem = base_array_type(dst); isize index_count = cast(isize)get_array_type_count(dst); @@ -2710,7 +2776,53 @@ gb_internal lbValue lb_emit_comp(lbProcedure *p, TokenKind op_kind, lbValue left return lb_compare_records(p, op_kind, left, right, b); } + + if (is_type_string16(a) || is_type_cstring16(a)) { + if (is_type_cstring16(a) && is_type_cstring16(b)) { + left = lb_emit_conv(p, left, t_cstring16); + right = lb_emit_conv(p, right, t_cstring16); + char const *runtime_procedure = nullptr; + switch (op_kind) { + case Token_CmpEq: runtime_procedure = "cstring16_eq"; break; + case Token_NotEq: runtime_procedure = "cstring16_ne"; break; + case Token_Lt: runtime_procedure = "cstring16_lt"; break; + case Token_Gt: runtime_procedure = "cstring16_gt"; break; + case Token_LtEq: runtime_procedure = "cstring16_le"; break; + case Token_GtEq: runtime_procedure = "cstring16_ge"; break; + } + GB_ASSERT(runtime_procedure != nullptr); + + auto args = array_make<lbValue>(permanent_allocator(), 2); + args[0] = left; + args[1] = right; + return lb_emit_runtime_call(p, runtime_procedure, args); + } + + + if (is_type_cstring16(a) ^ is_type_cstring16(b)) { + left = lb_emit_conv(p, left, t_string16); + right = lb_emit_conv(p, right, t_string16); + } + + char const *runtime_procedure = nullptr; + switch (op_kind) { + case Token_CmpEq: runtime_procedure = "string16_eq"; break; + case Token_NotEq: runtime_procedure = "string16_ne"; break; + case Token_Lt: runtime_procedure = "string16_lt"; break; + case Token_Gt: runtime_procedure = "string16_gt"; break; + case Token_LtEq: runtime_procedure = "string16_le"; break; + case Token_GtEq: runtime_procedure = "string16_ge"; break; + } + GB_ASSERT(runtime_procedure != nullptr); + + auto args = array_make<lbValue>(permanent_allocator(), 2); + args[0] = left; + args[1] = right; + return lb_emit_runtime_call(p, runtime_procedure, args); + } + if (is_type_string(a)) { + if (is_type_cstring(a) && is_type_cstring(b)) { left = lb_emit_conv(p, left, t_cstring); right = lb_emit_conv(p, right, t_cstring); @@ -3056,6 +3168,13 @@ gb_internal lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, res.value = LLVMBuildIsNotNull(p->builder, x.value, ""); } return res; + case Basic_cstring16: + if (op_kind == Token_CmpEq) { + res.value = LLVMBuildIsNull(p->builder, x.value, ""); + } else if (op_kind == Token_NotEq) { + res.value = LLVMBuildIsNotNull(p->builder, x.value, ""); + } + return res; case Basic_any: { // TODO(bill): is this correct behaviour for nil comparison for any? @@ -4298,12 +4417,13 @@ gb_internal lbAddr lb_build_addr_index_expr(lbProcedure *p, Ast *expr) { } - case Type_Basic: { // Basic_string + case Type_Basic: { // Basic_string/Basic_string16 lbValue str; lbValue elem; lbValue len; lbValue index; + str = lb_build_expr(p, ie->expr); if (deref) { str = lb_emit_load(p, str); @@ -4432,6 +4552,22 @@ gb_internal lbAddr lb_build_addr_slice_expr(lbProcedure *p, Ast *expr) { } case Type_Basic: { + if (is_type_string16(type)) { + GB_ASSERT_MSG(are_types_identical(type, t_string16), "got %s", type_to_string(type)); + lbValue len = lb_string_len(p, base); + if (high.value == nullptr) high = len; + + if (!no_indices) { + lb_emit_slice_bounds_check(p, se->open, low, high, len, se->low != nullptr); + } + + lbValue elem = lb_emit_ptr_offset(p, lb_string_elem(p, base), low); + lbValue new_len = lb_emit_arith(p, Token_Sub, high, low, t_int); + + lbAddr str = lb_add_local_generated(p, t_string16, false); + lb_fill_string(p, str, elem, new_len); + return str; + } GB_ASSERT_MSG(are_types_identical(type, t_string), "got %s", type_to_string(type)); lbValue len = lb_string_len(p, base); if (high.value == nullptr) high = len; |