aboutsummaryrefslogtreecommitdiff
path: root/src/llvm_backend_proc.cpp
diff options
context:
space:
mode:
authorCourtney Strachan <courtney.strachan@gmail.com>2025-10-06 02:41:44 +0100
committerGitHub <noreply@github.com>2025-10-06 02:41:44 +0100
commit6de2d6e8ca687c989bbb7806e5cbe8d791e425bf (patch)
tree03a2e0a84c7c1530215f8e3f59a7f643b39b3677 /src/llvm_backend_proc.cpp
parentdbbe96ae5c343f0e803de6ee508207a62571534f (diff)
parent0f97382fa3e46da80705c00dfe02f3deb9562e4f (diff)
Merge branch 'odin-lang:master' into master
Diffstat (limited to 'src/llvm_backend_proc.cpp')
-rw-r--r--src/llvm_backend_proc.cpp645
1 files changed, 576 insertions, 69 deletions
diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp
index a835ae2c8..08ef42f01 100644
--- a/src/llvm_backend_proc.cpp
+++ b/src/llvm_backend_proc.cpp
@@ -67,6 +67,14 @@ gb_internal void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValu
gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool ignore_body) {
GB_ASSERT(entity != nullptr);
GB_ASSERT(entity->kind == Entity_Procedure);
+ // Skip codegen for unspecialized polymorphic procedures
+ if (is_type_polymorphic(entity->type) && !entity->Procedure.is_foreign) {
+ Type *bt = base_type(entity->type);
+ if (bt->kind == Type_Proc && bt->Proc.is_polymorphic && !bt->Proc.is_poly_specialized) {
+ // Do not generate code for unspecialized polymorphic procedures
+ return nullptr;
+ }
+ }
if (!entity->Procedure.is_foreign) {
if ((entity->flags & EntityFlag_ProcBodyChecked) == 0) {
GB_PANIC("%.*s :: %s (was parapoly: %d %d)", LIT(entity->token.string), type_to_string(entity->type), is_type_polymorphic(entity->type, true), is_type_polymorphic(entity->type, false));
@@ -76,7 +84,7 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i
String link_name = {};
if (ignore_body) {
- lbModule *other_module = lb_module_of_entity(m->gen, entity);
+ lbModule *other_module = lb_module_of_entity(m->gen, entity, m);
link_name = lb_get_entity_name(other_module, entity);
} else {
link_name = lb_get_entity_name(m, entity);
@@ -91,7 +99,6 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i
}
}
-
lbProcedure *p = gb_alloc_item(permanent_allocator(), lbProcedure);
p->module = m;
@@ -115,12 +122,13 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i
p->is_entry_point = false;
gbAllocator a = heap_allocator();
- p->children.allocator = a;
- p->defer_stmts.allocator = a;
- p->blocks.allocator = a;
- p->branch_blocks.allocator = a;
- p->context_stack.allocator = a;
- p->scope_stack.allocator = a;
+ p->children.allocator = a;
+ p->defer_stmts.allocator = a;
+ p->blocks.allocator = a;
+ p->branch_blocks.allocator = a;
+ p->context_stack.allocator = a;
+ p->scope_stack.allocator = a;
+ p->asan_stack_locals.allocator = a;
// map_init(&p->selector_values, 0);
// map_init(&p->selector_addr, 0);
// map_init(&p->tuple_fix_map, 0);
@@ -333,10 +341,10 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i
}
if (p->body && entity->pkg && ((entity->pkg->kind == Package_Normal) || (entity->pkg->kind == Package_Init))) {
- if (build_context.sanitizer_flags & SanitizerFlag_Address) {
+ if (build_context.sanitizer_flags & SanitizerFlag_Address && !entity->Procedure.no_sanitize_address) {
lb_add_attribute_to_proc(m, p->value, "sanitize_address");
}
- if (build_context.sanitizer_flags & SanitizerFlag_Memory) {
+ if (build_context.sanitizer_flags & SanitizerFlag_Memory && !entity->Procedure.no_sanitize_memory) {
lb_add_attribute_to_proc(m, p->value, "sanitize_memory");
}
if (build_context.sanitizer_flags & SanitizerFlag_Thread) {
@@ -385,11 +393,12 @@ gb_internal lbProcedure *lb_create_dummy_procedure(lbModule *m, String link_name
p->is_entry_point = false;
gbAllocator a = permanent_allocator();
- p->children.allocator = a;
- p->defer_stmts.allocator = a;
- p->blocks.allocator = a;
- p->branch_blocks.allocator = a;
- p->context_stack.allocator = a;
+ p->children.allocator = a;
+ p->defer_stmts.allocator = a;
+ p->blocks.allocator = a;
+ p->branch_blocks.allocator = a;
+ p->context_stack.allocator = a;
+ p->asan_stack_locals.allocator = a;
map_init(&p->tuple_fix_map, 0);
@@ -536,6 +545,9 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) {
GB_ASSERT(p->type != nullptr);
lb_ensure_abi_function_type(p->module, p);
+ if (p->type->Proc.calling_convention == ProcCC_Odin) {
+ lb_push_context_onto_stack_from_implicit_parameter(p);
+ }
{
lbFunctionType *ft = p->abi_function_type;
@@ -733,9 +745,6 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) {
}
}
- if (p->type->Proc.calling_convention == ProcCC_Odin) {
- lb_push_context_onto_stack_from_implicit_parameter(p);
- }
lb_set_debug_position_to_procedure_begin(p);
if (p->debug_info != nullptr) {
@@ -781,17 +790,15 @@ gb_internal void lb_end_procedure_body(lbProcedure *p) {
p->curr_block = nullptr;
p->state_flags = 0;
-}
-gb_internal void lb_end_procedure(lbProcedure *p) {
+
LLVMDisposeBuilder(p->builder);
}
gb_internal void lb_build_nested_proc(lbProcedure *p, AstProcLit *pd, Entity *e) {
GB_ASSERT(pd->body != nullptr);
lbModule *m = p->module;
- auto *min_dep_set = &m->info->minimum_dependency_set;
- if (ptr_set_exists(min_dep_set, e) == false) {
+ if (e->min_dep_count.load(std::memory_order_relaxed) == 0) {
// NOTE(bill): Nothing depends upon it so doesn't need to be built
return;
}
@@ -815,6 +822,10 @@ gb_internal void lb_build_nested_proc(lbProcedure *p, AstProcLit *pd, Entity *e)
e->Procedure.link_name = name;
lbProcedure *nested_proc = lb_create_procedure(p->module, e);
+ if (nested_proc == nullptr) {
+ // This is an unspecialized polymorphic procedure, skip codegen
+ return;
+ }
e->code_gen_procedure = nested_proc;
lbValue value = {};
@@ -823,7 +834,7 @@ gb_internal void lb_build_nested_proc(lbProcedure *p, AstProcLit *pd, Entity *e)
lb_add_entity(m, e, value);
array_add(&p->children, nested_proc);
- array_add(&m->procedures_to_generate, nested_proc);
+ mpsc_enqueue(&m->procedures_to_generate, nested_proc);
}
@@ -910,13 +921,14 @@ gb_internal lbValue lb_emit_call_internal(lbProcedure *p, lbValue value, lbValue
arg_type != param_type) {
LLVMTypeKind arg_kind = LLVMGetTypeKind(arg_type);
LLVMTypeKind param_kind = LLVMGetTypeKind(param_type);
- if (arg_kind == param_kind &&
- arg_kind == LLVMPointerTypeKind) {
- // NOTE(bill): LLVM's newer `ptr` only type system seems to fail at times
- // I don't know why...
- args[i] = LLVMBuildPointerCast(p->builder, args[i], param_type, "");
- arg_type = param_type;
- continue;
+ if (arg_kind == param_kind) {
+ if (arg_kind == LLVMPointerTypeKind) {
+ // NOTE(bill): LLVM's newer `ptr` only type system seems to fail at times
+ // I don't know why...
+ args[i] = LLVMBuildPointerCast(p->builder, args[i], param_type, "");
+ arg_type = param_type;
+ continue;
+ }
}
}
@@ -972,6 +984,7 @@ gb_internal lbValue lb_emit_call_internal(lbProcedure *p, lbValue value, lbValue
gb_internal lbValue lb_lookup_runtime_procedure(lbModule *m, String const &name) {
AstPackage *pkg = m->info->runtime_package;
Entity *e = scope_lookup_current(pkg->scope, name);
+ GB_ASSERT_MSG(e != nullptr, "Runtime procedure not found: %s", name);
return lb_find_procedure_value_from_entity(m, e);
}
@@ -1060,6 +1073,7 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> c
lbValue result = {};
+ isize ignored_args = 0;
auto processed_args = array_make<lbValue>(permanent_allocator(), 0, args.count);
{
@@ -1082,6 +1096,7 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> c
lbArgType *arg = &ft->args[param_index];
if (arg->kind == lbArg_Ignore) {
param_index += 1;
+ ignored_args += 1;
continue;
}
@@ -1190,7 +1205,7 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> c
auto tuple_fix_values = slice_make<lbValue>(permanent_allocator(), ret_count);
auto tuple_geps = slice_make<lbValue>(permanent_allocator(), ret_count);
- isize offset = ft->original_arg_count;
+ isize offset = ft->original_arg_count - ignored_args;
for (isize j = 0; j < ret_count-1; j++) {
lbValue ret_arg_ptr = processed_args[offset + j];
lbValue ret_arg = lb_emit_load(p, ret_arg_ptr);
@@ -1293,6 +1308,23 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
lbValue res = {};
res.type = tv.type;
+ switch (builtin_id) {
+ case BuiltinProc_simd_indices: {
+ Type *type = base_type(res.type);
+ GB_ASSERT(type->kind == Type_SimdVector);
+ Type *elem = type->SimdVector.elem;
+
+ i64 count = type->SimdVector.count;
+ LLVMValueRef *scalars = gb_alloc_array(temporary_allocator(), LLVMValueRef, count);
+ for (i64 i = 0; i < count; i++) {
+ scalars[i] = lb_const_value(m, elem, exact_value_i64(i)).value;
+ }
+
+ res.value = LLVMConstVector(scalars, cast(unsigned)count);
+ return res;
+ }
+ }
+
lbValue arg0 = {}; if (ce->args.count > 0) arg0 = lb_build_expr(p, ce->args[0]);
lbValue arg1 = {}; if (ce->args.count > 1) arg1 = lb_build_expr(p, ce->args[1]);
lbValue arg2 = {}; if (ce->args.count > 2) arg2 = lb_build_expr(p, ce->args[2]);
@@ -1442,7 +1474,7 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
LLVMRealPredicate pred = cast(LLVMRealPredicate)0;
switch (builtin_id) {
case BuiltinProc_simd_lanes_eq: pred = LLVMRealOEQ; break;
- case BuiltinProc_simd_lanes_ne: pred = LLVMRealONE; break;
+ case BuiltinProc_simd_lanes_ne: pred = LLVMRealUNE; break;
case BuiltinProc_simd_lanes_lt: pred = LLVMRealOLT; break;
case BuiltinProc_simd_lanes_le: pred = LLVMRealOLE; break;
case BuiltinProc_simd_lanes_gt: pred = LLVMRealOGT; break;
@@ -1478,6 +1510,38 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
res.value = LLVMBuildInsertElement(p->builder, arg0.value, arg2.value, arg1.value, "");
return res;
+ case BuiltinProc_simd_reduce_add_bisect:
+ case BuiltinProc_simd_reduce_mul_bisect:
+ {
+ GB_ASSERT(arg0.type->kind == Type_SimdVector);
+ i64 num_elems = arg0.type->SimdVector.count;
+
+ LLVMValueRef *indices = gb_alloc_array(temporary_allocator(), LLVMValueRef, num_elems);
+ for (i64 i = 0; i < num_elems; i++) {
+ indices[i] = lb_const_int(m, t_uint, cast(u64)i).value;
+ }
+
+ switch (builtin_id) {
+ case BuiltinProc_simd_reduce_add_bisect: op_code = is_float ? LLVMFAdd : LLVMAdd; break;
+ case BuiltinProc_simd_reduce_mul_bisect: op_code = is_float ? LLVMFMul : LLVMMul; break;
+ }
+
+ LLVMValueRef remaining = arg0.value;
+ i64 num_remaining = num_elems;
+
+ while (num_remaining > 1) {
+ num_remaining /= 2;
+ LLVMValueRef left_indices = LLVMConstVector(&indices[0], cast(unsigned)num_remaining);
+ LLVMValueRef left_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, left_indices, "");
+ LLVMValueRef right_indices = LLVMConstVector(&indices[num_remaining], cast(unsigned)num_remaining);
+ LLVMValueRef right_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, right_indices, "");
+ remaining = LLVMBuildBinOp(p->builder, op_code, left_value, right_value, "");
+ }
+
+ res.value = LLVMBuildExtractElement(p->builder, remaining, indices[0], "");
+ return res;
+ }
+
case BuiltinProc_simd_reduce_add_ordered:
case BuiltinProc_simd_reduce_mul_ordered:
{
@@ -1510,6 +1574,40 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
res.value = lb_call_intrinsic(p, name, args, cast(unsigned)args_count, types, gb_count_of(types));
return res;
}
+
+ case BuiltinProc_simd_reduce_add_pairs:
+ case BuiltinProc_simd_reduce_mul_pairs:
+ {
+ GB_ASSERT(arg0.type->kind == Type_SimdVector);
+ i64 num_elems = arg0.type->SimdVector.count;
+
+ LLVMValueRef *indices = gb_alloc_array(temporary_allocator(), LLVMValueRef, num_elems);
+ for (i64 i = 0; i < num_elems/2; i++) {
+ indices[i] = lb_const_int(m, t_uint, cast(u64)(2*i)).value;
+ indices[i+num_elems/2] = lb_const_int(m, t_uint, cast(u64)(2*i+1)).value;
+ }
+
+ switch (builtin_id) {
+ case BuiltinProc_simd_reduce_add_pairs: op_code = is_float ? LLVMFAdd : LLVMAdd; break;
+ case BuiltinProc_simd_reduce_mul_pairs: op_code = is_float ? LLVMFMul : LLVMMul; break;
+ }
+
+ LLVMValueRef remaining = arg0.value;
+ i64 num_remaining = num_elems;
+
+ while (num_remaining > 1) {
+ num_remaining /= 2;
+ LLVMValueRef left_indices = LLVMConstVector(&indices[0], cast(unsigned)num_remaining);
+ LLVMValueRef left_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, left_indices, "");
+ LLVMValueRef right_indices = LLVMConstVector(&indices[num_elems/2], cast(unsigned)num_remaining);
+ LLVMValueRef right_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, right_indices, "");
+ remaining = LLVMBuildBinOp(p->builder, op_code, left_value, right_value, "");
+ }
+
+ res.value = LLVMBuildExtractElement(p->builder, remaining, indices[0], "");
+ return res;
+ }
+
case BuiltinProc_simd_reduce_min:
case BuiltinProc_simd_reduce_max:
case BuiltinProc_simd_reduce_and:
@@ -1625,6 +1723,275 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
return res;
}
+ case BuiltinProc_simd_runtime_swizzle:
+ {
+ LLVMValueRef src = arg0.value;
+ LLVMValueRef indices = lb_build_expr(p, ce->args[1]).value;
+
+ Type *vt = arg0.type;
+ GB_ASSERT(vt->kind == Type_SimdVector);
+ i64 count = vt->SimdVector.count;
+ Type *elem_type = vt->SimdVector.elem;
+ i64 elem_size = type_size_of(elem_type);
+
+ // Determine strategy based on element size and target architecture
+ char const *intrinsic_name = nullptr;
+ bool use_hardware_runtime_swizzle = false;
+
+ // 8-bit elements: Use dedicated table lookup instructions
+ if (elem_size == 1) {
+ use_hardware_runtime_swizzle = true;
+
+ if (build_context.metrics.arch == TargetArch_amd64 || build_context.metrics.arch == TargetArch_i386) {
+ // x86/x86-64: Use pshufb intrinsics
+ switch (count) {
+ case 16:
+ intrinsic_name = "llvm.x86.ssse3.pshuf.b.128";
+ break;
+ case 32:
+ intrinsic_name = "llvm.x86.avx2.pshuf.b";
+ break;
+ case 64:
+ intrinsic_name = "llvm.x86.avx512.pshuf.b.512";
+ break;
+ default:
+ use_hardware_runtime_swizzle = false;
+ break;
+ }
+ } else if (build_context.metrics.arch == TargetArch_arm64) {
+ // ARM64: Use NEON tbl intrinsics with automatic table splitting
+ switch (count) {
+ case 16:
+ intrinsic_name = "llvm.aarch64.neon.tbl1";
+ break;
+ case 32:
+ intrinsic_name = "llvm.aarch64.neon.tbl2";
+ break;
+ case 48:
+ intrinsic_name = "llvm.aarch64.neon.tbl3";
+ break;
+ case 64:
+ intrinsic_name = "llvm.aarch64.neon.tbl4";
+ break;
+ default:
+ use_hardware_runtime_swizzle = false;
+ break;
+ }
+ } else if (build_context.metrics.arch == TargetArch_arm32) {
+ // ARM32: Use NEON vtbl intrinsics with automatic table splitting
+ switch (count) {
+ case 8:
+ intrinsic_name = "llvm.arm.neon.vtbl1";
+ break;
+ case 16:
+ intrinsic_name = "llvm.arm.neon.vtbl2";
+ break;
+ case 24:
+ intrinsic_name = "llvm.arm.neon.vtbl3";
+ break;
+ case 32:
+ intrinsic_name = "llvm.arm.neon.vtbl4";
+ break;
+ default:
+ use_hardware_runtime_swizzle = false;
+ break;
+ }
+ } else if (build_context.metrics.arch == TargetArch_wasm32 || build_context.metrics.arch == TargetArch_wasm64p32) {
+ // WebAssembly: Use swizzle (only supports 16-byte vectors)
+ if (count == 16) {
+ intrinsic_name = "llvm.wasm.swizzle";
+ } else {
+ use_hardware_runtime_swizzle = false;
+ }
+ } else {
+ use_hardware_runtime_swizzle = false;
+ }
+ }
+
+ if (use_hardware_runtime_swizzle && intrinsic_name != nullptr) {
+ // Use dedicated hardware swizzle instruction
+
+ // Check if required target features are enabled
+ bool features_enabled = true;
+ if (build_context.metrics.arch == TargetArch_amd64 || build_context.metrics.arch == TargetArch_i386) {
+ // x86/x86-64 feature checking
+ if (count == 16) {
+ // SSE/SSSE3 for 128-bit vectors
+ if (!check_target_feature_is_enabled(str_lit("ssse3"), nullptr)) {
+ features_enabled = false;
+ }
+ } else if (count == 32) {
+ // AVX2 requires ssse3 + avx2 features
+ if (!check_target_feature_is_enabled(str_lit("ssse3"), nullptr) ||
+ !check_target_feature_is_enabled(str_lit("avx2"), nullptr)) {
+ features_enabled = false;
+ }
+ } else if (count == 64) {
+ // AVX512 requires ssse3 + avx2 + avx512f + avx512bw features
+ if (!check_target_feature_is_enabled(str_lit("ssse3"), nullptr) ||
+ !check_target_feature_is_enabled(str_lit("avx2"), nullptr) ||
+ !check_target_feature_is_enabled(str_lit("avx512f"), nullptr) ||
+ !check_target_feature_is_enabled(str_lit("avx512bw"), nullptr)) {
+ features_enabled = false;
+ }
+ }
+ } else if (build_context.metrics.arch == TargetArch_arm64 || build_context.metrics.arch == TargetArch_arm32) {
+ // ARM/ARM64 feature checking - NEON is required for all table/swizzle ops
+ if (!check_target_feature_is_enabled(str_lit("neon"), nullptr)) {
+ features_enabled = false;
+ }
+ }
+
+ if (features_enabled) {
+ // Add target features to function attributes for LLVM instruction selection
+ if (build_context.metrics.arch == TargetArch_amd64 || build_context.metrics.arch == TargetArch_i386) {
+ // x86/x86-64 function attributes
+ if (count == 16) {
+ // SSE/SSSE3 for 128-bit vectors
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+ssse3"));
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("min-legal-vector-width"), str_lit("128"));
+ } else if (count == 32) {
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+avx,+avx2,+ssse3"));
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("min-legal-vector-width"), str_lit("256"));
+ } else if (count == 64) {
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+avx,+avx2,+avx512f,+avx512bw,+ssse3"));
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("min-legal-vector-width"), str_lit("512"));
+ }
+ } else if (build_context.metrics.arch == TargetArch_arm64) {
+ // ARM64 function attributes - enable NEON for swizzle instructions
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+neon"));
+ // Set appropriate vector width for multi-swizzle operations
+ if (count >= 32) {
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("min-legal-vector-width"), str_lit("256"));
+ }
+ } else if (build_context.metrics.arch == TargetArch_arm32) {
+ // ARM32 function attributes - enable NEON for swizzle instructions
+ lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+neon"));
+ }
+
+ // Handle ARM's multi-swizzle intrinsics by splitting the src vector
+ if (build_context.metrics.arch == TargetArch_arm64 && count > 16) {
+ // ARM64 TBL2/TBL3/TBL4: Split src into multiple 16-byte vectors
+ int num_tables = cast(int)(count / 16);
+ GB_ASSERT_MSG(count % 16 == 0, "ARM64 src size must be multiple of 16 bytes, got %lld bytes", count);
+ GB_ASSERT_MSG(num_tables <= 4, "ARM64 NEON supports maximum 4 tables (tbl4), got %d tables for %lld-byte vector", num_tables, count);
+
+ LLVMValueRef src_parts[4]; // Max 4 tables for tbl4
+ for (int i = 0; i < num_tables; i++) {
+ // Extract 16-byte slice from the larger src
+ LLVMValueRef indices_for_extract[16];
+ for (int j = 0; j < 16; j++) {
+ indices_for_extract[j] = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), i * 16 + j, false);
+ }
+ LLVMValueRef extract_mask = LLVMConstVector(indices_for_extract, 16);
+ src_parts[i] = LLVMBuildShuffleVector(p->builder, src, LLVMGetUndef(LLVMTypeOf(src)), extract_mask, "");
+ }
+
+ // Call appropriate ARM64 tbl intrinsic
+ if (count == 32) {
+ LLVMValueRef args[3] = { src_parts[0], src_parts[1], indices };
+ res.value = lb_call_intrinsic(p, intrinsic_name, args, 3, nullptr, 0);
+ } else if (count == 48) {
+ LLVMValueRef args[4] = { src_parts[0], src_parts[1], src_parts[2], indices };
+ res.value = lb_call_intrinsic(p, intrinsic_name, args, 4, nullptr, 0);
+ } else if (count == 64) {
+ LLVMValueRef args[5] = { src_parts[0], src_parts[1], src_parts[2], src_parts[3], indices };
+ res.value = lb_call_intrinsic(p, intrinsic_name, args, 5, nullptr, 0);
+ }
+ } else if (build_context.metrics.arch == TargetArch_arm32 && count > 8) {
+ // ARM32 VTBL2/VTBL3/VTBL4: Split src into multiple 8-byte vectors
+ int num_tables = cast(int)count / 8;
+ GB_ASSERT_MSG(count % 8 == 0, "ARM32 src size must be multiple of 8 bytes, got %lld bytes", count);
+ GB_ASSERT_MSG(num_tables <= 4, "ARM32 NEON supports maximum 4 tables (vtbl4), got %d tables for %lld-byte vector", num_tables, count);
+
+ LLVMValueRef src_parts[4]; // Max 4 tables for vtbl4
+ for (int i = 0; i < num_tables; i++) {
+ // Extract 8-byte slice from the larger src
+ LLVMValueRef indices_for_extract[8];
+ for (int j = 0; j < 8; j++) {
+ indices_for_extract[j] = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), i * 8 + j, false);
+ }
+ LLVMValueRef extract_mask = LLVMConstVector(indices_for_extract, 8);
+ src_parts[i] = LLVMBuildShuffleVector(p->builder, src, LLVMGetUndef(LLVMTypeOf(src)), extract_mask, "");
+ }
+
+ // Call appropriate ARM32 vtbl intrinsic
+ if (count == 16) {
+ LLVMValueRef args[3] = { src_parts[0], src_parts[1], indices };
+ res.value = lb_call_intrinsic(p, intrinsic_name, args, 3, nullptr, 0);
+ } else if (count == 24) {
+ LLVMValueRef args[4] = { src_parts[0], src_parts[1], src_parts[2], indices };
+ res.value = lb_call_intrinsic(p, intrinsic_name, args, 4, nullptr, 0);
+ } else if (count == 32) {
+ LLVMValueRef args[5] = { src_parts[0], src_parts[1], src_parts[2], src_parts[3], indices };
+ res.value = lb_call_intrinsic(p, intrinsic_name, args, 5, nullptr, 0);
+ }
+ } else {
+ // Single runtime swizzle case (x86, WebAssembly, ARM single-table)
+ LLVMValueRef args[2] = { src, indices };
+ res.value = lb_call_intrinsic(p, intrinsic_name, args, gb_count_of(args), nullptr, 0);
+ }
+ return res;
+ } else {
+ // Features not enabled, fall back to emulation
+ use_hardware_runtime_swizzle = false;
+ }
+ }
+
+ // Fallback: Emulate with extracts and inserts for all element sizes
+ GB_ASSERT(count > 0 && count <= 64); // Sanity check
+
+ LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, count);
+ LLVMTypeRef i32_type = LLVMInt32TypeInContext(p->module->ctx);
+ LLVMTypeRef elem_llvm_type = lb_type(p->module, elem_type);
+
+ // Calculate mask based on element size and vector count
+ i64 max_index = count - 1;
+ LLVMValueRef index_mask;
+
+ if (elem_size == 1) {
+ // 8-bit: mask to src size (like pshufb behavior)
+ index_mask = LLVMConstInt(elem_llvm_type, max_index, false);
+ } else if (elem_size == 2) {
+ // 16-bit: mask to src size
+ index_mask = LLVMConstInt(elem_llvm_type, max_index, false);
+ } else if (elem_size == 4) {
+ // 32-bit: mask to src size
+ index_mask = LLVMConstInt(elem_llvm_type, max_index, false);
+ } else {
+ // 64-bit: mask to src size
+ index_mask = LLVMConstInt(elem_llvm_type, max_index, false);
+ }
+
+ for (i64 i = 0; i < count; i++) {
+ LLVMValueRef idx_i = LLVMConstInt(i32_type, cast(unsigned)i, false);
+ LLVMValueRef index_elem = LLVMBuildExtractElement(p->builder, indices, idx_i, "");
+
+ // Mask index to valid range
+ LLVMValueRef masked_index = LLVMBuildAnd(p->builder, index_elem, index_mask, "");
+
+ // Convert to i32 for extractelement
+ LLVMValueRef index_i32;
+ if (LLVMGetIntTypeWidth(LLVMTypeOf(masked_index)) < 32) {
+ index_i32 = LLVMBuildZExt(p->builder, masked_index, i32_type, "");
+ } else if (LLVMGetIntTypeWidth(LLVMTypeOf(masked_index)) > 32) {
+ index_i32 = LLVMBuildTrunc(p->builder, masked_index, i32_type, "");
+ } else {
+ index_i32 = masked_index;
+ }
+
+ values[i] = LLVMBuildExtractElement(p->builder, src, index_i32, "");
+ }
+
+ // Build result vector
+ res.value = LLVMGetUndef(LLVMTypeOf(src));
+ for (i64 i = 0; i < count; i++) {
+ LLVMValueRef idx_i = LLVMConstInt(i32_type, cast(unsigned)i, false);
+ res.value = LLVMBuildInsertElement(p->builder, res.value, values[i], idx_i, "");
+ }
+ return res;
+ }
+
case BuiltinProc_simd_ceil:
case BuiltinProc_simd_floor:
case BuiltinProc_simd_trunc:
@@ -1844,7 +2211,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
GB_ASSERT(e != nullptr);
if (e->parent_proc_decl != nullptr && e->parent_proc_decl->entity != nullptr) {
- procedure = e->parent_proc_decl->entity->token.string;
+ procedure = e->parent_proc_decl->entity.load()->token.string;
} else {
procedure = str_lit("");
}
@@ -1866,12 +2233,12 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
LLVMValueRef values[2] = {};
values[0] = lb_const_string(m, file_name).value;
- values[1] = lb_const_string(m, file->data).value;
+ values[1] = lb_const_value(m, t_u8_slice, exact_value_string(file->data)).value;
LLVMValueRef element = llvm_const_named_struct(m, t_load_directory_file, values, gb_count_of(values));
elements[i] = element;
}
- LLVMValueRef backing_array = llvm_const_array(lb_type(m, t_load_directory_file), elements, count);
+ LLVMValueRef backing_array = llvm_const_array(m, lb_type(m, t_load_directory_file), elements, count);
Type *array_type = alloc_type_array(t_load_directory_file, count);
lbAddr backing_array_addr = lb_add_global_generated_from_procedure(p, array_type, {backing_array, array_type});
@@ -1922,6 +2289,10 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
}
if (is_type_cstring(t)) {
return lb_cstring_len(p, v);
+ } else if (is_type_cstring16(t)) {
+ return lb_cstring16_len(p, v);
+ } else if (is_type_string16(t)) {
+ return lb_string_len(p, v);
} else if (is_type_string(t)) {
return lb_string_len(p, v);
} else if (is_type_array(t)) {
@@ -2150,6 +2521,68 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
return lb_emit_load(p, tuple);
}
+ case BuiltinProc_compress_values: {
+ isize value_count = 0;
+ for (Ast *arg : ce->args) {
+ Type *t = arg->tav.type;
+ if (is_type_tuple(t)) {
+ value_count += t->Tuple.variables.count;
+ } else {
+ value_count += 1;
+ }
+ }
+
+ if (value_count == 1) {
+ lbValue x = lb_build_expr(p, ce->args[0]);
+ x = lb_emit_conv(p, x, tv.type);
+ return x;
+ }
+
+ Type *dt = base_type(tv.type);
+ lbAddr addr = lb_add_local_generated(p, tv.type, true);
+ if (is_type_struct(dt) || is_type_tuple(dt)) {
+ i32 index = 0;
+ for (Ast *arg : ce->args) {
+ lbValue x = lb_build_expr(p, arg);
+ if (is_type_tuple(x.type)) {
+ for (isize i = 0; i < x.type->Tuple.variables.count; i++) {
+ lbValue y = lb_emit_tuple_ev(p, x, cast(i32)i);
+ lbValue ptr = lb_emit_struct_ep(p, addr.addr, index++);
+ y = lb_emit_conv(p, y, type_deref(ptr.type));
+ lb_emit_store(p, ptr, y);
+ }
+ } else {
+ lbValue ptr = lb_emit_struct_ep(p, addr.addr, index++);
+ x = lb_emit_conv(p, x, type_deref(ptr.type));
+ lb_emit_store(p, ptr, x);
+ }
+ }
+ GB_ASSERT(index == value_count);
+ } else if (is_type_array_like(dt)) {
+ i32 index = 0;
+ for (Ast *arg : ce->args) {
+ lbValue x = lb_build_expr(p, arg);
+ if (is_type_tuple(x.type)) {
+ for (isize i = 0; i < x.type->Tuple.variables.count; i++) {
+ lbValue y = lb_emit_tuple_ev(p, x, cast(i32)i);
+ lbValue ptr = lb_emit_array_epi(p, addr.addr, index++);
+ y = lb_emit_conv(p, y, type_deref(ptr.type));
+ lb_emit_store(p, ptr, y);
+ }
+ } else {
+ lbValue ptr = lb_emit_array_epi(p, addr.addr, index++);
+ x = lb_emit_conv(p, x, type_deref(ptr.type));
+ lb_emit_store(p, ptr, x);
+ }
+ }
+ GB_ASSERT(index == value_count);
+ } else {
+ GB_PANIC("TODO(bill): compress_values -> %s", type_to_string(tv.type));
+ }
+
+ return lb_addr_load(p, addr);
+ }
+
case BuiltinProc_min: {
Type *t = type_of_expr(expr);
if (ce->args.count == 2) {
@@ -2299,6 +2732,11 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
res = lb_emit_conv(p, res, tv.type);
} else if (t->Basic.kind == Basic_cstring) {
res = lb_emit_conv(p, x, tv.type);
+ } else if (t->Basic.kind == Basic_string16) {
+ res = lb_string_elem(p, x);
+ res = lb_emit_conv(p, res, tv.type);
+ } else if (t->Basic.kind == Basic_cstring16) {
+ res = lb_emit_conv(p, x, tv.type);
}
break;
case Type_Pointer:
@@ -2382,6 +2820,21 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
}
return res;
}
+ case BuiltinProc_read_cycle_counter_frequency:
+ {
+ lbValue res = {};
+ res.type = tv.type;
+
+ if (build_context.metrics.arch == TargetArch_arm64) {
+ LLVMTypeRef func_type = LLVMFunctionType(LLVMInt64TypeInContext(p->module->ctx), nullptr, 0, false);
+ bool has_side_effects = false;
+ LLVMValueRef the_asm = llvm_get_inline_asm(func_type, str_lit("mrs $0, cntfrq_el0"), str_lit("=r"), has_side_effects);
+ GB_ASSERT(the_asm != nullptr);
+ res.value = LLVMBuildCall2(p->builder, func_type, the_asm, nullptr, 0, "");
+ }
+
+ return res;
+ }
case BuiltinProc_count_trailing_zeros:
return lb_emit_count_trailing_zeros(p, lb_build_expr(p, ce->args[0]), tv.type);
@@ -2616,15 +3069,18 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
LLVMSetMetadata(instr, kind_id, LLVMMetadataAsValue(p->module->ctx, node));
}
break;
- case BuiltinProc_volatile_store: LLVMSetVolatile(instr, true); break;
- case BuiltinProc_atomic_store: LLVMSetOrdering(instr, LLVMAtomicOrderingSequentiallyConsistent); break;
+ case BuiltinProc_volatile_store:
+ LLVMSetVolatile(instr, true);
+ break;
+ case BuiltinProc_atomic_store:
+ LLVMSetOrdering(instr, LLVMAtomicOrderingSequentiallyConsistent);
+ LLVMSetVolatile(instr, true);
+ break;
case BuiltinProc_atomic_store_explicit:
{
auto ordering = llvm_atomic_ordering_from_odin(ce->args[2]);
LLVMSetOrdering(instr, ordering);
- if (ordering == LLVMAtomicOrderingUnordered) {
- LLVMSetVolatile(instr, true);
- }
+ LLVMSetVolatile(instr, true);
}
break;
}
@@ -2650,15 +3106,18 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
}
break;
break;
- case BuiltinProc_volatile_load: LLVMSetVolatile(instr, true); break;
- case BuiltinProc_atomic_load: LLVMSetOrdering(instr, LLVMAtomicOrderingSequentiallyConsistent); break;
+ case BuiltinProc_volatile_load:
+ LLVMSetVolatile(instr, true);
+ break;
+ case BuiltinProc_atomic_load:
+ LLVMSetOrdering(instr, LLVMAtomicOrderingSequentiallyConsistent);
+ LLVMSetVolatile(instr, true);
+ break;
case BuiltinProc_atomic_load_explicit:
{
auto ordering = llvm_atomic_ordering_from_odin(ce->args[1]);
LLVMSetOrdering(instr, ordering);
- if (ordering == LLVMAtomicOrderingUnordered) {
- LLVMSetVolatile(instr, true);
- }
+ LLVMSetVolatile(instr, true);
}
break;
}
@@ -2743,9 +3202,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
lbValue res = {};
res.value = LLVMBuildAtomicRMW(p->builder, op, dst.value, val.value, ordering, false);
res.type = tv.type;
- if (ordering == LLVMAtomicOrderingUnordered) {
- LLVMSetVolatile(res.value, true);
- }
+ LLVMSetVolatile(res.value, true);
return res;
}
@@ -2781,9 +3238,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
single_threaded
);
LLVMSetWeak(value, weak);
- if (success_ordering == LLVMAtomicOrderingUnordered || failure_ordering == LLVMAtomicOrderingUnordered) {
- LLVMSetVolatile(value, true);
- }
+ LLVMSetVolatile(value, true);
if (is_type_tuple(tv.type)) {
Type *fix_typed = alloc_type_tuple();
@@ -2846,16 +3301,22 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
}
GB_ASSERT(name != nullptr);
- LLVMTypeRef types[1] = {lb_type(p->module, platform_type)};
lbValue res = {};
+ res.type = platform_type;
- LLVMValueRef args[3] = {
+ if (id == BuiltinProc_fixed_point_div ||
+ id == BuiltinProc_fixed_point_div_sat) {
+ res.value = lb_integer_division_intrinsics(p, x.value, y.value, scale.value, platform_type, name);
+ } else {
+ LLVMTypeRef types[1] = {lb_type(p->module, platform_type)};
+
+ LLVMValueRef args[3] = {
x.value,
y.value,
scale.value };
- res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types));
- res.type = platform_type;
+ res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types));
+ }
return lb_emit_conv(p, res, tv.type);
}
@@ -2972,6 +3433,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
constraints = gb_string_appendc(constraints, "}");
}
+ constraints = gb_string_appendc(constraints, ",~{memory}");
+
inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints));
}
break;
@@ -3034,6 +3497,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
constraints = gb_string_appendc(constraints, "}");
}
+ constraints = gb_string_appendc(constraints, ",~{memory}");
+
inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints));
}
break;
@@ -3059,6 +3524,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
constraints = gb_string_appendc(constraints, "}");
}
+ constraints = gb_string_appendc(constraints, ",~{memory}");
+
inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints));
} else {
char asm_string[] = "svc #0";
@@ -3078,6 +3545,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
constraints = gb_string_appendc(constraints, "}");
}
+ constraints = gb_string_appendc(constraints, ",~{memory}");
+
inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints));
}
}
@@ -3104,6 +3573,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
constraints = gb_string_appendc(constraints, "}");
}
+ constraints = gb_string_appendc(constraints, ",~{memory}");
+
inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints));
}
break;
@@ -3216,11 +3687,28 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
{
GB_ASSERT(arg_count <= 7);
- char asm_string[] = "svc #0; cset x8, cc";
- gbString constraints = gb_string_make(heap_allocator(), "={x0},={x8}");
- for (unsigned i = 0; i < arg_count; i++) {
- constraints = gb_string_appendc(constraints, ",{");
- static char const *regs[] = {
+ char const *asm_string;
+ char const **regs;
+ gbString constraints;
+
+ if (build_context.metrics.os == TargetOs_netbsd) {
+ asm_string = "svc #0; cset x17, cc";
+ constraints = gb_string_make(heap_allocator(), "={x0},={x17}");
+ static char const *_regs[] = {
+ "x17",
+ "x0",
+ "x1",
+ "x2",
+ "x3",
+ "x4",
+ "x5",
+ };
+ regs = _regs;
+ } else {
+ // FreeBSD (tested), OpenBSD (untested).
+ asm_string = "svc #0; cset x8, cc";
+ constraints = gb_string_make(heap_allocator(), "={x0},={x8}");
+ static char const *_regs[] = {
"x8",
"x0",
"x1",
@@ -3229,13 +3717,19 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
"x4",
"x5",
};
+ regs = _regs;
+
+ // FreeBSD clobbered x1 on a call to sysctl.
+ constraints = gb_string_appendc(constraints, ",~{x1}");
+ }
+
+ for (unsigned i = 0; i < arg_count; i++) {
+ constraints = gb_string_appendc(constraints, ",{");
constraints = gb_string_appendc(constraints, regs[i]);
constraints = gb_string_appendc(constraints, "}");
}
- // FreeBSD clobbered x1 on a call to sysctl.
- constraints = gb_string_appendc(constraints, ",~{x1},~{cc},~{memory}");
-
+ constraints = gb_string_appendc(constraints, ",~{cc},~{memory}");
inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints));
}
break;
@@ -3257,6 +3751,9 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
case BuiltinProc_objc_find_class: return lb_handle_objc_find_class(p, expr);
case BuiltinProc_objc_register_selector: return lb_handle_objc_register_selector(p, expr);
case BuiltinProc_objc_register_class: return lb_handle_objc_register_class(p, expr);
+ case BuiltinProc_objc_ivar_get: return lb_handle_objc_ivar_get(p, expr);
+ case BuiltinProc_objc_block: return lb_handle_objc_block(p, expr);
+ case BuiltinProc_objc_super: return lb_handle_objc_super(p, expr);
case BuiltinProc_constant_utf16_cstring:
@@ -3621,25 +4118,28 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
GB_ASSERT(ce->args.count == 1);
lbValue x = lb_build_expr(p, ce->args[0]);
lbValue y = lb_emit_conv(p, x, tv.type);
+ y.type = tv.type;
return y;
}
Ast *proc_expr = unparen_expr(ce->proc);
+ Entity *proc_entity = entity_of_node(proc_expr);
+
if (proc_mode == Addressing_Builtin) {
- Entity *e = entity_of_node(proc_expr);
BuiltinProcId id = BuiltinProc_Invalid;
- if (e != nullptr) {
- id = cast(BuiltinProcId)e->Builtin.id;
+ if (proc_entity != nullptr) {
+ id = cast(BuiltinProcId)proc_entity->Builtin.id;
} else {
id = BuiltinProc_DIRECTIVE;
}
return lb_build_builtin_proc(p, expr, tv, id);
}
+ bool is_objc_call = proc_entity && proc_entity->Procedure.is_objc_impl_or_import;
+
// NOTE(bill): Regular call
lbValue value = {};
- Entity *proc_entity = entity_of_node(proc_expr);
if (proc_entity != nullptr) {
if (proc_entity->flags & EntityFlag_Disabled) {
GB_ASSERT(tv.type == nullptr);
@@ -3673,11 +4173,13 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
}
}
- if (value.value == nullptr) {
+ if (is_objc_call) {
+ value.type = proc_tv.type;
+ } else if (value.value == nullptr) {
value = lb_build_expr(p, proc_expr);
}
- GB_ASSERT(value.value != nullptr);
+ GB_ASSERT(value.value != nullptr || is_objc_call);
Type *proc_type_ = base_type(value.type);
GB_ASSERT(proc_type_->kind == Type_Proc);
TypeProc *pt = &proc_type_->Proc;
@@ -3905,6 +4407,11 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
isize final_count = is_c_vararg ? args.count : pt->param_count;
auto call_args = array_slice(args, 0, final_count);
+
+ if (is_objc_call) {
+ return lb_handle_objc_auto_send(p, expr, slice(call_args, 0, call_args.count));
+ }
+
return lb_emit_call(p, value, call_args, ce->inlining);
}