diff options
Diffstat (limited to 'src/llvm_backend_proc.cpp')
| -rw-r--r-- | src/llvm_backend_proc.cpp | 645 |
1 files changed, 576 insertions, 69 deletions
diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index a835ae2c8..08ef42f01 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -67,6 +67,14 @@ gb_internal void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValu gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool ignore_body) { GB_ASSERT(entity != nullptr); GB_ASSERT(entity->kind == Entity_Procedure); + // Skip codegen for unspecialized polymorphic procedures + if (is_type_polymorphic(entity->type) && !entity->Procedure.is_foreign) { + Type *bt = base_type(entity->type); + if (bt->kind == Type_Proc && bt->Proc.is_polymorphic && !bt->Proc.is_poly_specialized) { + // Do not generate code for unspecialized polymorphic procedures + return nullptr; + } + } if (!entity->Procedure.is_foreign) { if ((entity->flags & EntityFlag_ProcBodyChecked) == 0) { GB_PANIC("%.*s :: %s (was parapoly: %d %d)", LIT(entity->token.string), type_to_string(entity->type), is_type_polymorphic(entity->type, true), is_type_polymorphic(entity->type, false)); @@ -76,7 +84,7 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i String link_name = {}; if (ignore_body) { - lbModule *other_module = lb_module_of_entity(m->gen, entity); + lbModule *other_module = lb_module_of_entity(m->gen, entity, m); link_name = lb_get_entity_name(other_module, entity); } else { link_name = lb_get_entity_name(m, entity); @@ -91,7 +99,6 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i } } - lbProcedure *p = gb_alloc_item(permanent_allocator(), lbProcedure); p->module = m; @@ -115,12 +122,13 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i p->is_entry_point = false; gbAllocator a = heap_allocator(); - p->children.allocator = a; - p->defer_stmts.allocator = a; - p->blocks.allocator = a; - p->branch_blocks.allocator = a; - p->context_stack.allocator = a; - p->scope_stack.allocator = a; + p->children.allocator = a; + p->defer_stmts.allocator = a; + p->blocks.allocator = a; + p->branch_blocks.allocator = a; + p->context_stack.allocator = a; + p->scope_stack.allocator = a; + p->asan_stack_locals.allocator = a; // map_init(&p->selector_values, 0); // map_init(&p->selector_addr, 0); // map_init(&p->tuple_fix_map, 0); @@ -333,10 +341,10 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i } if (p->body && entity->pkg && ((entity->pkg->kind == Package_Normal) || (entity->pkg->kind == Package_Init))) { - if (build_context.sanitizer_flags & SanitizerFlag_Address) { + if (build_context.sanitizer_flags & SanitizerFlag_Address && !entity->Procedure.no_sanitize_address) { lb_add_attribute_to_proc(m, p->value, "sanitize_address"); } - if (build_context.sanitizer_flags & SanitizerFlag_Memory) { + if (build_context.sanitizer_flags & SanitizerFlag_Memory && !entity->Procedure.no_sanitize_memory) { lb_add_attribute_to_proc(m, p->value, "sanitize_memory"); } if (build_context.sanitizer_flags & SanitizerFlag_Thread) { @@ -385,11 +393,12 @@ gb_internal lbProcedure *lb_create_dummy_procedure(lbModule *m, String link_name p->is_entry_point = false; gbAllocator a = permanent_allocator(); - p->children.allocator = a; - p->defer_stmts.allocator = a; - p->blocks.allocator = a; - p->branch_blocks.allocator = a; - p->context_stack.allocator = a; + p->children.allocator = a; + p->defer_stmts.allocator = a; + p->blocks.allocator = a; + p->branch_blocks.allocator = a; + p->context_stack.allocator = a; + p->asan_stack_locals.allocator = a; map_init(&p->tuple_fix_map, 0); @@ -536,6 +545,9 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { GB_ASSERT(p->type != nullptr); lb_ensure_abi_function_type(p->module, p); + if (p->type->Proc.calling_convention == ProcCC_Odin) { + lb_push_context_onto_stack_from_implicit_parameter(p); + } { lbFunctionType *ft = p->abi_function_type; @@ -733,9 +745,6 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { } } - if (p->type->Proc.calling_convention == ProcCC_Odin) { - lb_push_context_onto_stack_from_implicit_parameter(p); - } lb_set_debug_position_to_procedure_begin(p); if (p->debug_info != nullptr) { @@ -781,17 +790,15 @@ gb_internal void lb_end_procedure_body(lbProcedure *p) { p->curr_block = nullptr; p->state_flags = 0; -} -gb_internal void lb_end_procedure(lbProcedure *p) { + LLVMDisposeBuilder(p->builder); } gb_internal void lb_build_nested_proc(lbProcedure *p, AstProcLit *pd, Entity *e) { GB_ASSERT(pd->body != nullptr); lbModule *m = p->module; - auto *min_dep_set = &m->info->minimum_dependency_set; - if (ptr_set_exists(min_dep_set, e) == false) { + if (e->min_dep_count.load(std::memory_order_relaxed) == 0) { // NOTE(bill): Nothing depends upon it so doesn't need to be built return; } @@ -815,6 +822,10 @@ gb_internal void lb_build_nested_proc(lbProcedure *p, AstProcLit *pd, Entity *e) e->Procedure.link_name = name; lbProcedure *nested_proc = lb_create_procedure(p->module, e); + if (nested_proc == nullptr) { + // This is an unspecialized polymorphic procedure, skip codegen + return; + } e->code_gen_procedure = nested_proc; lbValue value = {}; @@ -823,7 +834,7 @@ gb_internal void lb_build_nested_proc(lbProcedure *p, AstProcLit *pd, Entity *e) lb_add_entity(m, e, value); array_add(&p->children, nested_proc); - array_add(&m->procedures_to_generate, nested_proc); + mpsc_enqueue(&m->procedures_to_generate, nested_proc); } @@ -910,13 +921,14 @@ gb_internal lbValue lb_emit_call_internal(lbProcedure *p, lbValue value, lbValue arg_type != param_type) { LLVMTypeKind arg_kind = LLVMGetTypeKind(arg_type); LLVMTypeKind param_kind = LLVMGetTypeKind(param_type); - if (arg_kind == param_kind && - arg_kind == LLVMPointerTypeKind) { - // NOTE(bill): LLVM's newer `ptr` only type system seems to fail at times - // I don't know why... - args[i] = LLVMBuildPointerCast(p->builder, args[i], param_type, ""); - arg_type = param_type; - continue; + if (arg_kind == param_kind) { + if (arg_kind == LLVMPointerTypeKind) { + // NOTE(bill): LLVM's newer `ptr` only type system seems to fail at times + // I don't know why... + args[i] = LLVMBuildPointerCast(p->builder, args[i], param_type, ""); + arg_type = param_type; + continue; + } } } @@ -972,6 +984,7 @@ gb_internal lbValue lb_emit_call_internal(lbProcedure *p, lbValue value, lbValue gb_internal lbValue lb_lookup_runtime_procedure(lbModule *m, String const &name) { AstPackage *pkg = m->info->runtime_package; Entity *e = scope_lookup_current(pkg->scope, name); + GB_ASSERT_MSG(e != nullptr, "Runtime procedure not found: %s", name); return lb_find_procedure_value_from_entity(m, e); } @@ -1060,6 +1073,7 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> c lbValue result = {}; + isize ignored_args = 0; auto processed_args = array_make<lbValue>(permanent_allocator(), 0, args.count); { @@ -1082,6 +1096,7 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> c lbArgType *arg = &ft->args[param_index]; if (arg->kind == lbArg_Ignore) { param_index += 1; + ignored_args += 1; continue; } @@ -1190,7 +1205,7 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> c auto tuple_fix_values = slice_make<lbValue>(permanent_allocator(), ret_count); auto tuple_geps = slice_make<lbValue>(permanent_allocator(), ret_count); - isize offset = ft->original_arg_count; + isize offset = ft->original_arg_count - ignored_args; for (isize j = 0; j < ret_count-1; j++) { lbValue ret_arg_ptr = processed_args[offset + j]; lbValue ret_arg = lb_emit_load(p, ret_arg_ptr); @@ -1293,6 +1308,23 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn lbValue res = {}; res.type = tv.type; + switch (builtin_id) { + case BuiltinProc_simd_indices: { + Type *type = base_type(res.type); + GB_ASSERT(type->kind == Type_SimdVector); + Type *elem = type->SimdVector.elem; + + i64 count = type->SimdVector.count; + LLVMValueRef *scalars = gb_alloc_array(temporary_allocator(), LLVMValueRef, count); + for (i64 i = 0; i < count; i++) { + scalars[i] = lb_const_value(m, elem, exact_value_i64(i)).value; + } + + res.value = LLVMConstVector(scalars, cast(unsigned)count); + return res; + } + } + lbValue arg0 = {}; if (ce->args.count > 0) arg0 = lb_build_expr(p, ce->args[0]); lbValue arg1 = {}; if (ce->args.count > 1) arg1 = lb_build_expr(p, ce->args[1]); lbValue arg2 = {}; if (ce->args.count > 2) arg2 = lb_build_expr(p, ce->args[2]); @@ -1442,7 +1474,7 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn LLVMRealPredicate pred = cast(LLVMRealPredicate)0; switch (builtin_id) { case BuiltinProc_simd_lanes_eq: pred = LLVMRealOEQ; break; - case BuiltinProc_simd_lanes_ne: pred = LLVMRealONE; break; + case BuiltinProc_simd_lanes_ne: pred = LLVMRealUNE; break; case BuiltinProc_simd_lanes_lt: pred = LLVMRealOLT; break; case BuiltinProc_simd_lanes_le: pred = LLVMRealOLE; break; case BuiltinProc_simd_lanes_gt: pred = LLVMRealOGT; break; @@ -1478,6 +1510,38 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn res.value = LLVMBuildInsertElement(p->builder, arg0.value, arg2.value, arg1.value, ""); return res; + case BuiltinProc_simd_reduce_add_bisect: + case BuiltinProc_simd_reduce_mul_bisect: + { + GB_ASSERT(arg0.type->kind == Type_SimdVector); + i64 num_elems = arg0.type->SimdVector.count; + + LLVMValueRef *indices = gb_alloc_array(temporary_allocator(), LLVMValueRef, num_elems); + for (i64 i = 0; i < num_elems; i++) { + indices[i] = lb_const_int(m, t_uint, cast(u64)i).value; + } + + switch (builtin_id) { + case BuiltinProc_simd_reduce_add_bisect: op_code = is_float ? LLVMFAdd : LLVMAdd; break; + case BuiltinProc_simd_reduce_mul_bisect: op_code = is_float ? LLVMFMul : LLVMMul; break; + } + + LLVMValueRef remaining = arg0.value; + i64 num_remaining = num_elems; + + while (num_remaining > 1) { + num_remaining /= 2; + LLVMValueRef left_indices = LLVMConstVector(&indices[0], cast(unsigned)num_remaining); + LLVMValueRef left_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, left_indices, ""); + LLVMValueRef right_indices = LLVMConstVector(&indices[num_remaining], cast(unsigned)num_remaining); + LLVMValueRef right_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, right_indices, ""); + remaining = LLVMBuildBinOp(p->builder, op_code, left_value, right_value, ""); + } + + res.value = LLVMBuildExtractElement(p->builder, remaining, indices[0], ""); + return res; + } + case BuiltinProc_simd_reduce_add_ordered: case BuiltinProc_simd_reduce_mul_ordered: { @@ -1510,6 +1574,40 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn res.value = lb_call_intrinsic(p, name, args, cast(unsigned)args_count, types, gb_count_of(types)); return res; } + + case BuiltinProc_simd_reduce_add_pairs: + case BuiltinProc_simd_reduce_mul_pairs: + { + GB_ASSERT(arg0.type->kind == Type_SimdVector); + i64 num_elems = arg0.type->SimdVector.count; + + LLVMValueRef *indices = gb_alloc_array(temporary_allocator(), LLVMValueRef, num_elems); + for (i64 i = 0; i < num_elems/2; i++) { + indices[i] = lb_const_int(m, t_uint, cast(u64)(2*i)).value; + indices[i+num_elems/2] = lb_const_int(m, t_uint, cast(u64)(2*i+1)).value; + } + + switch (builtin_id) { + case BuiltinProc_simd_reduce_add_pairs: op_code = is_float ? LLVMFAdd : LLVMAdd; break; + case BuiltinProc_simd_reduce_mul_pairs: op_code = is_float ? LLVMFMul : LLVMMul; break; + } + + LLVMValueRef remaining = arg0.value; + i64 num_remaining = num_elems; + + while (num_remaining > 1) { + num_remaining /= 2; + LLVMValueRef left_indices = LLVMConstVector(&indices[0], cast(unsigned)num_remaining); + LLVMValueRef left_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, left_indices, ""); + LLVMValueRef right_indices = LLVMConstVector(&indices[num_elems/2], cast(unsigned)num_remaining); + LLVMValueRef right_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, right_indices, ""); + remaining = LLVMBuildBinOp(p->builder, op_code, left_value, right_value, ""); + } + + res.value = LLVMBuildExtractElement(p->builder, remaining, indices[0], ""); + return res; + } + case BuiltinProc_simd_reduce_min: case BuiltinProc_simd_reduce_max: case BuiltinProc_simd_reduce_and: @@ -1625,6 +1723,275 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn return res; } + case BuiltinProc_simd_runtime_swizzle: + { + LLVMValueRef src = arg0.value; + LLVMValueRef indices = lb_build_expr(p, ce->args[1]).value; + + Type *vt = arg0.type; + GB_ASSERT(vt->kind == Type_SimdVector); + i64 count = vt->SimdVector.count; + Type *elem_type = vt->SimdVector.elem; + i64 elem_size = type_size_of(elem_type); + + // Determine strategy based on element size and target architecture + char const *intrinsic_name = nullptr; + bool use_hardware_runtime_swizzle = false; + + // 8-bit elements: Use dedicated table lookup instructions + if (elem_size == 1) { + use_hardware_runtime_swizzle = true; + + if (build_context.metrics.arch == TargetArch_amd64 || build_context.metrics.arch == TargetArch_i386) { + // x86/x86-64: Use pshufb intrinsics + switch (count) { + case 16: + intrinsic_name = "llvm.x86.ssse3.pshuf.b.128"; + break; + case 32: + intrinsic_name = "llvm.x86.avx2.pshuf.b"; + break; + case 64: + intrinsic_name = "llvm.x86.avx512.pshuf.b.512"; + break; + default: + use_hardware_runtime_swizzle = false; + break; + } + } else if (build_context.metrics.arch == TargetArch_arm64) { + // ARM64: Use NEON tbl intrinsics with automatic table splitting + switch (count) { + case 16: + intrinsic_name = "llvm.aarch64.neon.tbl1"; + break; + case 32: + intrinsic_name = "llvm.aarch64.neon.tbl2"; + break; + case 48: + intrinsic_name = "llvm.aarch64.neon.tbl3"; + break; + case 64: + intrinsic_name = "llvm.aarch64.neon.tbl4"; + break; + default: + use_hardware_runtime_swizzle = false; + break; + } + } else if (build_context.metrics.arch == TargetArch_arm32) { + // ARM32: Use NEON vtbl intrinsics with automatic table splitting + switch (count) { + case 8: + intrinsic_name = "llvm.arm.neon.vtbl1"; + break; + case 16: + intrinsic_name = "llvm.arm.neon.vtbl2"; + break; + case 24: + intrinsic_name = "llvm.arm.neon.vtbl3"; + break; + case 32: + intrinsic_name = "llvm.arm.neon.vtbl4"; + break; + default: + use_hardware_runtime_swizzle = false; + break; + } + } else if (build_context.metrics.arch == TargetArch_wasm32 || build_context.metrics.arch == TargetArch_wasm64p32) { + // WebAssembly: Use swizzle (only supports 16-byte vectors) + if (count == 16) { + intrinsic_name = "llvm.wasm.swizzle"; + } else { + use_hardware_runtime_swizzle = false; + } + } else { + use_hardware_runtime_swizzle = false; + } + } + + if (use_hardware_runtime_swizzle && intrinsic_name != nullptr) { + // Use dedicated hardware swizzle instruction + + // Check if required target features are enabled + bool features_enabled = true; + if (build_context.metrics.arch == TargetArch_amd64 || build_context.metrics.arch == TargetArch_i386) { + // x86/x86-64 feature checking + if (count == 16) { + // SSE/SSSE3 for 128-bit vectors + if (!check_target_feature_is_enabled(str_lit("ssse3"), nullptr)) { + features_enabled = false; + } + } else if (count == 32) { + // AVX2 requires ssse3 + avx2 features + if (!check_target_feature_is_enabled(str_lit("ssse3"), nullptr) || + !check_target_feature_is_enabled(str_lit("avx2"), nullptr)) { + features_enabled = false; + } + } else if (count == 64) { + // AVX512 requires ssse3 + avx2 + avx512f + avx512bw features + if (!check_target_feature_is_enabled(str_lit("ssse3"), nullptr) || + !check_target_feature_is_enabled(str_lit("avx2"), nullptr) || + !check_target_feature_is_enabled(str_lit("avx512f"), nullptr) || + !check_target_feature_is_enabled(str_lit("avx512bw"), nullptr)) { + features_enabled = false; + } + } + } else if (build_context.metrics.arch == TargetArch_arm64 || build_context.metrics.arch == TargetArch_arm32) { + // ARM/ARM64 feature checking - NEON is required for all table/swizzle ops + if (!check_target_feature_is_enabled(str_lit("neon"), nullptr)) { + features_enabled = false; + } + } + + if (features_enabled) { + // Add target features to function attributes for LLVM instruction selection + if (build_context.metrics.arch == TargetArch_amd64 || build_context.metrics.arch == TargetArch_i386) { + // x86/x86-64 function attributes + if (count == 16) { + // SSE/SSSE3 for 128-bit vectors + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+ssse3")); + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("min-legal-vector-width"), str_lit("128")); + } else if (count == 32) { + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+avx,+avx2,+ssse3")); + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("min-legal-vector-width"), str_lit("256")); + } else if (count == 64) { + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+avx,+avx2,+avx512f,+avx512bw,+ssse3")); + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("min-legal-vector-width"), str_lit("512")); + } + } else if (build_context.metrics.arch == TargetArch_arm64) { + // ARM64 function attributes - enable NEON for swizzle instructions + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+neon")); + // Set appropriate vector width for multi-swizzle operations + if (count >= 32) { + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("min-legal-vector-width"), str_lit("256")); + } + } else if (build_context.metrics.arch == TargetArch_arm32) { + // ARM32 function attributes - enable NEON for swizzle instructions + lb_add_attribute_to_proc_with_string(p->module, p->value, str_lit("target-features"), str_lit("+neon")); + } + + // Handle ARM's multi-swizzle intrinsics by splitting the src vector + if (build_context.metrics.arch == TargetArch_arm64 && count > 16) { + // ARM64 TBL2/TBL3/TBL4: Split src into multiple 16-byte vectors + int num_tables = cast(int)(count / 16); + GB_ASSERT_MSG(count % 16 == 0, "ARM64 src size must be multiple of 16 bytes, got %lld bytes", count); + GB_ASSERT_MSG(num_tables <= 4, "ARM64 NEON supports maximum 4 tables (tbl4), got %d tables for %lld-byte vector", num_tables, count); + + LLVMValueRef src_parts[4]; // Max 4 tables for tbl4 + for (int i = 0; i < num_tables; i++) { + // Extract 16-byte slice from the larger src + LLVMValueRef indices_for_extract[16]; + for (int j = 0; j < 16; j++) { + indices_for_extract[j] = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), i * 16 + j, false); + } + LLVMValueRef extract_mask = LLVMConstVector(indices_for_extract, 16); + src_parts[i] = LLVMBuildShuffleVector(p->builder, src, LLVMGetUndef(LLVMTypeOf(src)), extract_mask, ""); + } + + // Call appropriate ARM64 tbl intrinsic + if (count == 32) { + LLVMValueRef args[3] = { src_parts[0], src_parts[1], indices }; + res.value = lb_call_intrinsic(p, intrinsic_name, args, 3, nullptr, 0); + } else if (count == 48) { + LLVMValueRef args[4] = { src_parts[0], src_parts[1], src_parts[2], indices }; + res.value = lb_call_intrinsic(p, intrinsic_name, args, 4, nullptr, 0); + } else if (count == 64) { + LLVMValueRef args[5] = { src_parts[0], src_parts[1], src_parts[2], src_parts[3], indices }; + res.value = lb_call_intrinsic(p, intrinsic_name, args, 5, nullptr, 0); + } + } else if (build_context.metrics.arch == TargetArch_arm32 && count > 8) { + // ARM32 VTBL2/VTBL3/VTBL4: Split src into multiple 8-byte vectors + int num_tables = cast(int)count / 8; + GB_ASSERT_MSG(count % 8 == 0, "ARM32 src size must be multiple of 8 bytes, got %lld bytes", count); + GB_ASSERT_MSG(num_tables <= 4, "ARM32 NEON supports maximum 4 tables (vtbl4), got %d tables for %lld-byte vector", num_tables, count); + + LLVMValueRef src_parts[4]; // Max 4 tables for vtbl4 + for (int i = 0; i < num_tables; i++) { + // Extract 8-byte slice from the larger src + LLVMValueRef indices_for_extract[8]; + for (int j = 0; j < 8; j++) { + indices_for_extract[j] = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), i * 8 + j, false); + } + LLVMValueRef extract_mask = LLVMConstVector(indices_for_extract, 8); + src_parts[i] = LLVMBuildShuffleVector(p->builder, src, LLVMGetUndef(LLVMTypeOf(src)), extract_mask, ""); + } + + // Call appropriate ARM32 vtbl intrinsic + if (count == 16) { + LLVMValueRef args[3] = { src_parts[0], src_parts[1], indices }; + res.value = lb_call_intrinsic(p, intrinsic_name, args, 3, nullptr, 0); + } else if (count == 24) { + LLVMValueRef args[4] = { src_parts[0], src_parts[1], src_parts[2], indices }; + res.value = lb_call_intrinsic(p, intrinsic_name, args, 4, nullptr, 0); + } else if (count == 32) { + LLVMValueRef args[5] = { src_parts[0], src_parts[1], src_parts[2], src_parts[3], indices }; + res.value = lb_call_intrinsic(p, intrinsic_name, args, 5, nullptr, 0); + } + } else { + // Single runtime swizzle case (x86, WebAssembly, ARM single-table) + LLVMValueRef args[2] = { src, indices }; + res.value = lb_call_intrinsic(p, intrinsic_name, args, gb_count_of(args), nullptr, 0); + } + return res; + } else { + // Features not enabled, fall back to emulation + use_hardware_runtime_swizzle = false; + } + } + + // Fallback: Emulate with extracts and inserts for all element sizes + GB_ASSERT(count > 0 && count <= 64); // Sanity check + + LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, count); + LLVMTypeRef i32_type = LLVMInt32TypeInContext(p->module->ctx); + LLVMTypeRef elem_llvm_type = lb_type(p->module, elem_type); + + // Calculate mask based on element size and vector count + i64 max_index = count - 1; + LLVMValueRef index_mask; + + if (elem_size == 1) { + // 8-bit: mask to src size (like pshufb behavior) + index_mask = LLVMConstInt(elem_llvm_type, max_index, false); + } else if (elem_size == 2) { + // 16-bit: mask to src size + index_mask = LLVMConstInt(elem_llvm_type, max_index, false); + } else if (elem_size == 4) { + // 32-bit: mask to src size + index_mask = LLVMConstInt(elem_llvm_type, max_index, false); + } else { + // 64-bit: mask to src size + index_mask = LLVMConstInt(elem_llvm_type, max_index, false); + } + + for (i64 i = 0; i < count; i++) { + LLVMValueRef idx_i = LLVMConstInt(i32_type, cast(unsigned)i, false); + LLVMValueRef index_elem = LLVMBuildExtractElement(p->builder, indices, idx_i, ""); + + // Mask index to valid range + LLVMValueRef masked_index = LLVMBuildAnd(p->builder, index_elem, index_mask, ""); + + // Convert to i32 for extractelement + LLVMValueRef index_i32; + if (LLVMGetIntTypeWidth(LLVMTypeOf(masked_index)) < 32) { + index_i32 = LLVMBuildZExt(p->builder, masked_index, i32_type, ""); + } else if (LLVMGetIntTypeWidth(LLVMTypeOf(masked_index)) > 32) { + index_i32 = LLVMBuildTrunc(p->builder, masked_index, i32_type, ""); + } else { + index_i32 = masked_index; + } + + values[i] = LLVMBuildExtractElement(p->builder, src, index_i32, ""); + } + + // Build result vector + res.value = LLVMGetUndef(LLVMTypeOf(src)); + for (i64 i = 0; i < count; i++) { + LLVMValueRef idx_i = LLVMConstInt(i32_type, cast(unsigned)i, false); + res.value = LLVMBuildInsertElement(p->builder, res.value, values[i], idx_i, ""); + } + return res; + } + case BuiltinProc_simd_ceil: case BuiltinProc_simd_floor: case BuiltinProc_simd_trunc: @@ -1844,7 +2211,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu GB_ASSERT(e != nullptr); if (e->parent_proc_decl != nullptr && e->parent_proc_decl->entity != nullptr) { - procedure = e->parent_proc_decl->entity->token.string; + procedure = e->parent_proc_decl->entity.load()->token.string; } else { procedure = str_lit(""); } @@ -1866,12 +2233,12 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu LLVMValueRef values[2] = {}; values[0] = lb_const_string(m, file_name).value; - values[1] = lb_const_string(m, file->data).value; + values[1] = lb_const_value(m, t_u8_slice, exact_value_string(file->data)).value; LLVMValueRef element = llvm_const_named_struct(m, t_load_directory_file, values, gb_count_of(values)); elements[i] = element; } - LLVMValueRef backing_array = llvm_const_array(lb_type(m, t_load_directory_file), elements, count); + LLVMValueRef backing_array = llvm_const_array(m, lb_type(m, t_load_directory_file), elements, count); Type *array_type = alloc_type_array(t_load_directory_file, count); lbAddr backing_array_addr = lb_add_global_generated_from_procedure(p, array_type, {backing_array, array_type}); @@ -1922,6 +2289,10 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } if (is_type_cstring(t)) { return lb_cstring_len(p, v); + } else if (is_type_cstring16(t)) { + return lb_cstring16_len(p, v); + } else if (is_type_string16(t)) { + return lb_string_len(p, v); } else if (is_type_string(t)) { return lb_string_len(p, v); } else if (is_type_array(t)) { @@ -2150,6 +2521,68 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu return lb_emit_load(p, tuple); } + case BuiltinProc_compress_values: { + isize value_count = 0; + for (Ast *arg : ce->args) { + Type *t = arg->tav.type; + if (is_type_tuple(t)) { + value_count += t->Tuple.variables.count; + } else { + value_count += 1; + } + } + + if (value_count == 1) { + lbValue x = lb_build_expr(p, ce->args[0]); + x = lb_emit_conv(p, x, tv.type); + return x; + } + + Type *dt = base_type(tv.type); + lbAddr addr = lb_add_local_generated(p, tv.type, true); + if (is_type_struct(dt) || is_type_tuple(dt)) { + i32 index = 0; + for (Ast *arg : ce->args) { + lbValue x = lb_build_expr(p, arg); + if (is_type_tuple(x.type)) { + for (isize i = 0; i < x.type->Tuple.variables.count; i++) { + lbValue y = lb_emit_tuple_ev(p, x, cast(i32)i); + lbValue ptr = lb_emit_struct_ep(p, addr.addr, index++); + y = lb_emit_conv(p, y, type_deref(ptr.type)); + lb_emit_store(p, ptr, y); + } + } else { + lbValue ptr = lb_emit_struct_ep(p, addr.addr, index++); + x = lb_emit_conv(p, x, type_deref(ptr.type)); + lb_emit_store(p, ptr, x); + } + } + GB_ASSERT(index == value_count); + } else if (is_type_array_like(dt)) { + i32 index = 0; + for (Ast *arg : ce->args) { + lbValue x = lb_build_expr(p, arg); + if (is_type_tuple(x.type)) { + for (isize i = 0; i < x.type->Tuple.variables.count; i++) { + lbValue y = lb_emit_tuple_ev(p, x, cast(i32)i); + lbValue ptr = lb_emit_array_epi(p, addr.addr, index++); + y = lb_emit_conv(p, y, type_deref(ptr.type)); + lb_emit_store(p, ptr, y); + } + } else { + lbValue ptr = lb_emit_array_epi(p, addr.addr, index++); + x = lb_emit_conv(p, x, type_deref(ptr.type)); + lb_emit_store(p, ptr, x); + } + } + GB_ASSERT(index == value_count); + } else { + GB_PANIC("TODO(bill): compress_values -> %s", type_to_string(tv.type)); + } + + return lb_addr_load(p, addr); + } + case BuiltinProc_min: { Type *t = type_of_expr(expr); if (ce->args.count == 2) { @@ -2299,6 +2732,11 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu res = lb_emit_conv(p, res, tv.type); } else if (t->Basic.kind == Basic_cstring) { res = lb_emit_conv(p, x, tv.type); + } else if (t->Basic.kind == Basic_string16) { + res = lb_string_elem(p, x); + res = lb_emit_conv(p, res, tv.type); + } else if (t->Basic.kind == Basic_cstring16) { + res = lb_emit_conv(p, x, tv.type); } break; case Type_Pointer: @@ -2382,6 +2820,21 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } return res; } + case BuiltinProc_read_cycle_counter_frequency: + { + lbValue res = {}; + res.type = tv.type; + + if (build_context.metrics.arch == TargetArch_arm64) { + LLVMTypeRef func_type = LLVMFunctionType(LLVMInt64TypeInContext(p->module->ctx), nullptr, 0, false); + bool has_side_effects = false; + LLVMValueRef the_asm = llvm_get_inline_asm(func_type, str_lit("mrs $0, cntfrq_el0"), str_lit("=r"), has_side_effects); + GB_ASSERT(the_asm != nullptr); + res.value = LLVMBuildCall2(p->builder, func_type, the_asm, nullptr, 0, ""); + } + + return res; + } case BuiltinProc_count_trailing_zeros: return lb_emit_count_trailing_zeros(p, lb_build_expr(p, ce->args[0]), tv.type); @@ -2616,15 +3069,18 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu LLVMSetMetadata(instr, kind_id, LLVMMetadataAsValue(p->module->ctx, node)); } break; - case BuiltinProc_volatile_store: LLVMSetVolatile(instr, true); break; - case BuiltinProc_atomic_store: LLVMSetOrdering(instr, LLVMAtomicOrderingSequentiallyConsistent); break; + case BuiltinProc_volatile_store: + LLVMSetVolatile(instr, true); + break; + case BuiltinProc_atomic_store: + LLVMSetOrdering(instr, LLVMAtomicOrderingSequentiallyConsistent); + LLVMSetVolatile(instr, true); + break; case BuiltinProc_atomic_store_explicit: { auto ordering = llvm_atomic_ordering_from_odin(ce->args[2]); LLVMSetOrdering(instr, ordering); - if (ordering == LLVMAtomicOrderingUnordered) { - LLVMSetVolatile(instr, true); - } + LLVMSetVolatile(instr, true); } break; } @@ -2650,15 +3106,18 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } break; break; - case BuiltinProc_volatile_load: LLVMSetVolatile(instr, true); break; - case BuiltinProc_atomic_load: LLVMSetOrdering(instr, LLVMAtomicOrderingSequentiallyConsistent); break; + case BuiltinProc_volatile_load: + LLVMSetVolatile(instr, true); + break; + case BuiltinProc_atomic_load: + LLVMSetOrdering(instr, LLVMAtomicOrderingSequentiallyConsistent); + LLVMSetVolatile(instr, true); + break; case BuiltinProc_atomic_load_explicit: { auto ordering = llvm_atomic_ordering_from_odin(ce->args[1]); LLVMSetOrdering(instr, ordering); - if (ordering == LLVMAtomicOrderingUnordered) { - LLVMSetVolatile(instr, true); - } + LLVMSetVolatile(instr, true); } break; } @@ -2743,9 +3202,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu lbValue res = {}; res.value = LLVMBuildAtomicRMW(p->builder, op, dst.value, val.value, ordering, false); res.type = tv.type; - if (ordering == LLVMAtomicOrderingUnordered) { - LLVMSetVolatile(res.value, true); - } + LLVMSetVolatile(res.value, true); return res; } @@ -2781,9 +3238,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu single_threaded ); LLVMSetWeak(value, weak); - if (success_ordering == LLVMAtomicOrderingUnordered || failure_ordering == LLVMAtomicOrderingUnordered) { - LLVMSetVolatile(value, true); - } + LLVMSetVolatile(value, true); if (is_type_tuple(tv.type)) { Type *fix_typed = alloc_type_tuple(); @@ -2846,16 +3301,22 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } GB_ASSERT(name != nullptr); - LLVMTypeRef types[1] = {lb_type(p->module, platform_type)}; lbValue res = {}; + res.type = platform_type; - LLVMValueRef args[3] = { + if (id == BuiltinProc_fixed_point_div || + id == BuiltinProc_fixed_point_div_sat) { + res.value = lb_integer_division_intrinsics(p, x.value, y.value, scale.value, platform_type, name); + } else { + LLVMTypeRef types[1] = {lb_type(p->module, platform_type)}; + + LLVMValueRef args[3] = { x.value, y.value, scale.value }; - res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); - res.type = platform_type; + res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); + } return lb_emit_conv(p, res, tv.type); } @@ -2972,6 +3433,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } break; @@ -3034,6 +3497,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } break; @@ -3059,6 +3524,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } else { char asm_string[] = "svc #0"; @@ -3078,6 +3545,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } } @@ -3104,6 +3573,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu constraints = gb_string_appendc(constraints, "}"); } + constraints = gb_string_appendc(constraints, ",~{memory}"); + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } break; @@ -3216,11 +3687,28 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu { GB_ASSERT(arg_count <= 7); - char asm_string[] = "svc #0; cset x8, cc"; - gbString constraints = gb_string_make(heap_allocator(), "={x0},={x8}"); - for (unsigned i = 0; i < arg_count; i++) { - constraints = gb_string_appendc(constraints, ",{"); - static char const *regs[] = { + char const *asm_string; + char const **regs; + gbString constraints; + + if (build_context.metrics.os == TargetOs_netbsd) { + asm_string = "svc #0; cset x17, cc"; + constraints = gb_string_make(heap_allocator(), "={x0},={x17}"); + static char const *_regs[] = { + "x17", + "x0", + "x1", + "x2", + "x3", + "x4", + "x5", + }; + regs = _regs; + } else { + // FreeBSD (tested), OpenBSD (untested). + asm_string = "svc #0; cset x8, cc"; + constraints = gb_string_make(heap_allocator(), "={x0},={x8}"); + static char const *_regs[] = { "x8", "x0", "x1", @@ -3229,13 +3717,19 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu "x4", "x5", }; + regs = _regs; + + // FreeBSD clobbered x1 on a call to sysctl. + constraints = gb_string_appendc(constraints, ",~{x1}"); + } + + for (unsigned i = 0; i < arg_count; i++) { + constraints = gb_string_appendc(constraints, ",{"); constraints = gb_string_appendc(constraints, regs[i]); constraints = gb_string_appendc(constraints, "}"); } - // FreeBSD clobbered x1 on a call to sysctl. - constraints = gb_string_appendc(constraints, ",~{x1},~{cc},~{memory}"); - + constraints = gb_string_appendc(constraints, ",~{cc},~{memory}"); inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } break; @@ -3257,6 +3751,9 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_objc_find_class: return lb_handle_objc_find_class(p, expr); case BuiltinProc_objc_register_selector: return lb_handle_objc_register_selector(p, expr); case BuiltinProc_objc_register_class: return lb_handle_objc_register_class(p, expr); + case BuiltinProc_objc_ivar_get: return lb_handle_objc_ivar_get(p, expr); + case BuiltinProc_objc_block: return lb_handle_objc_block(p, expr); + case BuiltinProc_objc_super: return lb_handle_objc_super(p, expr); case BuiltinProc_constant_utf16_cstring: @@ -3621,25 +4118,28 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { GB_ASSERT(ce->args.count == 1); lbValue x = lb_build_expr(p, ce->args[0]); lbValue y = lb_emit_conv(p, x, tv.type); + y.type = tv.type; return y; } Ast *proc_expr = unparen_expr(ce->proc); + Entity *proc_entity = entity_of_node(proc_expr); + if (proc_mode == Addressing_Builtin) { - Entity *e = entity_of_node(proc_expr); BuiltinProcId id = BuiltinProc_Invalid; - if (e != nullptr) { - id = cast(BuiltinProcId)e->Builtin.id; + if (proc_entity != nullptr) { + id = cast(BuiltinProcId)proc_entity->Builtin.id; } else { id = BuiltinProc_DIRECTIVE; } return lb_build_builtin_proc(p, expr, tv, id); } + bool is_objc_call = proc_entity && proc_entity->Procedure.is_objc_impl_or_import; + // NOTE(bill): Regular call lbValue value = {}; - Entity *proc_entity = entity_of_node(proc_expr); if (proc_entity != nullptr) { if (proc_entity->flags & EntityFlag_Disabled) { GB_ASSERT(tv.type == nullptr); @@ -3673,11 +4173,13 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { } } - if (value.value == nullptr) { + if (is_objc_call) { + value.type = proc_tv.type; + } else if (value.value == nullptr) { value = lb_build_expr(p, proc_expr); } - GB_ASSERT(value.value != nullptr); + GB_ASSERT(value.value != nullptr || is_objc_call); Type *proc_type_ = base_type(value.type); GB_ASSERT(proc_type_->kind == Type_Proc); TypeProc *pt = &proc_type_->Proc; @@ -3905,6 +4407,11 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { isize final_count = is_c_vararg ? args.count : pt->param_count; auto call_args = array_slice(args, 0, final_count); + + if (is_objc_call) { + return lb_handle_objc_auto_send(p, expr, slice(call_args, 0, call_args.count)); + } + return lb_emit_call(p, value, call_args, ce->inlining); } |