about | summary | refs | log | tree | commit | diff
path: root/src/llvm_backend_proc.cpp
diff options
context:
space:
mode:
author: gingerBill <gingerBill@users.noreply.github.com> 2025-05-06 15:46:49 +0100
committer: GitHub <noreply@github.com> 2025-05-06 15:46:49 +0100
commit: 0cf5b5984de51c18382b07f20b997f89442fab36 (patch)
tree: bf1dd12e79c8ff88ba3495e4a24708eea372e398 /src/llvm_backend_proc.cpp
parent: e07451898396c84b6a80e63c3e283939d699784a (diff)
parent: dd5b7852ce569027e87d77f46601210aa4180947 (diff)
Merge pull request #5108 from Barinzaya/core-simd-indices-redadd-redmul
Alternate `reduce_add`/`reduce_mul` intrinsics
Diffstat (limited to 'src/llvm_backend_proc.cpp')
-rw-r--r-- src/llvm_backend_proc.cpp 66
1 files changed, 66 insertions, 0 deletions
diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp
index 7bd8dea59..14157455e 100644
--- a/src/llvm_backend_proc.cpp
+++ b/src/llvm_backend_proc.cpp
@@ -1495,6 +1495,38 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
res.value = LLVMBuildInsertElement(p->builder, arg0.value, arg2.value, arg1.value, "");
return res;
+ case BuiltinProc_simd_reduce_add_bisect:
+ case BuiltinProc_simd_reduce_mul_bisect:
+ {
+ GB_ASSERT(arg0.type->kind == Type_SimdVector);
+ i64 num_elems = arg0.type->SimdVector.count;
+
+ LLVMValueRef *indices = gb_alloc_array(temporary_allocator(), LLVMValueRef, num_elems);
+ for (i64 i = 0; i < num_elems; i++) {
+ indices[i] = lb_const_int(m, t_uint, cast(u64)i).value;
+ }
+
+ switch (builtin_id) {
+ case BuiltinProc_simd_reduce_add_bisect: op_code = is_float ? LLVMFAdd : LLVMAdd; break;
+ case BuiltinProc_simd_reduce_mul_bisect: op_code = is_float ? LLVMFMul : LLVMMul; break;
+ }
+
+ LLVMValueRef remaining = arg0.value;
+ i64 num_remaining = num_elems;
+
+ while (num_remaining > 1) {
+ num_remaining /= 2;
+ LLVMValueRef left_indices = LLVMConstVector(&indices[0], cast(unsigned)num_remaining);
+ LLVMValueRef left_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, left_indices, "");
+ LLVMValueRef right_indices = LLVMConstVector(&indices[num_remaining], cast(unsigned)num_remaining);
+ LLVMValueRef right_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, right_indices, "");
+ remaining = LLVMBuildBinOp(p->builder, op_code, left_value, right_value, "");
+ }
+
+ res.value = LLVMBuildExtractElement(p->builder, remaining, indices[0], "");
+ return res;
+ }
+
case BuiltinProc_simd_reduce_add_ordered:
case BuiltinProc_simd_reduce_mul_ordered:
{
@@ -1527,6 +1559,40 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
res.value = lb_call_intrinsic(p, name, args, cast(unsigned)args_count, types, gb_count_of(types));
return res;
}
+
+ case BuiltinProc_simd_reduce_add_pairs:
+ case BuiltinProc_simd_reduce_mul_pairs:
+ {
+ GB_ASSERT(arg0.type->kind == Type_SimdVector);
+ i64 num_elems = arg0.type->SimdVector.count;
+
+ LLVMValueRef *indices = gb_alloc_array(temporary_allocator(), LLVMValueRef, num_elems);
+ for (i64 i = 0; i < num_elems/2; i++) {
+ indices[i] = lb_const_int(m, t_uint, cast(u64)(2*i)).value;
+ indices[i+num_elems/2] = lb_const_int(m, t_uint, cast(u64)(2*i+1)).value;
+ }
+
+ switch (builtin_id) {
+ case BuiltinProc_simd_reduce_add_pairs: op_code = is_float ? LLVMFAdd : LLVMAdd; break;
+ case BuiltinProc_simd_reduce_mul_pairs: op_code = is_float ? LLVMFMul : LLVMMul; break;
+ }
+
+ LLVMValueRef remaining = arg0.value;
+ i64 num_remaining = num_elems;
+
+ while (num_remaining > 1) {
+ num_remaining /= 2;
+ LLVMValueRef left_indices = LLVMConstVector(&indices[0], cast(unsigned)num_remaining);
+ LLVMValueRef left_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, left_indices, "");
+ LLVMValueRef right_indices = LLVMConstVector(&indices[num_elems/2], cast(unsigned)num_remaining);
+ LLVMValueRef right_value = LLVMBuildShuffleVector(p->builder, remaining, remaining, right_indices, "");
+ remaining = LLVMBuildBinOp(p->builder, op_code, left_value, right_value, "");
+ }
+
+ res.value = LLVMBuildExtractElement(p->builder, remaining, indices[0], "");
+ return res;
+ }
+
case BuiltinProc_simd_reduce_min:
case BuiltinProc_simd_reduce_max:
case BuiltinProc_simd_reduce_and: