diff options
| author | gingerBill <bill@gingerbill.org> | 2021-10-20 14:49:20 +0100 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2021-10-20 14:49:20 +0100 |
| commit | d0d9a3a4f4f3b4bc528c73ffcecb31d3eb4162a7 (patch) | |
| tree | b08a4bea39b12d98eb3cfa56964660b0ef339ace /src/llvm_backend_utility.cpp | |
| parent | 9e43072113782a96a3ab14a63c1d5cfc9a99b881 (diff) | |
Make `lb_emit_matrix_mul` SIMD if possible
Diffstat (limited to 'src/llvm_backend_utility.cpp')
| -rw-r--r-- | src/llvm_backend_utility.cpp | 29 |
1 file changed, 28 insertions, 1 deletion
diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp
index b07dc3459..6754ce798 100644
--- a/src/llvm_backend_utility.cpp
+++ b/src/llvm_backend_utility.cpp
@@ -1577,7 +1577,7 @@ LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) {
 	GB_ASSERT_MSG(id != 0, "Unable to find %s", name);
 
 	LLVMTypeRef types[1] = {};
-	types[0] = elem;
+	types[0] = type;
 
 	LLVMValueRef ip = LLVMGetIntrinsicDeclaration(p->module->mod, id, types, gb_count_of(types));
 	LLVMValueRef values[2] = {};
@@ -1585,4 +1585,31 @@ LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) {
 	values[1] = value;
 	LLVMValueRef call = LLVMBuildCall(p->builder, ip, values+value_offset, value_count, "");
 	return call;
+}
+
+LLVMValueRef llvm_vector_add(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) {
+	GB_ASSERT(LLVMTypeOf(a) == LLVMTypeOf(b));
+
+	LLVMTypeRef elem = LLVMGetElementType(LLVMTypeOf(a));
+
+	if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind) {
+		return LLVMBuildAdd(p->builder, a, b, "");
+	}
+	return LLVMBuildFAdd(p->builder, a, b, "");
+}
+
+LLVMValueRef llvm_vector_mul(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) {
+	GB_ASSERT(LLVMTypeOf(a) == LLVMTypeOf(b));
+
+	LLVMTypeRef elem = LLVMGetElementType(LLVMTypeOf(a));
+
+	if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind) {
+		return LLVMBuildMul(p->builder, a, b, "");
+	}
+	return LLVMBuildFMul(p->builder, a, b, "");
+}
+
+
+LLVMValueRef llvm_vector_dot(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) {
+	return llvm_vector_reduce_add(p, llvm_vector_mul(p, a, b));
 }
\ No newline at end of file