diff options
| author | gingerBill <bill@gingerbill.org> | 2024-03-19 21:25:29 +0000 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2024-03-19 21:25:29 +0000 |
| commit | 5714ea4ea3d21448fd9c3d1f256db6eae6a9b21e (patch) | |
| tree | e02664fd4441a5bd69b1b5399ef4d49232868518 /src/llvm_backend_expr.cpp | |
| parent | 9d9b190adab220a19d9afbc2df3c4a88a162d4e7 (diff) | |
| parent | cd7137af60ba2c3f1d95ae77d59e6663412eab4a (diff) | |
Merge branch 'master' into orca-dev
Diffstat (limited to 'src/llvm_backend_expr.cpp')
| -rw-r--r-- | src/llvm_backend_expr.cpp | 39 |
1 files changed, 29 insertions, 10 deletions
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 98618798b..6eb8fdcc6 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -684,12 +684,6 @@ gb_internal lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type Type *mt = base_type(m.type); GB_ASSERT(mt->kind == Type_Matrix); - // TODO(bill): Determine why this fails on Windows sometimes - if (false && lb_is_matrix_simdable(mt)) { - LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m); - return lb_matrix_cast_vector_to_type(p, vector, type); - } - lbAddr res = lb_add_local_generated(p, type, true); i64 row_count = mt->Matrix.row_count; @@ -763,6 +757,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, GB_ASSERT(is_type_matrix(yt)); GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count); GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem)); + GB_ASSERT(xt->Matrix.is_row_major == yt->Matrix.is_row_major); Type *elem = xt->Matrix.elem; @@ -770,7 +765,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, unsigned inner = cast(unsigned)xt->Matrix.column_count; unsigned outer_columns = cast(unsigned)yt->Matrix.column_count; - if (lb_is_matrix_simdable(xt)) { + if (!xt->Matrix.is_row_major && lb_is_matrix_simdable(xt)) { unsigned x_stride = cast(unsigned)matrix_type_stride_in_elems(xt); unsigned y_stride = cast(unsigned)matrix_type_stride_in_elems(yt); @@ -812,7 +807,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, return lb_addr_load(p, res); } - { + if (!xt->Matrix.is_row_major) { lbAddr res = lb_add_local_generated(p, type, true); auto inners = slice_make<lbValue[2]>(permanent_allocator(), inner); @@ -836,6 +831,30 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, } return lb_addr_load(p, res); + } else { + lbAddr res = lb_add_local_generated(p, type, true); + + auto inners = slice_make<lbValue[2]>(permanent_allocator(), inner); + + for (unsigned i = 0; i < outer_rows; i++) { + for (unsigned j = 0; j < outer_columns; j++) { + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + for (unsigned k = 0; k < inner; k++) { + inners[k][0] = lb_emit_matrix_ev(p, lhs, i, k); + inners[k][1] = lb_emit_matrix_ev(p, rhs, k, j); + } + + lbValue sum = lb_const_nil(p->module, elem); + for (unsigned k = 0; k < inner; k++) { + lbValue a = inners[k][0]; + lbValue b = inners[k][1]; + sum = lb_emit_mul_add(p, a, b, sum, elem); + } + lb_emit_store(p, dst, sum); + } + } + + return lb_addr_load(p, res); } } @@ -855,7 +874,7 @@ gb_internal lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbVal Type *elem = mt->Matrix.elem; - if (lb_is_matrix_simdable(mt)) { + if (!mt->Matrix.is_row_major && lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; @@ -924,7 +943,7 @@ gb_internal lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbVal Type *elem = mt->Matrix.elem; - if (lb_is_matrix_simdable(mt)) { + if (!mt->Matrix.is_row_major && lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; |