diff options
| author | gingerBill <bill@gingerbill.org> | 2024-03-19 21:05:23 +0000 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2024-03-19 21:05:23 +0000 |
| commit | a750fc0ba63c9f1461bba4cc0446b1b4c2d2b3a9 (patch) | |
| tree | b60c41fc88da2c4af2ac1b23639cdafd8e3bcf0f /src/llvm_backend_expr.cpp | |
| parent | 433109ff52d2db76069273cd53b7aebf6aea9be0 (diff) | |
Add `#row_major matrix[R, C]T`
As well as `#column_major matrix[R, C]T` as an alias for just `matrix[R, C]T`.
This is because some libraries require a row_major internal layout but still want to be used with row or major oriented vectors.
Diffstat (limited to 'src/llvm_backend_expr.cpp')
| -rw-r--r-- | src/llvm_backend_expr.cpp | 39 |
1 files changed, 29 insertions, 10 deletions
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 98618798b..6eb8fdcc6 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -684,12 +684,6 @@ gb_internal lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type Type *mt = base_type(m.type); GB_ASSERT(mt->kind == Type_Matrix); - // TODO(bill): Determine why this fails on Windows sometimes - if (false && lb_is_matrix_simdable(mt)) { - LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m); - return lb_matrix_cast_vector_to_type(p, vector, type); - } - lbAddr res = lb_add_local_generated(p, type, true); i64 row_count = mt->Matrix.row_count; @@ -763,6 +757,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, GB_ASSERT(is_type_matrix(yt)); GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count); GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem)); + GB_ASSERT(xt->Matrix.is_row_major == yt->Matrix.is_row_major); Type *elem = xt->Matrix.elem; @@ -770,7 +765,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, unsigned inner = cast(unsigned)xt->Matrix.column_count; unsigned outer_columns = cast(unsigned)yt->Matrix.column_count; - if (lb_is_matrix_simdable(xt)) { + if (!xt->Matrix.is_row_major && lb_is_matrix_simdable(xt)) { unsigned x_stride = cast(unsigned)matrix_type_stride_in_elems(xt); unsigned y_stride = cast(unsigned)matrix_type_stride_in_elems(yt); @@ -812,7 +807,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, return lb_addr_load(p, res); } - { + if (!xt->Matrix.is_row_major) { lbAddr res = lb_add_local_generated(p, type, true); auto inners = slice_make<lbValue[2]>(permanent_allocator(), inner); @@ -836,6 +831,30 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, } return lb_addr_load(p, res); + } else { + lbAddr res = lb_add_local_generated(p, type, true); + + auto inners = slice_make<lbValue[2]>(permanent_allocator(), inner); + + for (unsigned i = 0; i < outer_rows; i++) { + for (unsigned j = 0; j < outer_columns; j++) { + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + for (unsigned k = 0; k < inner; k++) { + inners[k][0] = lb_emit_matrix_ev(p, lhs, i, k); + inners[k][1] = lb_emit_matrix_ev(p, rhs, k, j); + } + + lbValue sum = lb_const_nil(p->module, elem); + for (unsigned k = 0; k < inner; k++) { + lbValue a = inners[k][0]; + lbValue b = inners[k][1]; + sum = lb_emit_mul_add(p, a, b, sum, elem); + } + lb_emit_store(p, dst, sum); + } + } + + return lb_addr_load(p, res); } } @@ -855,7 +874,7 @@ gb_internal lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbVal Type *elem = mt->Matrix.elem; - if (lb_is_matrix_simdable(mt)) { + if (!mt->Matrix.is_row_major && lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; @@ -924,7 +943,7 @@ gb_internal lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbVal Type *elem = mt->Matrix.elem; - if (lb_is_matrix_simdable(mt)) { + if (!mt->Matrix.is_row_major && lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; |