aboutsummaryrefslogtreecommitdiff
path: root/src/llvm_backend_expr.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/llvm_backend_expr.cpp')
-rw-r--r--src/llvm_backend_expr.cpp39
1 files changed, 29 insertions, 10 deletions
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp
index 98618798b..6eb8fdcc6 100644
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -684,12 +684,6 @@ gb_internal lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type
Type *mt = base_type(m.type);
GB_ASSERT(mt->kind == Type_Matrix);
- // TODO(bill): Determine why this fails on Windows sometimes
- if (false && lb_is_matrix_simdable(mt)) {
- LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m);
- return lb_matrix_cast_vector_to_type(p, vector, type);
- }
-
lbAddr res = lb_add_local_generated(p, type, true);
i64 row_count = mt->Matrix.row_count;
@@ -763,6 +757,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs,
GB_ASSERT(is_type_matrix(yt));
GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count);
GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem));
+ GB_ASSERT(xt->Matrix.is_row_major == yt->Matrix.is_row_major);
Type *elem = xt->Matrix.elem;
@@ -770,7 +765,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs,
unsigned inner = cast(unsigned)xt->Matrix.column_count;
unsigned outer_columns = cast(unsigned)yt->Matrix.column_count;
- if (lb_is_matrix_simdable(xt)) {
+ if (!xt->Matrix.is_row_major && lb_is_matrix_simdable(xt)) {
unsigned x_stride = cast(unsigned)matrix_type_stride_in_elems(xt);
unsigned y_stride = cast(unsigned)matrix_type_stride_in_elems(yt);
@@ -812,7 +807,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs,
return lb_addr_load(p, res);
}
- {
+ if (!xt->Matrix.is_row_major) {
lbAddr res = lb_add_local_generated(p, type, true);
auto inners = slice_make<lbValue[2]>(permanent_allocator(), inner);
@@ -836,6 +831,30 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs,
}
return lb_addr_load(p, res);
+ } else {
+ lbAddr res = lb_add_local_generated(p, type, true);
+
+ auto inners = slice_make<lbValue[2]>(permanent_allocator(), inner);
+
+ for (unsigned i = 0; i < outer_rows; i++) {
+ for (unsigned j = 0; j < outer_columns; j++) {
+ lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j);
+ for (unsigned k = 0; k < inner; k++) {
+ inners[k][0] = lb_emit_matrix_ev(p, lhs, i, k);
+ inners[k][1] = lb_emit_matrix_ev(p, rhs, k, j);
+ }
+
+ lbValue sum = lb_const_nil(p->module, elem);
+ for (unsigned k = 0; k < inner; k++) {
+ lbValue a = inners[k][0];
+ lbValue b = inners[k][1];
+ sum = lb_emit_mul_add(p, a, b, sum, elem);
+ }
+ lb_emit_store(p, dst, sum);
+ }
+ }
+
+ return lb_addr_load(p, res);
}
}
@@ -855,7 +874,7 @@ gb_internal lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbVal
Type *elem = mt->Matrix.elem;
- if (lb_is_matrix_simdable(mt)) {
+ if (!mt->Matrix.is_row_major && lb_is_matrix_simdable(mt)) {
unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
unsigned row_count = cast(unsigned)mt->Matrix.row_count;
@@ -924,7 +943,7 @@ gb_internal lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbVal
Type *elem = mt->Matrix.elem;
- if (lb_is_matrix_simdable(mt)) {
+ if (!mt->Matrix.is_row_major && lb_is_matrix_simdable(mt)) {
unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
unsigned row_count = cast(unsigned)mt->Matrix.row_count;