diff options
Diffstat (limited to 'src/llvm_backend_expr.cpp')
| -rw-r--r-- | src/llvm_backend_expr.cpp | 33 |
1 file changed, 19 insertions, 14 deletions
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp
index 291c5ab66..7f162856c 100644
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -490,15 +490,11 @@ bool lb_is_matrix_simdable(Type *t) {
 	}
 
 	switch (build_context.metrics.arch) {
+	default:
+		return false;
 	case TargetArch_amd64:
 	case TargetArch_arm64:
-		// possible
 		break;
-	case TargetArch_386:
-	case TargetArch_wasm32:
-	case TargetArch_wasm64:
-		// nope
-		return false;
 	}
 
 	if (elem->kind == Type_Basic) {
@@ -2018,14 +2014,23 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
 			i64 src_count = src->Matrix.row_count*src->Matrix.column_count;
 			GB_ASSERT(dst_count == src_count);
 
-			for (i64 j = 0; j < src->Matrix.column_count; j++) {
-				for (i64 i = 0; i < src->Matrix.row_count; i++) {
-					lbValue s = lb_emit_matrix_ev(p, value, i, j);
-					i64 index = i + j*src->Matrix.row_count;
-					i64 dst_i = index%dst->Matrix.row_count;
-					i64 dst_j = index/dst->Matrix.row_count;
-					lbValue d = lb_emit_matrix_epi(p, v.addr, dst_i, dst_j);
-					lb_emit_store(p, d, s);
+			lbValue pdst = v.addr;
+			lbValue psrc = lb_address_from_load_or_generate_local(p, value);
+
+			bool same_elem_base_types = are_types_identical(
+				base_type(dst->Matrix.elem),
+				base_type(src->Matrix.elem)
+			);
+
+			if (same_elem_base_types && type_size_of(dst) == type_size_of(src)) {
+				lb_mem_copy_overlapping(p, v.addr, psrc, lb_const_int(p->module, t_int, type_size_of(dst)));
+			} else {
+				for (i64 i = 0; i < src_count; i++) {
+					lbValue dp = lb_emit_array_epi(p, v.addr, matrix_column_major_index_to_offset(dst, i));
+					lbValue sp = lb_emit_array_epi(p, psrc, matrix_column_major_index_to_offset(src, i));
+					lbValue s = lb_emit_load(p, sp);
+					s = lb_emit_conv(p, s, dst->Matrix.elem);
+					lb_emit_store(p, dp, s);
 				}
 			}
 		}