about | summary | refs | log | tree | commit | diff
path: root/src/llvm_backend_expr.cpp
diff options
context:
space:
mode:
author: gingerBill <bill@gingerbill.org> 2021-11-08 11:40:41 +0000
committer: gingerBill <bill@gingerbill.org> 2021-11-08 11:40:41 +0000
commit: 23f0fbc376bc4065d0d9391415e10ea6d9b43d96 (patch)
tree: 30bbc1fb7dcfd3b7c12cdb6bae60369dc61659aa /src/llvm_backend_expr.cpp
parent: 1296fabe2c933c350315efacb0961ce794505eab (diff)
Improve matrix->matrix casting implementation
Diffstat (limited to 'src/llvm_backend_expr.cpp')
-rw-r--r-- src/llvm_backend_expr.cpp 33
1 files changed, 19 insertions, 14 deletions
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp
index 291c5ab66..7f162856c 100644
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -490,15 +490,11 @@ bool lb_is_matrix_simdable(Type *t) {
}
switch (build_context.metrics.arch) {
+ default:
+ return false;
case TargetArch_amd64:
case TargetArch_arm64:
- // possible
break;
- case TargetArch_386:
- case TargetArch_wasm32:
- case TargetArch_wasm64:
- // nope
- return false;
}
if (elem->kind == Type_Basic) {
@@ -2018,14 +2014,23 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
i64 src_count = src->Matrix.row_count*src->Matrix.column_count;
GB_ASSERT(dst_count == src_count);
- for (i64 j = 0; j < src->Matrix.column_count; j++) {
- for (i64 i = 0; i < src->Matrix.row_count; i++) {
- lbValue s = lb_emit_matrix_ev(p, value, i, j);
- i64 index = i + j*src->Matrix.row_count;
- i64 dst_i = index%dst->Matrix.row_count;
- i64 dst_j = index/dst->Matrix.row_count;
- lbValue d = lb_emit_matrix_epi(p, v.addr, dst_i, dst_j);
- lb_emit_store(p, d, s);
+ lbValue pdst = v.addr;
+ lbValue psrc = lb_address_from_load_or_generate_local(p, value);
+
+ bool same_elem_base_types = are_types_identical(
+ base_type(dst->Matrix.elem),
+ base_type(src->Matrix.elem)
+ );
+
+ if (same_elem_base_types && type_size_of(dst) == type_size_of(src)) {
+ lb_mem_copy_overlapping(p, v.addr, psrc, lb_const_int(p->module, t_int, type_size_of(dst)));
+ } else {
+ for (i64 i = 0; i < src_count; i++) {
+ lbValue dp = lb_emit_array_epi(p, v.addr, matrix_column_major_index_to_offset(dst, i));
+ lbValue sp = lb_emit_array_epi(p, psrc, matrix_column_major_index_to_offset(src, i));
+ lbValue s = lb_emit_load(p, sp);
+ s = lb_emit_conv(p, s, dst->Matrix.elem);
+ lb_emit_store(p, dp, s);
}
}
}