aboutsummaryrefslogtreecommitdiff
path: root/src/llvm_backend.cpp
diff options
context:
space:
mode:
author gingerBill <bill@gingerbill.org> 2021-06-01 10:51:54 +0100
committer gingerBill <bill@gingerbill.org> 2021-06-01 10:51:54 +0100
commit 8c943eb054d6c641996920e73065a35178ba2a2d (patch)
tree eb679b02123c011f9be650fc1fdec63e6c6f7e6c /src/llvm_backend.cpp
parent 446703ba756e757f143b9c166118392a27597bf8 (diff)
Make inline array arithmetic use `load+extractvalue` rather than `getelementptr+load` to give the optimizer a better hint for vectorization (tag: dev-2021-06)
Diffstat (limited to 'src/llvm_backend.cpp')
-rw-r--r-- src/llvm_backend.cpp 49
1 files changed, 42 insertions, 7 deletions
diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp
index 431e1429c..eaa621291 100644
--- a/src/llvm_backend.cpp
+++ b/src/llvm_backend.cpp
@@ -6863,20 +6863,46 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r
lhs = lb_emit_conv(p, lhs, type);
rhs = lb_emit_conv(p, rhs, type);
- lbValue x = lb_address_from_load_or_generate_local(p, lhs);
- lbValue y = lb_address_from_load_or_generate_local(p, rhs);
-
GB_ASSERT(is_type_array(type));
Type *elem_type = base_array_type(type);
- lbAddr res = lb_add_local_generated(p, type, false);
-
i64 count = base_type(type)->Array.count;
bool inline_array_arith = type_size_of(type) <= build_context.max_align;
if (inline_array_arith) {
#if 1
+ #if 1
+ unsigned n = cast(unsigned)count;
+ auto dst_ptrs = array_make<lbValue>(temporary_allocator(), count);
+
+ auto a_loads = array_make<lbValue>(temporary_allocator(), count);
+ auto b_loads = array_make<lbValue>(temporary_allocator(), count);
+ auto c_ops = array_make<lbValue>(temporary_allocator(), count);
+
+ for (unsigned i = 0; i < n; i++) {
+ a_loads[i].value = LLVMBuildExtractValue(p->builder, lhs.value, i, "");
+ a_loads[i].type = elem_type;
+ }
+ for (unsigned i = 0; i < n; i++) {
+ b_loads[i].value = LLVMBuildExtractValue(p->builder, rhs.value, i, "");
+ b_loads[i].type = elem_type;
+ }
+ for (unsigned i = 0; i < n; i++) {
+ c_ops[i] = lb_emit_arith(p, op, a_loads[i], b_loads[i], elem_type);
+ }
+
+ lbAddr res = lb_add_local_generated(p, type, false);
+ for (unsigned i = 0; i < n; i++) {
+ dst_ptrs[i] = lb_emit_array_epi(p, res.addr, i);
+ }
+ for (unsigned i = 0; i < n; i++) {
+ lb_emit_store(p, dst_ptrs[i], c_ops[i]);
+ }
+ #else
+ lbValue x = lb_address_from_load_or_generate_local(p, lhs);
+ lbValue y = lb_address_from_load_or_generate_local(p, rhs);
+
auto a_ptrs = array_make<lbValue>(temporary_allocator(), count);
auto b_ptrs = array_make<lbValue>(temporary_allocator(), count);
auto dst_ptrs = array_make<lbValue>(temporary_allocator(), count);
@@ -6901,12 +6927,14 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r
c_ops[i] = lb_emit_arith(p, op, a_loads[i], b_loads[i], elem_type);
}
+ lbAddr res = lb_add_local_generated(p, type, false);
for (i64 i = 0; i < count; i++) {
dst_ptrs[i] = lb_emit_array_epi(p, res.addr, i);
}
for (i64 i = 0; i < count; i++) {
lb_emit_store(p, dst_ptrs[i], c_ops[i]);
}
+ #endif
#else
for (i64 i = 0; i < count; i++) {
lbValue a_ptr = lb_emit_array_epi(p, x, i);
@@ -6919,7 +6947,14 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r
lb_emit_store(p, dst_ptr, c);
}
#endif
+
+ return lb_addr_load(p, res);
} else {
+ lbValue x = lb_address_from_load_or_generate_local(p, lhs);
+ lbValue y = lb_address_from_load_or_generate_local(p, rhs);
+
+ lbAddr res = lb_add_local_generated(p, type, false);
+
auto loop_data = lb_loop_start(p, count, t_i32);
lbValue a_ptr = lb_emit_array_ep(p, x, loop_data.idx);
@@ -6932,9 +6967,9 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r
lb_emit_store(p, dst_ptr, c);
lb_loop_end(p, loop_data);
- }
- return lb_addr_load(p, res);
+ return lb_addr_load(p, res);
+ }
}