path: root/src/llvm_backend_proc.cpp
author    Andrea Piseri <andrea.piseri@gmail.com>  2023-04-16 15:01:30 +0200
committer Andrea Piseri <andrea.piseri@gmail.com>  2023-04-16 15:01:30 +0200
commit    af63eff8d738b0c1d6869510b14e5abde84a2c48 (patch)
tree      6fc44cda12b6f04a90fce1511615e587a2b92110 /src/llvm_backend_proc.cpp
parent    b7b5043aea792839226baf9e6d0ca54b73dac9a5 (diff)
improve code generation for `intrinsics.unaligned_load/store` on `#simd` types
The default implementation calls memcpy on an `alloca`'d temporary, which seems to heavily confuse the optimizer and produces suboptimal code overall. Introducing this specialization simplifies the intermediate representation that is produced, resulting in more efficient code.
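As an illustration (not part of the patch; the function name `demo` and the `<4 x float>` element type are invented for the example), the following standalone sketch shows the LLVM-C calls the specialized `unaligned_store` path boils down to: a single store instruction whose alignment is forced to 1, instead of spilling the value to an `alloca` and issuing a memcpy.

```cpp
// Hypothetical standalone sketch, not compiler code: emit an unaligned SIMD
// store the same way the specialized path does (LLVMBuildStore + alignment 1).
#include <llvm-c/Core.h>
#include <stdio.h>

int main(void) {
	LLVMContextRef ctx = LLVMContextCreate();
	LLVMModuleRef  mod = LLVMModuleCreateWithNameInContext("unaligned_store_demo", ctx);
	LLVMBuilderRef bld = LLVMCreateBuilderInContext(ctx);

	// void demo(<4 x float>* dst, <4 x float> value)
	LLVMTypeRef vec_ty    = LLVMVectorType(LLVMFloatTypeInContext(ctx), 4);
	LLVMTypeRef ptr_ty    = LLVMPointerType(vec_ty, 0);
	LLVMTypeRef params[]  = { ptr_ty, vec_ty };
	LLVMTypeRef fn_ty     = LLVMFunctionType(LLVMVoidTypeInContext(ctx), params, 2, 0);
	LLVMValueRef fn       = LLVMAddFunction(mod, "demo", fn_ty);

	LLVMBasicBlockRef entry = LLVMAppendBasicBlockInContext(ctx, fn, "entry");
	LLVMPositionBuilderAtEnd(bld, entry);

	// The specialized path: store the SIMD value directly and force alignment 1,
	// mirroring the LLVMBuildStore + LLVMSetAlignment calls in the patch below.
	LLVMValueRef store = LLVMBuildStore(bld, LLVMGetParam(fn, 1), LLVMGetParam(fn, 0));
	LLVMSetAlignment(store, 1);
	LLVMBuildRetVoid(bld);

	// Print the module; expect a `store <4 x float> ... , align 1` instruction.
	char *ir = LLVMPrintModuleToString(mod);
	printf("%s", ir);
	LLVMDisposeMessage(ir);

	LLVMDisposeBuilder(bld);
	LLVMDisposeModule(mod);
	LLVMContextDispose(ctx);
	return 0;
}
```

The specialized `unaligned_load` path in the second hunk is symmetric: an `LLVMBuildLoad2` of the vector type followed by `LLVMSetAlignment(..., 1)`.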
Diffstat (limited to 'src/llvm_backend_proc.cpp')
-rw-r--r--   src/llvm_backend_proc.cpp   24
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp
index 02748663b..5e709b0bf 100644
--- a/src/llvm_backend_proc.cpp
+++ b/src/llvm_backend_proc.cpp
@@ -2363,9 +2363,15 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
{
lbValue dst = lb_build_expr(p, ce->args[0]);
lbValue src = lb_build_expr(p, ce->args[1]);
- src = lb_address_from_load_or_generate_local(p, src);
Type *t = type_deref(dst.type);
- lb_mem_copy_non_overlapping(p, dst, src, lb_const_int(p->module, t_int, type_size_of(t)), false);
+
+ if (is_type_simd_vector(t)) {
+ LLVMValueRef store = LLVMBuildStore(p->builder, src.value, dst.value);
+ LLVMSetAlignment(store, 1);
+ } else {
+ src = lb_address_from_load_or_generate_local(p, src);
+ lb_mem_copy_non_overlapping(p, dst, src, lb_const_int(p->module, t_int, type_size_of(t)), false);
+ }
return {};
}
@@ -2373,9 +2379,17 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
{
lbValue src = lb_build_expr(p, ce->args[0]);
Type *t = type_deref(src.type);
- lbAddr dst = lb_add_local_generated(p, t, false);
- lb_mem_copy_non_overlapping(p, dst.addr, src, lb_const_int(p->module, t_int, type_size_of(t)), false);
- return lb_addr_load(p, dst);
+ if (is_type_simd_vector(t)) {
+ lbValue res = {};
+ res.type = t;
+ res.value = LLVMBuildLoad2(p->builder, lb_type(p->module, t), src.value, "");
+ LLVMSetAlignment(res.value, 1);
+ return res;
+ } else {
+ lbAddr dst = lb_add_local_generated(p, t, false);
+ lb_mem_copy_non_overlapping(p, dst.addr, src, lb_const_int(p->module, t_int, type_size_of(t)), false);
+ return lb_addr_load(p, dst);
+ }
}
case BuiltinProc_atomic_add: