From 4c21f9495d1801e136471e7e2c38ecf3ba3d963f Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 5 Jun 2021 23:56:59 +0100 Subject: Clean up lbAddr_Swizzle logic for load and store --- src/llvm_backend.cpp | 120 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 47 deletions(-) (limited to 'src/llvm_backend.cpp') diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index e2bc4c16a..95aba38b4 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -290,6 +290,39 @@ void lb_emit_slice_bounds_check(lbProcedure *p, Token token, lbValue low, lbValu } } +bool lb_try_vector_cast(lbProcedure *p, lbValue ptr, LLVMTypeRef *vector_type_) { + Type *array_type = base_type(type_deref(ptr.type)); + GB_ASSERT(array_type->kind == Type_Array); + Type *elem_type = base_type(array_type->Array.elem); + + if (type_size_of(array_type) <= build_context.max_align && + is_type_valid_vector_elem(elem_type)) { + // Try to treat it like a vector if possible + bool possible = false; + LLVMTypeRef vector_type = LLVMVectorType(lb_type(p->module, elem_type), cast(unsigned)array_type->Array.count); + unsigned vector_alignment = cast(unsigned)lb_alignof(vector_type); + + LLVMValueRef addr_ptr = ptr.value; + if (LLVMIsAAllocaInst(addr_ptr) || LLVMIsAGlobalValue(addr_ptr)) { + unsigned alignment = LLVMGetAlignment(addr_ptr); + alignment = gb_max(alignment, vector_alignment); + possible = true; + LLVMSetAlignment(addr_ptr, alignment); + } else if (LLVMIsALoadInst(addr_ptr)) { + unsigned alignment = LLVMGetAlignment(addr_ptr); + possible = alignment >= vector_alignment; + } + + // NOTE: Due to alignment requirements, if the pointer is not correctly aligned + // then it cannot be treated as a vector + if (possible) { + if (vector_type_) *vector_type_ =vector_type; + return true; + } + } + return false; +} + void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) { if (addr.addr.value == nullptr) { return; @@ -459,14 +492,28 @@ void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) { } return; } else if (addr.kind == lbAddr_Swizzle) { - lbValue ptr = lb_addr_get_ptr(p, addr); - lbValue src_ptr = lb_address_from_load_or_generate_local(p, value); + GB_ASSERT(addr.swizzle.count <= 4); + + lbValue dst = lb_addr_get_ptr(p, addr); + lbValue src = lb_address_from_load_or_generate_local(p, value); + { + lbValue src_ptrs[4] = {}; + lbValue src_loads[4] = {}; + lbValue dst_ptrs[4] = {}; - for (u8 i = 0; i < addr.swizzle.count; i++) { - u8 index = addr.swizzle.indices[i]; - lbValue dst = lb_emit_array_epi(p, ptr, index); - lbValue src = lb_emit_array_epi(p, src_ptr, i); - lb_emit_store(p, dst, lb_emit_load(p, src)); + for (u8 i = 0; i < addr.swizzle.count; i++) { + src_ptrs[i] = lb_emit_array_epi(p, src, i); + } + for (u8 i = 0; i < addr.swizzle.count; i++) { + dst_ptrs[i] = lb_emit_array_epi(p, dst, addr.swizzle.indices[i]); + } + for (u8 i = 0; i < addr.swizzle.count; i++) { + src_loads[i] = lb_emit_load(p, src_ptrs[i]); + } + + for (u8 i = 0; i < addr.swizzle.count; i++) { + lb_emit_store(p, dst_ptrs[i], src_loads[i]); + } } return; } @@ -753,46 +800,25 @@ lbValue lb_addr_load(lbProcedure *p, lbAddr const &addr) { lbAddr res = lb_add_local_generated(p, addr.swizzle.type, false); lbValue ptr = lb_addr_get_ptr(p, res); - if (type_size_of(array_type) <= build_context.max_align && - is_type_valid_vector_elem(elem_type)) { - // Try to treat it like a vector if possible - bool possible = false; - LLVMTypeRef vector_type = LLVMVectorType(lb_type(p->module, elem_type), cast(unsigned)array_type->Array.count); - unsigned vector_alignment = cast(unsigned)lb_alignof(vector_type); - - LLVMValueRef addr_ptr = addr.addr.value; - if (LLVMIsAAllocaInst(addr_ptr) || LLVMIsAGlobalValue(addr_ptr)) { - unsigned alignment = LLVMGetAlignment(addr_ptr); - alignment = gb_max(alignment, vector_alignment); - possible = true; - LLVMSetAlignment(addr_ptr, alignment); - } else if (LLVMIsALoadInst(addr_ptr)) { - unsigned alignment = LLVMGetAlignment(addr_ptr); - possible = alignment >= vector_alignment; - } - - // NOTE: Due to alignment requirements, if the pointer is not correctly aligned - // then it cannot be treated as a vector - if (possible) { - LLVMValueRef vp = LLVMBuildPointerCast(p->builder, addr_ptr, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef v = LLVMBuildLoad2(p->builder, vector_type, vp, ""); - LLVMValueRef scalars[4] = {}; - for (u8 i = 0; i < addr.swizzle.count; i++) { - scalars[i] = LLVMConstInt(lb_type(p->module, t_u32), addr.swizzle.indices[i], false); - } - LLVMValueRef mask = LLVMConstVector(scalars, addr.swizzle.count); - LLVMValueRef sv = LLVMBuildShuffleVector(p->builder, v, LLVMGetUndef(vector_type), mask, ""); - - LLVMBuildStore(p->builder, sv, LLVMBuildPointerCast(p->builder, ptr.value, LLVMTypeOf(vp), "")); - return lb_addr_load(p, res); - } - } - - for (u8 i = 0; i < addr.swizzle.count; i++) { - u8 index = addr.swizzle.indices[i]; - lbValue dst = lb_emit_array_epi(p, ptr, i); - lbValue src = lb_emit_array_epi(p, addr.addr, index); - lb_emit_store(p, dst, lb_emit_load(p, src)); + LLVMTypeRef vector_type = nullptr; + if (lb_try_vector_cast(p, addr.addr, &vector_type)) { + LLVMValueRef vp = LLVMBuildPointerCast(p->builder, addr.addr.value, LLVMPointerType(vector_type, 0), ""); + LLVMValueRef v = LLVMBuildLoad2(p->builder, vector_type, vp, ""); + LLVMValueRef scalars[4] = {}; + for (u8 i = 0; i < addr.swizzle.count; i++) { + scalars[i] = LLVMConstInt(lb_type(p->module, t_u32), addr.swizzle.indices[i], false); + } + LLVMValueRef mask = LLVMConstVector(scalars, addr.swizzle.count); + LLVMValueRef sv = LLVMBuildShuffleVector(p->builder, v, LLVMGetUndef(vector_type), mask, ""); + + LLVMBuildStore(p->builder, sv, LLVMBuildPointerCast(p->builder, ptr.value, LLVMTypeOf(vp), "")); + } else { + for (u8 i = 0; i < addr.swizzle.count; i++) { + u8 index = addr.swizzle.indices[i]; + lbValue dst = lb_emit_array_epi(p, ptr, i); + lbValue src = lb_emit_array_epi(p, addr.addr, index); + lb_emit_store(p, dst, lb_emit_load(p, src)); + } } return lb_addr_load(p, res); } -- cgit v1.2.3