diff options
| author | gingerBill <gingerBill@users.noreply.github.com> | 2022-05-31 11:52:24 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-05-31 11:52:24 +0100 |
| commit | a1f15c2c69b557be5a95882d18137d1f74d980ee (patch) | |
| tree | 3f484753712a6d9d9cf1074f56bc91af6d6432c1 /src/llvm_backend_expr.cpp | |
| parent | a6c779b50ecf5c8c0cb86c9d49768ab34508b1d2 (diff) | |
| parent | 516f6647b46c69a67139154c02c74b436cd4b999 (diff) | |
Merge pull request #1807 from odin-lang/simd-dev
Generic #simd type and intrinsics
Diffstat (limited to 'src/llvm_backend_expr.cpp')
| -rw-r--r-- | src/llvm_backend_expr.cpp | 208 |
1 files changed, 207 insertions, 1 deletions
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 133df4d41..1894e85f6 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -258,7 +258,13 @@ lbValue lb_emit_unary_arith(lbProcedure *p, TokenKind op, lbValue x, Type *type) LLVMBuildStore(p->builder, v2, LLVMBuildStructGEP(p->builder, addr.addr.value, 2, "")); LLVMBuildStore(p->builder, v3, LLVMBuildStructGEP(p->builder, addr.addr.value, 3, "")); return lb_addr_load(p, addr); - + } else if (is_type_simd_vector(x.type)) { + Type *elem = base_array_type(x.type); + if (is_type_float(elem)) { + res.value = LLVMBuildFNeg(p->builder, x.value, ""); + } else { + res.value = LLVMBuildNeg(p->builder, x.value, ""); + } } else { GB_PANIC("Unhandled type %s", type_to_string(x.type)); } @@ -1820,6 +1826,59 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return res; } + if (is_type_simd_vector(dst)) { + Type *et = base_array_type(dst); + if (is_type_simd_vector(src)) { + Type *src_elem = core_array_type(src); + Type *dst_elem = core_array_type(dst); + + GB_ASSERT(src->SimdVector.count == dst->SimdVector.count); + + lbValue res = {}; + res.type = t; + if (are_types_identical(src_elem, dst_elem)) { + res.value = value.value; + } else if (is_type_float(src_elem) && is_type_integer(dst_elem)) { + if (is_type_unsigned(dst_elem)) { + res.value = LLVMBuildFPToUI(p->builder, value.value, lb_type(m, t), ""); + } else { + res.value = LLVMBuildFPToSI(p->builder, value.value, lb_type(m, t), ""); + } + } else if (is_type_integer(src_elem) && is_type_float(dst_elem)) { + if (is_type_unsigned(src_elem)) { + res.value = LLVMBuildUIToFP(p->builder, value.value, lb_type(m, t), ""); + } else { + res.value = LLVMBuildSIToFP(p->builder, value.value, lb_type(m, t), ""); + } + } else if ((is_type_integer(src_elem) || is_type_boolean(src_elem)) && is_type_integer(dst_elem)) { + res.value = LLVMBuildIntCast2(p->builder, value.value, lb_type(m, t), !is_type_unsigned(src_elem), ""); + } else if (is_type_float(src_elem) && is_type_float(dst_elem)) { + res.value = LLVMBuildFPCast(p->builder, value.value, lb_type(m, t), ""); + } else if (is_type_integer(src_elem) && is_type_boolean(dst_elem)) { + LLVMValueRef i1vector = LLVMBuildICmp(p->builder, LLVMIntNE, value.value, LLVMConstNull(LLVMTypeOf(value.value)), ""); + res.value = LLVMBuildIntCast2(p->builder, i1vector, lb_type(m, t), !is_type_unsigned(src_elem), ""); + } else { + GB_PANIC("Unhandled simd vector conversion: %s -> %s", type_to_string(src), type_to_string(dst)); + } + return res; + } else { + i64 count = get_array_type_count(dst); + LLVMTypeRef vt = lb_type(m, t); + LLVMTypeRef llvm_u32 = lb_type(m, t_u32); + LLVMValueRef elem = lb_emit_conv(p, value, et).value; + LLVMValueRef vector = LLVMConstNull(vt); + for (i64 i = 0; i < count; i++) { + LLVMValueRef idx = LLVMConstInt(llvm_u32, i, false); + vector = LLVMBuildInsertElement(p->builder, vector, elem, idx, ""); + } + lbValue res = {}; + res.type = t; + res.value = vector; + return res; + } + } + + // Pointer <-> uintptr if (is_type_pointer(src) && is_type_uintptr(dst)) { lbValue res = {}; @@ -2506,6 +2565,57 @@ lbValue lb_emit_comp(lbProcedure *p, TokenKind op_kind, lbValue left, lbValue ri case Token_NotEq: pred = LLVMIntNE; break; } res.value = LLVMBuildICmp(p->builder, pred, left.value, right.value, ""); + } else if (is_type_simd_vector(a)) { + LLVMValueRef mask = nullptr; + Type *elem = base_array_type(a); + if (is_type_float(elem)) { + LLVMRealPredicate pred = {}; + switch (op_kind) { + case Token_CmpEq: pred = LLVMRealOEQ; break; + case Token_NotEq: pred = LLVMRealONE; break; + } + mask = LLVMBuildFCmp(p->builder, pred, left.value, right.value, ""); + } else { + LLVMIntPredicate pred = {}; + switch (op_kind) { + case Token_CmpEq: pred = LLVMIntEQ; break; + case Token_NotEq: pred = LLVMIntNE; break; + } + mask = LLVMBuildICmp(p->builder, pred, left.value, right.value, ""); + } + GB_ASSERT_MSG(mask != nullptr, "Unhandled comparison kind %s (%s) %.*s %s (%s)", type_to_string(left.type), type_to_string(base_type(left.type)), LIT(token_strings[op_kind]), type_to_string(right.type), type_to_string(base_type(right.type))); + + /* NOTE(bill, 2022-05-28): + Thanks to Per Vognsen, sign extending <N x i1> to + a vector of the same width as the input vector, bit casting to an integer, + and then comparing against zero is the better option + See: https://lists.llvm.org/pipermail/llvm-dev/2012-September/053046.html + + // Example assuming 128-bit vector + + %1 = <4 x float> ... + %2 = <4 x float> ... + %3 = fcmp oeq <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + %5 = bitcast <4 x i32> %4 to i128 + %6 = icmp ne i128 %5, 0 + br i1 %6, label %true1, label %false2 + + This will result in 1 cmpps + 1 ptest + 1 br + (even without SSE4.1, contrary to what the mail list states, because of pmovmskb) + + */ + + unsigned count = cast(unsigned)get_array_type_count(a); + unsigned elem_sz = cast(unsigned)(type_size_of(elem)*8); + LLVMTypeRef mask_type = LLVMVectorType(LLVMIntTypeInContext(p->module->ctx, elem_sz), count); + mask = LLVMBuildSExtOrBitCast(p->builder, mask, mask_type, ""); + + LLVMTypeRef mask_int_type = LLVMIntTypeInContext(p->module->ctx, cast(unsigned)(8*type_size_of(a))); + LLVMValueRef mask_int = LLVMBuildBitCast(p->builder, mask, mask_int_type, ""); + res.value = LLVMBuildICmp(p->builder, LLVMIntNE, mask_int, LLVMConstNull(LLVMTypeOf(mask_int)), ""); + return res; + } else { GB_PANIC("Unhandled comparison kind %s (%s) %.*s %s (%s)", type_to_string(left.type), type_to_string(base_type(left.type)), LIT(token_strings[op_kind]), type_to_string(right.type), type_to_string(base_type(right.type))); } @@ -4609,6 +4719,102 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) { break; } + case Type_SimdVector: { + if (cl->elems.count > 0) { + lbValue vector_value = lb_const_value(p->module, type, exact_value_compound(expr)); + defer (lb_addr_store(p, v, vector_value)); + + auto temp_data = array_make<lbCompoundLitElemTempData>(temporary_allocator(), 0, cl->elems.count); + + // NOTE(bill): Separate value, store into their own chunks + for_array(i, cl->elems) { + Ast *elem = cl->elems[i]; + if (elem->kind == Ast_FieldValue) { + ast_node(fv, FieldValue, elem); + if (lb_is_elem_const(fv->value, et)) { + continue; + } + if (is_ast_range(fv->field)) { + ast_node(ie, BinaryExpr, fv->field); + TypeAndValue lo_tav = ie->left->tav; + TypeAndValue hi_tav = ie->right->tav; + GB_ASSERT(lo_tav.mode == Addressing_Constant); + GB_ASSERT(hi_tav.mode == Addressing_Constant); + + TokenKind op = ie->op.kind; + i64 lo = exact_value_to_i64(lo_tav.value); + i64 hi = exact_value_to_i64(hi_tav.value); + if (op != Token_RangeHalf) { + hi += 1; + } + + lbValue value = lb_build_expr(p, fv->value); + + for (i64 k = lo; k < hi; k++) { + lbCompoundLitElemTempData data = {}; + data.value = value; + data.elem_index = cast(i32)k; + array_add(&temp_data, data); + } + + } else { + auto tav = fv->field->tav; + GB_ASSERT(tav.mode == Addressing_Constant); + i64 index = exact_value_to_i64(tav.value); + + lbValue value = lb_build_expr(p, fv->value); + lbCompoundLitElemTempData data = {}; + data.value = lb_emit_conv(p, value, et); + data.expr = fv->value; + data.elem_index = cast(i32)index; + array_add(&temp_data, data); + } + + } else { + if (lb_is_elem_const(elem, et)) { + continue; + } + lbCompoundLitElemTempData data = {}; + data.expr = elem; + data.elem_index = cast(i32)i; + array_add(&temp_data, data); + } + } + + + for_array(i, temp_data) { + lbValue field_expr = temp_data[i].value; + Ast *expr = temp_data[i].expr; + + auto prev_hint = lb_set_copy_elision_hint(p, lb_addr(temp_data[i].gep), expr); + + if (field_expr.value == nullptr) { + field_expr = lb_build_expr(p, expr); + } + Type *t = field_expr.type; + GB_ASSERT(t->kind != Type_Tuple); + lbValue ev = lb_emit_conv(p, field_expr, et); + + if (!p->copy_elision_hint.used) { + temp_data[i].value = ev; + } + + lb_reset_copy_elision_hint(p, prev_hint); + } + + + // TODO(bill): reduce the need for individual `insertelement` if a `shufflevector` + // might be a better option + + for_array(i, temp_data) { + if (temp_data[i].value.value != nullptr) { + LLVMValueRef index = lb_const_int(p->module, t_u32, temp_data[i].elem_index).value; + vector_value.value = LLVMBuildInsertElement(p->builder, vector_value.value, temp_data[i].value.value, index, ""); + } + } + } + break; + } } return v; |