From 7faca7066c30d6e663b268dc1e8ec66710ae3dd5 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 01:51:16 +0100 Subject: Add builtin `transpose` --- src/llvm_backend_proc.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 222161164..1431fffaa 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1257,6 +1257,12 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, return lb_soa_zip(p, ce, tv); case BuiltinProc_soa_unzip: return lb_soa_unzip(p, ce, tv); + + case BuiltinProc_transpose: + { + lbValue m = lb_build_expr(p, ce->args[0]); + return lb_emit_matrix_tranpose(p, m, tv.type); + } // "Intrinsics" -- cgit v1.2.3 From 68afbb37f40b10fd01dda9e5640cc7ae2535a371 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 02:06:56 +0100 Subject: Add builtin `outer_product` --- src/check_builtin.cpp | 60 +++++++++++++++++++++++++++++++++++++++++++ src/checker_builtin_procs.hpp | 2 ++ src/llvm_backend_expr.cpp | 32 +++++++++++++++++++++++ src/llvm_backend_proc.cpp | 8 ++++++ 4 files changed, 102 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 659a74ad7..1d033932f 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2017,6 +2017,66 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 operand->type = check_matrix_type_hint(operand->type, type_hint); break; } + + case BuiltinProc_outer_product: { + Operand x = {}; + Operand y = {}; + check_expr(c, &x, ce->args[0]); + if (x.mode == Addressing_Invalid) { + return false; + } + check_expr(c, &y, ce->args[1]); + if (y.mode == Addressing_Invalid) { + return false; + } + if (!is_operand_value(x) || !is_operand_value(y)) { + error(call, "'%.*s' expects only arrays", LIT(builtin_name)); + return false; + } + + if (!is_type_array(x.type) && !is_type_array(y.type)) { + gbString s1 = type_to_string(x.type); + gbString s2 = type_to_string(y.type); + error(call, "'%.*s' expects only arrays, got %s and %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + Type *xt = base_type(x.type); + Type *yt = base_type(y.type); + GB_ASSERT(xt->kind == Type_Array); + GB_ASSERT(yt->kind == Type_Array); + if (!are_types_identical(xt->Array.elem, yt->Array.elem)) { + gbString s1 = type_to_string(xt->Array.elem); + gbString s2 = type_to_string(yt->Array.elem); + error(call, "'%.*s' mismatched element types, got %s vs %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + if (xt->Array.count == 0 || yt->Array.count == 0) { + gbString s1 = type_to_string(x.type); + gbString s2 = type_to_string(y.type); + error(call, "'%.*s' expects only arrays of non-zero length, got %s and %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + i64 max_count = xt->Array.count*yt->Array.count; + if (max_count > MAX_MATRIX_ELEMENT_COUNT) { + error(call, "Product of the array lengths exceed the maximum matrix element count, got %d, expected a maximum of %d", cast(int)max_count, MAX_MATRIX_ELEMENT_COUNT); + return false; + } + + operand->mode = Addressing_Value; + operand->type = alloc_type_matrix(xt->Array.elem, xt->Array.count, yt->Array.count); + operand->type = check_matrix_type_hint(operand->type, type_hint); + break; + } + case BuiltinProc_simd_vector: { Operand x = {}; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 21a33bdd3..2c7392b09 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -36,6 +36,7 @@ enum BuiltinProcId { BuiltinProc_soa_unzip, BuiltinProc_transpose, + BuiltinProc_outer_product, BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures @@ -278,6 +279,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("soa_unzip"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, + {STR_LIT("outer_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT(""), 0, true, Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index d41a0a127..27f12a829 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -522,9 +522,41 @@ lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { } } return lb_addr_load(p, res); +} + + +lbValue lb_emit_outer_product(lbProcedure *p, lbValue a, lbValue b, Type *type) { + Type *mt = base_type(type); + Type *at = base_type(a.type); + Type *bt = base_type(b.type); + GB_ASSERT(mt->kind == Type_Matrix); + GB_ASSERT(at->kind == Type_Array); + GB_ASSERT(bt->kind == Type_Array); + + + i64 row_count = mt->Matrix.row_count; + i64 column_count = mt->Matrix.column_count; + + GB_ASSERT(row_count == at->Array.count); + GB_ASSERT(column_count == bt->Array.count); + + + lbAddr res = lb_add_local_generated(p, type, true); + + for (i64 j = 0; j < column_count; j++) { + for (i64 i = 0; i < row_count; i++) { + lbValue x = lb_emit_struct_ev(p, a, cast(i32)i); + lbValue y = lb_emit_struct_ev(p, b, cast(i32)j); + lbValue src = lb_emit_arith(p, Token_Mul, x, y, mt->Matrix.elem); + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + lb_emit_store(p, dst, src); + } + } + return lb_addr_load(p, res); } + lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { Type *xt = base_type(lhs.type); Type *yt = base_type(rhs.type); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 1431fffaa..5a7fc1626 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1263,6 +1263,14 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, lbValue m = lb_build_expr(p, ce->args[0]); return lb_emit_matrix_tranpose(p, m, tv.type); } + + case BuiltinProc_outer_product: + { + lbValue a = lb_build_expr(p, ce->args[0]); + lbValue b = lb_build_expr(p, ce->args[1]); + return lb_emit_outer_product(p, a, b, tv.type); + } + // "Intrinsics" -- cgit v1.2.3 From cee45c1b155fcc917c2b0f9cfdbfa060304255e1 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 02:18:30 +0100 Subject: Add `hadamard_product` --- src/check_builtin.cpp | 56 ++++++++++++++++++++++++++++++++++++++++++- src/check_type.cpp | 12 +++------- src/checker_builtin_procs.hpp | 2 ++ src/llvm_backend_expr.cpp | 6 ++--- src/llvm_backend_proc.cpp | 10 ++++++++ src/types.cpp | 11 +++++++++ 6 files changed, 84 insertions(+), 13 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 1d033932f..a9427d4e0 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2056,6 +2056,14 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 return false; } + Type *elem = xt->Array.elem; + + if (!is_type_valid_for_matrix_elems(elem)) { + gbString s = type_to_string(elem); + error(call, "Matrix elements types are limited to integers, floats, and complex, got %s", s); + gb_string_free(s); + } + if (xt->Array.count == 0 || yt->Array.count == 0) { gbString s1 = type_to_string(x.type); gbString s2 = type_to_string(y.type); @@ -2072,7 +2080,53 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 } operand->mode = Addressing_Value; - operand->type = alloc_type_matrix(xt->Array.elem, xt->Array.count, yt->Array.count); + operand->type = alloc_type_matrix(elem, xt->Array.count, yt->Array.count); + operand->type = check_matrix_type_hint(operand->type, type_hint); + break; + } + + case BuiltinProc_hadamard_product: { + Operand x = {}; + Operand y = {}; + check_expr(c, &x, ce->args[0]); + if (x.mode == Addressing_Invalid) { + return false; + } + check_expr(c, &y, ce->args[1]); + if (y.mode == Addressing_Invalid) { + return false; + } + if (!is_operand_value(x) || !is_operand_value(y)) { + error(call, "'%.*s' expects a matrix or array types", LIT(builtin_name)); + return false; + } + if (!is_type_matrix(x.type) && !is_type_array(y.type)) { + gbString s1 = type_to_string(x.type); + gbString s2 = type_to_string(y.type); + error(call, "'%.*s' expects matrix or array values, got %s and %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + if (!are_types_identical(x.type, y.type)) { + gbString s1 = type_to_string(x.type); + gbString s2 = type_to_string(y.type); + error(call, "'%.*s' values of the same type, got %s and %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + Type *elem = core_array_type(x.type); + if (!is_type_valid_for_matrix_elems(elem)) { + gbString s = type_to_string(elem); + error(call, "'%.*s' expects elements to be types are limited to integers, floats, and complex, got %s", LIT(builtin_name), s); + gb_string_free(s); + } + + operand->mode = Addressing_Value; + operand->type = x.type; operand->type = check_matrix_type_hint(operand->type, type_hint); break; } diff --git a/src/check_type.cpp b/src/check_type.cpp index e752f192d..d9302c65a 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -997,8 +997,8 @@ void check_bit_set_type(CheckerContext *c, Type *type, Type *named_type, Ast *no GB_ASSERT(lower <= upper); - i64 bits = MAX_BITS; - if (bs->underlying != nullptr) { + i64 bits = MAX_BITS +; if (bs->underlying != nullptr) { Type *u = check_type(c, bs->underlying); if (!is_type_integer(u)) { gbString ts = type_to_string(u); @@ -2239,13 +2239,7 @@ void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) { error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MAX_MATRIX_ELEMENT_COUNT, cast(long long)element_count); } - if (is_type_integer(elem)) { - // okay - } else if (is_type_float(elem)) { - // okay - } else if (is_type_complex(elem)) { - // okay - } else { + if (!is_type_valid_for_matrix_elems(elem)) { gbString s = type_to_string(elem); error(column.expr, "Matrix elements types are limited to integers, floats, and complex, got %s", s); gb_string_free(s); diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 2c7392b09..de4e99d14 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -37,6 +37,7 @@ enum BuiltinProcId { BuiltinProc_transpose, BuiltinProc_outer_product, + BuiltinProc_hadamard_product, BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures @@ -280,6 +281,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("outer_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, + {STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT(""), 0, true, Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 27f12a829..b894bc7b8 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -672,13 +672,13 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type -lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type) { +lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type, bool component_wise=false) { GB_ASSERT(is_type_matrix(lhs.type) || is_type_matrix(rhs.type)); Type *xt = base_type(lhs.type); Type *yt = base_type(rhs.type); - if (op == Token_Mul) { + if (op == Token_Mul && !component_wise) { if (xt->kind == Type_Matrix) { if (yt->kind == Type_Matrix) { return lb_emit_matrix_mul(p, lhs, rhs, type); @@ -703,7 +703,7 @@ lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue array_lhs.type = array_type; array_rhs.type = array_type; - lbValue array = lb_emit_arith_array(p, op, array_lhs, array_rhs, type); + lbValue array = lb_emit_arith_array(p, op, array_lhs, array_rhs, array_type); array.type = type; return array; } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 5a7fc1626..da4e4ad28 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1270,6 +1270,16 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, lbValue b = lb_build_expr(p, ce->args[1]); return lb_emit_outer_product(p, a, b, tv.type); } + case BuiltinProc_hadamard_product: + { + lbValue a = lb_build_expr(p, ce->args[0]); + lbValue b = lb_build_expr(p, ce->args[1]); + if (is_type_array(tv.type)) { + return lb_emit_arith(p, Token_Mul, a, b, tv.type); + } + GB_ASSERT(is_type_matrix(tv.type)); + return lb_emit_arith_matrix(p, Token_Mul, a, b, tv.type, true); + } // "Intrinsics" diff --git a/src/types.cpp b/src/types.cpp index eaf1bac74..32e26bcc6 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1333,6 +1333,17 @@ i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) { return stride_elems*column_index + row_index; } +bool is_type_valid_for_matrix_elems(Type *t) { + if (is_type_integer(t)) { + return true; + } else if (is_type_float(t)) { + return true; + } else if (is_type_complex(t)) { + return true; + } + return false; +} + bool is_type_dynamic_array(Type *t) { t = base_type(t); return t->kind == Type_DynamicArray; -- cgit v1.2.3 From d3abc1a2b4fe024fed5f2b9f5371fc2b7fb029be Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 15:33:23 +0100 Subject: Add `matrix_flatten` - `matrix[R, C]T` -> `[R*C]T` --- src/check_builtin.cpp | 30 +++++++++++++++++ src/checker_builtin_procs.hpp | 2 ++ src/llvm_backend_expr.cpp | 77 ++++++++++++++++++++++++++++++++++++++----- src/llvm_backend_proc.cpp | 6 ++++ 4 files changed, 106 insertions(+), 9 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index a9427d4e0..b60509c03 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2131,6 +2131,36 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 break; } + case BuiltinProc_matrix_flatten: { + Operand x = {}; + check_expr(c, &x, ce->args[0]); + if (x.mode == Addressing_Invalid) { + return false; + } + if (!is_operand_value(x)) { + error(call, "'%.*s' expects a matrix or array", LIT(builtin_name)); + return false; + } + Type *t = base_type(x.type); + if (!is_type_matrix(t) && !is_type_array(t)) { + gbString s = type_to_string(x.type); + error(call, "'%.*s' expects a matrix or array, got %s", LIT(builtin_name), s); + gb_string_free(s); + return false; + } + + operand->mode = Addressing_Value; + if (is_type_array(t)) { + // Do nothing + operand->type = x.type; + } else { + GB_ASSERT(t->kind == Type_Matrix); + operand->type = alloc_type_array(t->Matrix.elem, t->Matrix.row_count*t->Matrix.column_count); + } + operand->type = check_matrix_type_hint(operand->type, type_hint); + break; + } + case BuiltinProc_simd_vector: { Operand x = {}; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index de4e99d14..5594c1a1a 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -38,6 +38,7 @@ enum BuiltinProcId { BuiltinProc_transpose, BuiltinProc_outer_product, BuiltinProc_hadamard_product, + BuiltinProc_matrix_flatten, BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures @@ -282,6 +283,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("outer_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, + {STR_LIT("matrix_flatten"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT(""), 0, true, Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index c1bdceba6..7d1c8e3db 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -517,6 +517,33 @@ LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { return matrix_vector; } +LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) { + Type *mt = base_type(m.type); + GB_ASSERT(mt->kind == Type_Matrix); + + unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); + unsigned row_count = cast(unsigned)mt->Matrix.row_count; + unsigned column_count = cast(unsigned)mt->Matrix.column_count; + + auto columns = slice_make(permanent_allocator(), column_count); + + LLVMValueRef vector = lb_matrix_to_vector(p, m); + + unsigned mask_elems_index = 0; + auto mask_elems = slice_make(permanent_allocator(), row_count*column_count); + for (unsigned j = 0; j < column_count; j++) { + for (unsigned i = 0; i < row_count; i++) { + unsigned offset = stride*j + i; + mask_elems[mask_elems_index++] = lb_const_int(p->module, t_u32, offset).value; + } + } + + LLVMValueRef mask = LLVMConstVector(mask_elems.data, cast(unsigned)mask_elems.count); + LLVMValueRef trimmed_vector = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, ""); + return trimmed_vector; +} + + lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { if (is_type_array(m.type)) { // no-op @@ -573,6 +600,46 @@ lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { return lb_addr_load(p, res); } +lbValue lb_matrix_cast_vector_to_type(lbProcedure *p, LLVMValueRef vector, Type *type) { + lbAddr res = lb_add_local_generated(p, type, true); + LLVMValueRef res_ptr = res.addr.value; + unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(LLVMTypeOf(vector))); + LLVMSetAlignment(res_ptr, alignment); + + res_ptr = LLVMBuildPointerCast(p->builder, res_ptr, LLVMPointerType(LLVMTypeOf(vector), 0), ""); + LLVMBuildStore(p->builder, vector, res_ptr); + + return lb_addr_load(p, res); +} + +lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type) { + if (is_type_array(m.type)) { + // no-op + m.type = type; + return m; + } + Type *mt = base_type(m.type); + GB_ASSERT(mt->kind == Type_Matrix); + + if (lb_matrix_elem_simple(mt)) { + LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m); + return lb_matrix_cast_vector_to_type(p, vector, type); + } + + lbAddr res = lb_add_local_generated(p, type, true); + + i64 row_count = mt->Matrix.row_count; + i64 column_count = mt->Matrix.column_count; + for (i64 j = 0; j < column_count; j++) { + for (i64 i = 0; i < row_count; i++) { + lbValue src = lb_emit_matrix_ev(p, m, i, j); + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + lb_emit_store(p, dst, src); + } + } + return lb_addr_load(p, res); +} + lbValue lb_emit_outer_product(lbProcedure *p, lbValue a, lbValue b, Type *type) { Type *mt = base_type(type); @@ -737,16 +804,8 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type vector = llvm_vector_add(p, vector, product); } } - - lbAddr res = lb_add_local_generated(p, type, true); - LLVMValueRef res_ptr = res.addr.value; - unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(LLVMTypeOf(vector))); - LLVMSetAlignment(res_ptr, alignment); - - res_ptr = LLVMBuildPointerCast(p->builder, res_ptr, LLVMPointerType(LLVMTypeOf(vector), 0), ""); - LLVMBuildStore(p->builder, vector, res_ptr); - return lb_addr_load(p, res); + return lb_matrix_cast_vector_to_type(p, vector, type); } lbAddr res = lb_add_local_generated(p, type, true); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index da4e4ad28..8686b3262 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1280,6 +1280,12 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, GB_ASSERT(is_type_matrix(tv.type)); return lb_emit_arith_matrix(p, Token_Mul, a, b, tv.type, true); } + + case BuiltinProc_matrix_flatten: + { + lbValue m = lb_build_expr(p, ce->args[0]); + return lb_emit_matrix_flatten(p, m, tv.type); + } // "Intrinsics" -- cgit v1.2.3 From 3e4c2e49320b6ddd905b38fc884ec47aa8da7748 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 16:03:16 +0100 Subject: Support `conj` on array and matrix types --- src/check_builtin.cpp | 13 ++++++-- src/llvm_backend_proc.cpp | 77 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 62 insertions(+), 28 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index b60509c03..7dc4784f8 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1266,7 +1266,10 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 case BuiltinProc_conj: { // conj :: proc(x: type) -> type Operand *x = operand; - if (is_type_complex(x->type)) { + Type *t = x->type; + Type *elem = core_array_type(t); + + if (is_type_complex(t)) { if (x->mode == Addressing_Constant) { ExactValue v = exact_value_to_complex(x->value); f64 r = v.value_complex->real; @@ -1276,7 +1279,7 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 } else { x->mode = Addressing_Value; } - } else if (is_type_quaternion(x->type)) { + } else if (is_type_quaternion(t)) { if (x->mode == Addressing_Constant) { ExactValue v = exact_value_to_quaternion(x->value); f64 r = +v.value_quaternion->real; @@ -1288,7 +1291,11 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 } else { x->mode = Addressing_Value; } - } else { + } else if (is_type_array_like(t) && (is_type_complex(elem) || is_type_quaternion(elem))) { + x->mode = Addressing_Value; + } else if (is_type_matrix(t) && (is_type_complex(elem) || is_type_quaternion(elem))) { + x->mode = Addressing_Value; + }else { gbString s = type_to_string(x->type); error(call, "Expected a complex or quaternion, got '%s'", s); gb_string_free(s); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 8686b3262..72ba3982c 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -725,6 +725,57 @@ lbValue lb_emit_runtime_call(lbProcedure *p, char const *c_name, Array return lb_emit_call(p, proc, args); } +lbValue lb_emit_conjugate(lbProcedure *p, lbValue val, Type *type) { + lbValue res = {}; + Type *t = val.type; + if (is_type_complex(t)) { + res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, false)); + lbValue real = lb_emit_struct_ev(p, val, 0); + lbValue imag = lb_emit_struct_ev(p, val, 1); + imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type); + lb_emit_store(p, lb_emit_struct_ep(p, res, 0), real); + lb_emit_store(p, lb_emit_struct_ep(p, res, 1), imag); + } else if (is_type_quaternion(t)) { + // @QuaternionLayout + res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, false)); + lbValue real = lb_emit_struct_ev(p, val, 3); + lbValue imag = lb_emit_struct_ev(p, val, 0); + lbValue jmag = lb_emit_struct_ev(p, val, 1); + lbValue kmag = lb_emit_struct_ev(p, val, 2); + imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type); + jmag = lb_emit_unary_arith(p, Token_Sub, jmag, jmag.type); + kmag = lb_emit_unary_arith(p, Token_Sub, kmag, kmag.type); + lb_emit_store(p, lb_emit_struct_ep(p, res, 3), real); + lb_emit_store(p, lb_emit_struct_ep(p, res, 0), imag); + lb_emit_store(p, lb_emit_struct_ep(p, res, 1), jmag); + lb_emit_store(p, lb_emit_struct_ep(p, res, 2), kmag); + } else if (is_type_array_like(t)) { + res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, true)); + Type *elem_type = base_array_type(t); + i64 count = get_array_type_count(t); + for (i64 i = 0; i < count; i++) { + lbValue dst = lb_emit_array_epi(p, res, i); + lbValue elem = lb_emit_struct_ev(p, val, cast(i32)i); + elem = lb_emit_conjugate(p, elem, elem_type); + lb_emit_store(p, dst, elem); + } + } else if (is_type_matrix(t)) { + Type *mt = base_type(t); + GB_ASSERT(mt->kind == Type_Matrix); + Type *elem_type = mt->Matrix.elem; + res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, true)); + for (i64 j = 0; j < mt->Matrix.column_count; j++) { + for (i64 i = 0; i < mt->Matrix.row_count; i++) { + lbValue dst = lb_emit_matrix_epi(p, res, i, j); + lbValue elem = lb_emit_matrix_ev(p, val, i, j); + elem = lb_emit_conjugate(p, elem, elem_type); + lb_emit_store(p, dst, elem); + } + } + } + return lb_emit_load(p, res); +} + lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining, bool use_copy_elision_hint) { lbModule *m = p->module; @@ -1117,31 +1168,7 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, case BuiltinProc_conj: { lbValue val = lb_build_expr(p, ce->args[0]); - lbValue res = {}; - Type *t = val.type; - if (is_type_complex(t)) { - res = lb_addr_get_ptr(p, lb_add_local_generated(p, tv.type, false)); - lbValue real = lb_emit_struct_ev(p, val, 0); - lbValue imag = lb_emit_struct_ev(p, val, 1); - imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type); - lb_emit_store(p, lb_emit_struct_ep(p, res, 0), real); - lb_emit_store(p, lb_emit_struct_ep(p, res, 1), imag); - } else if (is_type_quaternion(t)) { - // @QuaternionLayout - res = lb_addr_get_ptr(p, lb_add_local_generated(p, tv.type, false)); - lbValue real = lb_emit_struct_ev(p, val, 3); - lbValue imag = lb_emit_struct_ev(p, val, 0); - lbValue jmag = lb_emit_struct_ev(p, val, 1); - lbValue kmag = lb_emit_struct_ev(p, val, 2); - imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type); - jmag = lb_emit_unary_arith(p, Token_Sub, jmag, jmag.type); - kmag = lb_emit_unary_arith(p, Token_Sub, kmag, kmag.type); - lb_emit_store(p, lb_emit_struct_ep(p, res, 3), real); - lb_emit_store(p, lb_emit_struct_ep(p, res, 0), imag); - lb_emit_store(p, lb_emit_struct_ep(p, res, 1), jmag); - lb_emit_store(p, lb_emit_struct_ep(p, res, 2), kmag); - } - return lb_emit_load(p, res); + return lb_emit_conjugate(p, val, tv.type); } case BuiltinProc_expand_to_tuple: { -- cgit v1.2.3 From e6f725dc2c71d960defda3aa549b47e0cd043c70 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 17:00:14 +0100 Subject: Minor fix for parapoly matrix types --- src/check_builtin.cpp | 1 - src/llvm_backend_proc.cpp | 3 +-- src/types.cpp | 4 ++++ 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 7dc4784f8..1535dc2a2 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2168,7 +2168,6 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 break; } - case BuiltinProc_simd_vector: { Operand x = {}; Operand y = {}; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 72ba3982c..5623f75ec 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1313,8 +1313,7 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, lbValue m = lb_build_expr(p, ce->args[0]); return lb_emit_matrix_flatten(p, m, tv.type); } - - + // "Intrinsics" case BuiltinProc_alloca: diff --git a/src/types.cpp b/src/types.cpp index 32e26bcc6..716ebe31f 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1334,12 +1334,16 @@ i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) { } bool is_type_valid_for_matrix_elems(Type *t) { + t = base_type(t); if (is_type_integer(t)) { return true; } else if (is_type_float(t)) { return true; } else if (is_type_complex(t)) { return true; + } + if (t->kind == Type_Generic) { + return true; } return false; } -- cgit v1.2.3 From a440d8d812223961f0934aefecaef4975a604c43 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 13:10:56 +0100 Subject: Improve use of vector muladd operations --- src/llvm_abi.cpp | 13 ++++++++++++- src/llvm_backend.hpp | 4 +++- src/llvm_backend_expr.cpp | 10 ++++------ src/llvm_backend_proc.cpp | 11 +---------- src/llvm_backend_utility.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+), 18 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_abi.cpp b/src/llvm_abi.cpp index 8d3d5542f..9e7f4b290 100644 --- a/src/llvm_abi.cpp +++ b/src/llvm_abi.cpp @@ -153,7 +153,18 @@ void lb_add_function_type_attributes(LLVMValueRef fn, lbFunctionType *ft, ProcCa // TODO(bill): Clean up this logic if (!is_arch_wasm()) { cc_kind = lb_calling_convention_map[calling_convention]; - } + } + // if (build_context.metrics.arch == TargetArch_amd64) { + // if (build_context.metrics.os == TargetOs_windows) { + // if (cc_kind == lbCallingConvention_C) { + // cc_kind = lbCallingConvention_Win64; + // } + // } else { + // if (cc_kind == lbCallingConvention_C) { + // cc_kind = lbCallingConvention_X86_64_SysV; + // } + // } + // } LLVMSetFunctionCallConv(fn, cc_kind); if (calling_convention == ProcCC_Odin) { unsigned context_index = offset+arg_count; diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index d2abed354..4aea88f47 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -472,7 +472,7 @@ LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align); -enum lbCallingConventionKind { +enum lbCallingConventionKind : unsigned { lbCallingConvention_C = 0, lbCallingConvention_Fast = 8, lbCallingConvention_Cold = 9, @@ -517,6 +517,8 @@ enum lbCallingConventionKind { lbCallingConvention_AMDGPU_LS = 95, lbCallingConvention_AMDGPU_ES = 96, lbCallingConvention_AArch64_VectorCall = 97, + lbCallingConvention_AArch64_SVE_VectorCall = 98, + lbCallingConvention_WASM_EmscriptenInvoke = 99, lbCallingConvention_MaxID = 1023, }; diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 7ae1a7315..ad2c609ef 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -837,11 +837,10 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type LLVMValueRef vector = nullptr; for (i64 i = 0; i < column_count; i++) { - LLVMValueRef product = llvm_vector_mul(p, m_columns[i], v_rows[i]); if (i == 0) { - vector = product; + vector = llvm_vector_mul(p, m_columns[i], v_rows[i]); } else { - vector = llvm_vector_add(p, vector, product); + vector = llvm_vector_mul_add(p, m_columns[i], v_rows[i], vector); } } @@ -914,11 +913,10 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type LLVMValueRef vector = nullptr; for (i64 i = 0; i < row_count; i++) { - LLVMValueRef product = llvm_vector_mul(p, v_rows[i], m_columns[i]); if (i == 0) { - vector = product; + vector = llvm_vector_mul(p, v_rows[i], m_columns[i]); } else { - vector = llvm_vector_add(p, vector, product); + vector = llvm_vector_mul_add(p, v_rows[i], m_columns[i], vector); } } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 5423ab51b..96bbbcee6 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -127,16 +127,7 @@ lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool ignore_body) lb_ensure_abi_function_type(m, p); lb_add_function_type_attributes(p->value, p->abi_function_type, p->abi_function_type->calling_convention); - if (false) { - lbCallingConventionKind cc_kind = lbCallingConvention_C; - // TODO(bill): Clean up this logic - if (!is_arch_wasm()) { - cc_kind = lb_calling_convention_map[pt->Proc.calling_convention]; - } - LLVMSetFunctionCallConv(p->value, cc_kind); - } - - + if (pt->Proc.diverging) { lb_add_attribute_to_proc(m, p->value, "noreturn"); } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index af773d467..9bb22b50b 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1631,4 +1631,48 @@ LLVMValueRef llvm_vector_mul(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) { LLVMValueRef llvm_vector_dot(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) { return llvm_vector_reduce_add(p, llvm_vector_mul(p, a, b)); +} + +LLVMValueRef llvm_vector_mul_add(lbProcedure *p, LLVMValueRef a, LLVMValueRef b, LLVMValueRef c) { + lbModule *m = p->module; + + LLVMTypeRef t = LLVMTypeOf(a); + GB_ASSERT(t == LLVMTypeOf(b)); + GB_ASSERT(t == LLVMTypeOf(c)); + GB_ASSERT(LLVMGetTypeKind(t) == LLVMVectorTypeKind); + + LLVMTypeRef elem = LLVMGetElementType(t); + + bool is_possible = false; + + switch (LLVMGetTypeKind(elem)) { + case LLVMHalfTypeKind: + is_possible = true; + break; + case LLVMFloatTypeKind: + case LLVMDoubleTypeKind: + is_possible = true; + break; + } + + if (is_possible) { + char const *name = "llvm.fmuladd"; + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + + LLVMTypeRef types[1] = {}; + types[0] = t; + + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); + LLVMValueRef values[3] = {}; + values[0] = a; + values[1] = b; + values[2] = c; + LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + return call; + } else { + LLVMValueRef x = llvm_vector_mul(p, a, b); + LLVMValueRef y = llvm_vector_add(p, x, c); + return y; + } } \ No newline at end of file -- cgit v1.2.3