From 4c655865e5d9af83a98c137609b01972f4e51beb Mon Sep 17 00:00:00 2001
From: gingerBill <bill@gingerbill.org>
Date: Mon, 18 Oct 2021 16:52:19 +0100
Subject: Begin work on matrix type

---
 src/check_type.cpp | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

(limited to 'src/check_type.cpp')

diff --git a/src/check_type.cpp b/src/check_type.cpp
index 0d5c0f977..e752f192d 100644
--- a/src/check_type.cpp
+++ b/src/check_type.cpp
@@ -2200,6 +2200,63 @@ void check_map_type(CheckerContext *ctx, Type *type, Ast *node) {
 	// error(node, "'map' types are not yet implemented");
 }
 
+void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) {
+	ast_node(mt, MatrixType, node);
+	// Checks a matrix type expression (row count, column count, element type) and stores the resulting matrix type in *type.
+	Operand row = {};
+	Operand column = {};
+	// The operands are passed to check_array_count and reused below for the generic checks and as error locations.
+	i64 row_count = check_array_count(ctx, &row, mt->row_count);
+	i64 column_count = check_array_count(ctx, &column, mt->column_count);
+	
+	Type *elem = check_type_expr(ctx, mt->elem, nullptr);
+	// A count given as a generic type is remembered: it exempts that dimension from the minimum-count check and is passed on to alloc_type_matrix.
+	Type *generic_row = nullptr;
+	Type *generic_column = nullptr;
+	
+	if (row.mode == Addressing_Type && row.type->kind == Type_Generic) {
+		generic_row = row.type;
+	}
+	
+	if (column.mode == Addressing_Type && column.type->kind == Type_Generic) {
+		generic_column = column.type;
+	}
+	
+	if (row_count < MIN_MATRIX_ELEMENT_COUNT && generic_row == nullptr) {
+		gbString s = expr_to_string(row.expr);
+		error(row.expr, "Invalid matrix row count, expected %d+ rows, got %s", MIN_MATRIX_ELEMENT_COUNT, s);
+		gb_string_free(s);
+	}
+	
+	if (column_count < MIN_MATRIX_ELEMENT_COUNT && generic_column == nullptr) {
+		gbString s = expr_to_string(column.expr);
+		error(column.expr, "Invalid matrix column count, expected %d+ rows, got %s", MIN_MATRIX_ELEMENT_COUNT, s);
+		gb_string_free(s);
+	}
+	
+	if (row_count*column_count > MAX_MATRIX_ELEMENT_COUNT) {
+		i64 element_count = row_count*column_count;
+		error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MAX_MATRIX_ELEMENT_COUNT, cast(long long)element_count);
+	}
+	
+	if (is_type_integer(elem)) {
+		// okay
+	} else if (is_type_float(elem)) {
+		// okay
+	} else if (is_type_complex(elem)) {
+		// okay
+	} else {
+		gbString s = type_to_string(elem);
+		error(column.expr, "Matrix elements types are limited to integers, floats, and complex, got %s", s);
+		gb_string_free(s);
+	}
+	// NOTE: none of the errors above return early, so *type is assigned even when the matrix was reported as invalid.
+	*type = alloc_type_matrix(elem, row_count, column_count, generic_row, generic_column);
+	
+	return;
+}
+
+
 
 Type *make_soa_struct_internal(CheckerContext *ctx, Ast *array_typ_expr, Ast *elem_expr, Type *elem, i64 count, Type *generic_type, StructSoaKind soa_kind) {
 	Type *bt_elem = base_type(elem);
@@ -2785,6 +2842,17 @@ bool check_type_internal(CheckerContext *ctx, Ast *e, Type **type, Type *named_t
 			return true;
 		}
 	case_end;
+	
+	
+	case_ast_node(mt, MatrixType, e);
+		bool ips = ctx->in_polymorphic_specialization;
+		defer (ctx->in_polymorphic_specialization = ips);
+		ctx->in_polymorphic_specialization = false;
+
+		check_matrix_type(ctx, type, e);
+		set_base_type(named_type, *type);
+		return true;
+	case_end;
 	}
 
 	*type = t_invalid;
-- 
cgit v1.2.3


From cee45c1b155fcc917c2b0f9cfdbfa060304255e1 Mon Sep 17 00:00:00 2001
From: gingerBill <bill@gingerbill.org>
Date: Wed, 20 Oct 2021 02:18:30 +0100
Subject: Add `hadamard_product`

---
 src/check_builtin.cpp         | 56 ++++++++++++++++++++++++++++++++++++++++++-
 src/check_type.cpp            | 12 +++-------
 src/checker_builtin_procs.hpp |  2 ++
 src/llvm_backend_expr.cpp     |  6 ++---
 src/llvm_backend_proc.cpp     | 10 ++++++++
 src/types.cpp                 | 11 +++++++++
 6 files changed, 84 insertions(+), 13 deletions(-)

(limited to 'src/check_type.cpp')

diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp
index 1d033932f..a9427d4e0 100644
--- a/src/check_builtin.cpp
+++ b/src/check_builtin.cpp
@@ -2056,6 +2056,14 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 			return false;
 		}
 		
+		Type *elem = xt->Array.elem;
+		
+		if (!is_type_valid_for_matrix_elems(elem)) {
+			gbString s = type_to_string(elem);
+			error(call, "Matrix elements types are limited to integers, floats, and complex, got %s", s);
+			gb_string_free(s);
+		}
+		
 		if (xt->Array.count == 0 || yt->Array.count == 0) {
 			gbString s1 = type_to_string(x.type);
 			gbString s2 = type_to_string(y.type);
@@ -2072,7 +2080,53 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 		}
 		
 		operand->mode = Addressing_Value;
-		operand->type = alloc_type_matrix(xt->Array.elem, xt->Array.count, yt->Array.count);	
+		operand->type = alloc_type_matrix(elem, xt->Array.count, yt->Array.count);	
+		operand->type = check_matrix_type_hint(operand->type, type_hint);
+		break;
+	}
+	
+	case BuiltinProc_hadamard_product: {
+		Operand x = {};
+		Operand y = {};
+		check_expr(c, &x, ce->args[0]);
+		if (x.mode == Addressing_Invalid) {
+			return false;
+		}
+		check_expr(c, &y, ce->args[1]);
+		if (y.mode == Addressing_Invalid) {
+			return false;
+		}
+		if (!is_operand_value(x) || !is_operand_value(y)) {
+			error(call, "'%.*s' expects a matrix or array types", LIT(builtin_name));
+			return false;
+		}
+		if (!is_type_matrix(x.type) && !is_type_array(x.type)) { // only x needs the kind test; y is covered by the identical-type check below
+			gbString s1 = type_to_string(x.type);
+			gbString s2 = type_to_string(y.type);
+			error(call, "'%.*s' expects matrix or array values, got %s and %s", LIT(builtin_name), s1, s2);
+			gb_string_free(s2);
+			gb_string_free(s1);
+			return false;
+		}
+		// Both operands must have the identical type; that type (x.type) is also the result type before the type hint is applied.
+		if (!are_types_identical(x.type, y.type)) {
+			gbString s1 = type_to_string(x.type);
+			gbString s2 = type_to_string(y.type);
+			error(call, "'%.*s' expects values of the same type, got %s and %s", LIT(builtin_name), s1, s2);
+			gb_string_free(s2);
+			gb_string_free(s1);
+			return false;
+		}
+		
+		Type *elem = core_array_type(x.type);
+		if (!is_type_valid_for_matrix_elems(elem)) {
+			gbString s = type_to_string(elem);
+			error(call, "'%.*s' expects element types to be limited to integers, floats, and complex, got %s", LIT(builtin_name), s);
+			gb_string_free(s);
+		}
+		
+		operand->mode = Addressing_Value;
+		operand->type = x.type;
+		operand->type = check_matrix_type_hint(operand->type, type_hint);
+		break;
+	}
diff --git a/src/check_type.cpp b/src/check_type.cpp
index e752f192d..d9302c65a 100644
--- a/src/check_type.cpp
+++ b/src/check_type.cpp
@@ -997,8 +997,8 @@ void check_bit_set_type(CheckerContext *c, Type *type, Type *named_type, Ast *no
 
 				GB_ASSERT(lower <= upper);
 
-				i64 bits = MAX_BITS;
-				if (bs->underlying != nullptr) {
+				i64 bits = MAX_BITS;
+				if (bs->underlying != nullptr) {
 					Type *u = check_type(c, bs->underlying);
 					if (!is_type_integer(u)) {
 						gbString ts = type_to_string(u);
@@ -2239,13 +2239,7 @@ void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) {
 		error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MAX_MATRIX_ELEMENT_COUNT, cast(long long)element_count);
 	}
 	
-	if (is_type_integer(elem)) {
-		// okay
-	} else if (is_type_float(elem)) {
-		// okay
-	} else if (is_type_complex(elem)) {
-		// okay
-	} else {
+	if (!is_type_valid_for_matrix_elems(elem)) {
 		gbString s = type_to_string(elem);
 		error(column.expr, "Matrix elements types are limited to integers, floats, and complex, got %s", s);
 		gb_string_free(s);
diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp
index 2c7392b09..de4e99d14 100644
--- a/src/checker_builtin_procs.hpp
+++ b/src/checker_builtin_procs.hpp
@@ -37,6 +37,7 @@ enum BuiltinProcId {
 	
 	BuiltinProc_transpose,
 	BuiltinProc_outer_product,
+	BuiltinProc_hadamard_product,
 
 	BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures
 
@@ -280,6 +281,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
 	
 	{STR_LIT("transpose"),        1, false, Expr_Expr, BuiltinProcPkg_builtin},
 	{STR_LIT("outer_product"),    2, false, Expr_Expr, BuiltinProcPkg_builtin},
+	{STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin},
 
 	{STR_LIT(""),                 0, true,  Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE
 
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp
index 27f12a829..b894bc7b8 100644
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -672,13 +672,13 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type
 
 
-lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type) {
+lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type, bool component_wise=false) {
 	GB_ASSERT(is_type_matrix(lhs.type) || is_type_matrix(rhs.type));
 	
 	Type *xt = base_type(lhs.type);
 	Type *yt = base_type(rhs.type);
 	
-	if (op == Token_Mul) {
+	if (op == Token_Mul && !component_wise) {
 		if (xt->kind == Type_Matrix) {
 			if (yt->kind == Type_Matrix) {
 				return lb_emit_matrix_mul(p, lhs, rhs, type);
@@ -703,7 +703,7 @@ lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue
 		array_lhs.type = array_type; 
 		array_rhs.type = array_type;
 
-		lbValue array = lb_emit_arith_array(p, op, array_lhs, array_rhs, type);
+		lbValue array = lb_emit_arith_array(p, op, array_lhs, array_rhs, array_type);
 		array.type = type;
 		return array;
 	}
diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp
index 5a7fc1626..da4e4ad28 100644
--- a/src/llvm_backend_proc.cpp
+++ b/src/llvm_backend_proc.cpp
@@ -1270,6 +1270,16 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv,
 			lbValue b = lb_build_expr(p, ce->args[1]);
 			return lb_emit_outer_product(p, a, b, tv.type);
 		}
+	case BuiltinProc_hadamard_product:
+		{
+			lbValue a = lb_build_expr(p, ce->args[0]);
+			lbValue b = lb_build_expr(p, ce->args[1]);
+			if (is_type_array(tv.type)) {
+				return lb_emit_arith(p, Token_Mul, a, b, tv.type);
+			}
+			GB_ASSERT(is_type_matrix(tv.type));
+			return lb_emit_arith_matrix(p, Token_Mul, a, b, tv.type, true);
+		}
 
 
 	// "Intrinsics"
diff --git a/src/types.cpp b/src/types.cpp
index eaf1bac74..32e26bcc6 100644
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -1333,6 +1333,17 @@ i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) {
 	return stride_elems*column_index + row_index;
 }
 
+// Reports whether `t` is acceptable as the element type of a matrix.
+// Integer, float, and complex types are accepted; anything else is rejected.
+bool is_type_valid_for_matrix_elems(Type *t) {
+	if (is_type_integer(t) ||
+	    is_type_float(t)   ||
+	    is_type_complex(t)) {
+		return true;
+	}
+	return false;
+}
+
 bool is_type_dynamic_array(Type *t) {
 	t = base_type(t);
 	return t->kind == Type_DynamicArray;
-- 
cgit v1.2.3


From e0b9475378f4d69ebaf3e141ed941674b2c0d3f3 Mon Sep 17 00:00:00 2001
From: gingerBill <bill@gingerbill.org>
Date: Thu, 21 Oct 2021 01:14:44 +0100
Subject: Allow casting between square matrices of the same element type

---
 src/check_expr.cpp        | 19 +++++++++++++++++++
 src/check_type.cpp        | 10 +++++-----
 src/llvm_backend_expr.cpp | 44 ++++++++++++++++++++++++++++++++------------
 3 files changed, 56 insertions(+), 17 deletions(-)

(limited to 'src/check_type.cpp')

diff --git a/src/check_expr.cpp b/src/check_expr.cpp
index 498bf78c7..ad12e00c8 100644
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -2460,6 +2460,24 @@ bool check_is_castable_to(CheckerContext *c, Operand *operand, Type *y) {
 	if (is_type_quaternion(src) && is_type_quaternion(dst)) {
 		return true;
 	}
+	
+	if (is_type_matrix(src) && is_type_matrix(dst)) {
+		GB_ASSERT(src->kind == Type_Matrix);
+		GB_ASSERT(dst->kind == Type_Matrix);
+		if (!are_types_identical(src->Matrix.elem, dst->Matrix.elem)) {
+			return false;
+		}
+		
+		if (src->Matrix.row_count != src->Matrix.column_count) {
+			return false;
+		}
+		
+		if (dst->Matrix.row_count != dst->Matrix.column_count) {
+			return false;
+		}
+		
+		return true;
+	}
 
 
 	// Cast between pointers
@@ -8838,6 +8856,7 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type
 	case Ast_EnumType:
 	case Ast_MapType:
 	case Ast_BitSetType:
+	case Ast_MatrixType:
 		o->mode = Addressing_Type;
 		o->type = check_type(c, node);
 		break;
diff --git a/src/check_type.cpp b/src/check_type.cpp
index d9302c65a..21c8a9f19 100644
--- a/src/check_type.cpp
+++ b/src/check_type.cpp
@@ -1154,7 +1154,11 @@ Type *determine_type_from_polymorphic(CheckerContext *ctx, Type *poly_type, Oper
 	bool show_error = modify_type && !ctx->hide_polymorphic_errors;
 	if (!is_operand_value(operand)) {
 		if (show_error) {
-			error(operand.expr, "Cannot determine polymorphic type from parameter");
+			gbString pts = type_to_string(poly_type);
+			gbString ots = type_to_string(operand.type);
+			defer (gb_string_free(pts));
+			defer (gb_string_free(ots));
+			error(operand.expr, "Cannot determine polymorphic type from parameter: '%s' to '%s'", ots, pts);
 		}
 		return t_invalid;
 	}
@@ -2839,10 +2843,6 @@ bool check_type_internal(CheckerContext *ctx, Ast *e, Type **type, Type *named_t
 	
 	
 	case_ast_node(mt, MatrixType, e);
-		bool ips = ctx->in_polymorphic_specialization;
-		defer (ctx->in_polymorphic_specialization = ips);
-		ctx->in_polymorphic_specialization = false;
-
 		check_matrix_type(ctx, type, e);
 		set_base_type(named_type, *type);
 		return true;
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp
index cdc1deea1..9582be93c 100644
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -476,7 +476,7 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r
 	}
 }
 
-bool lb_matrix_elem_simple(Type *t) {
+bool lb_is_matrix_simdable(Type *t) {
 	Type *mt = base_type(t);
 	GB_ASSERT(mt->kind == Type_Matrix);
 	
@@ -555,7 +555,7 @@ lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) {
 	Type *mt = base_type(m.type);
 	GB_ASSERT(mt->kind == Type_Matrix);
 	
-	if (lb_matrix_elem_simple(mt)) {
+	if (lb_is_matrix_simdable(mt)) {
 		unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
 		unsigned row_count    = cast(unsigned)mt->Matrix.row_count;
 		unsigned column_count = cast(unsigned)mt->Matrix.column_count;
@@ -623,7 +623,7 @@ lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type) {
 	Type *mt = base_type(m.type);
 	GB_ASSERT(mt->kind == Type_Matrix);
 	
-	if (lb_matrix_elem_simple(mt)) {
+	if (lb_is_matrix_simdable(mt)) {
 		LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m);
 		return lb_matrix_cast_vector_to_type(p, vector, type);
 	}
@@ -690,7 +690,7 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type)
 	unsigned inner         = cast(unsigned)xt->Matrix.column_count;
 	unsigned outer_columns = cast(unsigned)yt->Matrix.column_count;
 		
-	if (lb_matrix_elem_simple(xt)) {
+	if (lb_is_matrix_simdable(xt)) {
 		unsigned x_stride = cast(unsigned)matrix_type_stride_in_elems(xt);
 		unsigned y_stride = cast(unsigned)matrix_type_stride_in_elems(yt);
 		
@@ -773,7 +773,7 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type
 	
 	Type *elem = mt->Matrix.elem;
 	
-	if (lb_matrix_elem_simple(mt)) {
+	if (lb_is_matrix_simdable(mt)) {
 		unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
 		
 		unsigned row_count = cast(unsigned)mt->Matrix.row_count;
@@ -819,9 +819,8 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type
 			
 			lbValue a = lb_emit_matrix_ev(p, lhs, i, j);
 			lbValue b = lb_emit_struct_ev(p, rhs, cast(i32)j);
-			lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem);
-			lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem);
-			lb_emit_store(p, dst, d);
+			lbValue c = lb_emit_mul_add(p, a, b, d0, elem);
+			lb_emit_store(p, dst, c);
 		}
 	}
 	
@@ -842,7 +841,7 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type
 	
 	Type *elem = mt->Matrix.elem;
 	
-	if (lb_matrix_elem_simple(mt)) {
+	if (lb_is_matrix_simdable(mt)) {
 		unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
 		
 		unsigned row_count = cast(unsigned)mt->Matrix.row_count;
@@ -903,9 +902,8 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type
 			
 			lbValue a = lb_emit_struct_ev(p, lhs, cast(i32)k);
 			lbValue b = lb_emit_matrix_ev(p, rhs, k, j);
-			lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem);
-			lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem);
-			lb_emit_store(p, dst, d);
+			lbValue c = lb_emit_mul_add(p, a, b, d0, elem);
+			lb_emit_store(p, dst, c);
 		}
 	}
 	
@@ -1938,6 +1936,28 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
 		
 		return lb_addr_load(p, v);
 	}
+	
+	if (is_type_matrix(dst) && is_type_matrix(src)) {
+		GB_ASSERT(dst->kind == Type_Matrix);
+		GB_ASSERT(src->kind == Type_Matrix);
+		lbAddr v = lb_add_local_generated(p, t, true);
+		for (i64 j = 0; j < dst->Matrix.column_count; j++) {
+			for (i64 i = 0; i < dst->Matrix.row_count; i++) {
+				if (i < src->Matrix.row_count && j < src->Matrix.column_count) {
+					lbValue d = lb_emit_matrix_epi(p, v.addr, i, j);
+					lbValue s = lb_emit_matrix_ev(p, value, i, j);
+					lb_emit_store(p, d, s);
+				} else if (i == j) {
+					lbValue d = lb_emit_matrix_epi(p, v.addr, i, j);
+					lbValue s = lb_const_value(p->module, dst->Matrix.elem, exact_value_i64(1), true);
+					lb_emit_store(p, d, s);
+				}
+			}
+		}
+		return lb_addr_load(p, v);
+	}	
+	
+	
 
 	if (is_type_any(dst)) {
 		if (is_type_untyped_nil(src)) {
-- 
cgit v1.2.3


From 306bdf8869f2c9676e73acbf477a302c08137087 Mon Sep 17 00:00:00 2001
From: gingerBill <bill@gingerbill.org>
Date: Mon, 25 Oct 2021 00:46:50 +0100
Subject: Update alignment rules for `matrix` types as a compromise to keep
 zero padding

---
 src/check_builtin.cpp        |   4 +-
 src/check_type.cpp           |  12 +--
 src/llvm_backend_expr.cpp    |  23 ++++-
 src/llvm_backend_general.cpp |   8 +-
 src/llvm_backend_utility.cpp |   2 +-
 src/types.cpp                | 205 ++++++++++++++++++++++++-------------------
 6 files changed, 147 insertions(+), 107 deletions(-)

(limited to 'src/check_type.cpp')

diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp
index 9b94be002..2373317c3 100644
--- a/src/check_builtin.cpp
+++ b/src/check_builtin.cpp
@@ -2083,8 +2083,8 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32
 		}
 		
 		i64 max_count = xt->Array.count*yt->Array.count;
-		if (max_count > MAX_MATRIX_ELEMENT_COUNT) {
-			error(call, "Product of the array lengths exceed the maximum matrix element count, got %d, expected a maximum of %d", cast(int)max_count, MAX_MATRIX_ELEMENT_COUNT);
+		if (max_count > MATRIX_ELEMENT_COUNT_MAX) {
+			error(call, "Product of the array lengths exceed the maximum matrix element count, got %d, expected a maximum of %d", cast(int)max_count, MATRIX_ELEMENT_COUNT_MAX);
 			return false;
 		}
 		
diff --git a/src/check_type.cpp b/src/check_type.cpp
index 21c8a9f19..813990020 100644
--- a/src/check_type.cpp
+++ b/src/check_type.cpp
@@ -2226,21 +2226,21 @@ void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) {
 		generic_column = column.type;
 	}
 	
-	if (row_count < MIN_MATRIX_ELEMENT_COUNT && generic_row == nullptr) {
+	if (row_count < MATRIX_ELEMENT_COUNT_MIN && generic_row == nullptr) {
 		gbString s = expr_to_string(row.expr);
-		error(row.expr, "Invalid matrix row count, expected %d+ rows, got %s", MIN_MATRIX_ELEMENT_COUNT, s);
+		error(row.expr, "Invalid matrix row count, expected %d+ rows, got %s", MATRIX_ELEMENT_COUNT_MIN, s);
 		gb_string_free(s);
 	}
 	
-	if (column_count < MIN_MATRIX_ELEMENT_COUNT && generic_column == nullptr) {
+	if (column_count < MATRIX_ELEMENT_COUNT_MIN && generic_column == nullptr) {
 		gbString s = expr_to_string(column.expr);
-		error(column.expr, "Invalid matrix column count, expected %d+ rows, got %s", MIN_MATRIX_ELEMENT_COUNT, s);
+		error(column.expr, "Invalid matrix column count, expected %d+ columns, got %s", MATRIX_ELEMENT_COUNT_MIN, s);
 		gb_string_free(s);
 	}
 	
-	if (row_count*column_count > MAX_MATRIX_ELEMENT_COUNT) {
+	if (row_count*column_count > MATRIX_ELEMENT_COUNT_MAX) {
 		i64 element_count = row_count*column_count;
-		error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MAX_MATRIX_ELEMENT_COUNT, cast(long long)element_count);
+		error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MATRIX_ELEMENT_COUNT_MAX, cast(long long)element_count);
 	}
 	
 	if (!is_type_valid_for_matrix_elems(elem)) {
diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp
index 9c114882e..fa2b0b084 100644
--- a/src/llvm_backend_expr.cpp
+++ b/src/llvm_backend_expr.cpp
@@ -511,10 +511,16 @@ LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) {
 	unsigned total_count = cast(unsigned)matrix_type_total_internal_elems(mt);
 	LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count);
 	
+#if 1
 	LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value;
 	LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(total_matrix_type, 0), "");
 	LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, "");
+	LLVMSetAlignment(matrix_vector, cast(unsigned)type_align_of(mt));
 	return matrix_vector;
+#else
+	LLVMValueRef matrix_vector = LLVMBuildBitCast(p->builder, matrix.value, total_matrix_type, "");
+	return matrix_vector;
+#endif
 }
 
 LLVMValueRef lb_matrix_trimmed_vector_mask(lbProcedure *p, Type *mt) {
@@ -524,7 +530,6 @@ LLVMValueRef lb_matrix_trimmed_vector_mask(lbProcedure *p, Type *mt) {
 	unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
 	unsigned row_count = cast(unsigned)mt->Matrix.row_count;
 	unsigned column_count = cast(unsigned)mt->Matrix.column_count;
-	
 	unsigned mask_elems_index = 0;
 	auto mask_elems = slice_make<LLVMValueRef>(permanent_allocator(), row_count*column_count);
 	for (unsigned j = 0; j < column_count; j++) {
@@ -540,7 +545,17 @@ LLVMValueRef lb_matrix_trimmed_vector_mask(lbProcedure *p, Type *mt) {
 
 LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) {
 	LLVMValueRef vector = lb_matrix_to_vector(p, m);
-	LLVMValueRef mask = lb_matrix_trimmed_vector_mask(p, m.type);
+	
+	Type *mt = base_type(m.type);
+	GB_ASSERT(mt->kind == Type_Matrix);
+	
+	unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt);
+	unsigned row_count = cast(unsigned)mt->Matrix.row_count;
+	if (stride == row_count) {
+		return vector;
+	}
+	
+	LLVMValueRef mask = lb_matrix_trimmed_vector_mask(p, mt);
 	LLVMValueRef trimmed_vector = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, "");
 	return trimmed_vector;
 }
@@ -791,7 +806,7 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type
 		
 		for (unsigned row_index = 0; row_index < column_count; row_index++) {
 			LLVMValueRef value = lb_emit_struct_ev(p, rhs, row_index).value;
-			LLVMValueRef row = llvm_splat(p, value, row_count);
+			LLVMValueRef row = llvm_vector_broadcast(p, value, row_count);
 			v_rows[row_index] = row;
 		}
 		
@@ -866,7 +881,7 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type
 		
 		for (unsigned column_index = 0; column_index < row_count; column_index++) {
 			LLVMValueRef value = lb_emit_struct_ev(p, lhs, column_index).value;
-			LLVMValueRef row = llvm_splat(p, value, column_count);
+			LLVMValueRef row = llvm_vector_broadcast(p, value, column_count);
 			v_rows[column_index] = row;
 		}
 		
diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp
index 01221cad6..7aa7c7cdd 100644
--- a/src/llvm_backend_general.cpp
+++ b/src/llvm_backend_general.cpp
@@ -512,8 +512,7 @@ void lb_emit_slice_bounds_check(lbProcedure *p, Token token, lbValue low, lbValu
 	}
 }
 
-bool lb_try_update_alignment(lbValue ptr, unsigned alignment)  {
-	LLVMValueRef addr_ptr = ptr.value;
+bool lb_try_update_alignment(LLVMValueRef addr_ptr, unsigned alignment) {
 	if (LLVMIsAGlobalValue(addr_ptr) || LLVMIsAAllocaInst(addr_ptr) || LLVMIsALoadInst(addr_ptr)) {
 		if (LLVMGetAlignment(addr_ptr) < alignment) {
 			if (LLVMIsAAllocaInst(addr_ptr) || LLVMIsAGlobalValue(addr_ptr)) {
@@ -525,6 +524,11 @@ bool lb_try_update_alignment(lbValue ptr, unsigned alignment)  {
 	return false;
 }
 
+bool lb_try_update_alignment(lbValue ptr, unsigned alignment) {
+	return lb_try_update_alignment(ptr.value, alignment);
+}
+
+
 bool lb_try_vector_cast(lbModule *m, lbValue ptr, LLVMTypeRef *vector_type_) {
 	Type *array_type = base_type(type_deref(ptr.type));
 	GB_ASSERT(is_type_array_like(array_type));
diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp
index 6754ce798..e458c0692 100644
--- a/src/llvm_backend_utility.cpp
+++ b/src/llvm_backend_utility.cpp
@@ -1526,7 +1526,7 @@ LLVMValueRef llvm_mask_zero(lbModule *m, unsigned count) {
 	return LLVMConstNull(LLVMVectorType(lb_type(m, t_u32), count));
 }
 
-LLVMValueRef llvm_splat(lbProcedure *p, LLVMValueRef value, unsigned count) {
+LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned count) {
 	GB_ASSERT(count > 0);
 	if (LLVMIsConstant(value)) {
 		LLVMValueRef single = LLVMConstVector(&value, 1);
diff --git a/src/types.cpp b/src/types.cpp
index 3abcebdfb..bfedb5381 100644
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -360,8 +360,8 @@ enum TypeInfoFlag : u32 {
 
 
 enum : int {
-	MIN_MATRIX_ELEMENT_COUNT = 1,
-	MAX_MATRIX_ELEMENT_COUNT = 16,
+	MATRIX_ELEMENT_COUNT_MIN = 1,
+	MATRIX_ELEMENT_COUNT_MAX = 16,
 };
 
 
@@ -700,6 +700,74 @@ bool is_type_pointer(Type *t);
 bool is_type_slice(Type *t);
 bool is_type_integer(Type *t);
 bool type_set_offsets(Type *t);
+Type *base_type(Type *t);
+
+i64 type_size_of_internal(Type *t, TypePath *path);
+i64 type_align_of_internal(Type *t, TypePath *path);
+
+
+// IMPORTANT TODO(bill): Should this TypePath code be removed since type cycle checking is handled much earlier on?
+
+struct TypePath {
+	Array<Entity *> path; // Entity_TypeName;
+	bool failure;
+};
+
+
+void type_path_init(TypePath *tp) {
+	tp->path.allocator = heap_allocator();
+}
+
+void type_path_free(TypePath *tp) {
+	array_free(&tp->path);
+}
+
+// Reports an illegal type declaration cycle that begins at path[start_index]
+// and marks the path, that entity's type, and its base type as failed.
+void type_path_print_illegal_cycle(TypePath *tp, isize start_index) {
+	GB_ASSERT(tp != nullptr);
+	GB_ASSERT(start_index < tp->path.count);
+	Entity *head = tp->path[start_index];
+	GB_ASSERT(head != nullptr);
+	error(head->token, "Illegal type declaration cycle of `%.*s`", LIT(head->token.string));
+	// One "refers to" line per entity, from the start of the cycle to the end of the path.
+	for (isize idx = start_index; idx < tp->path.count; idx += 1) {
+		Entity *link = tp->path[idx];
+		error(link->token, "\t%.*s refers to", LIT(link->token.string));
+	}
+	error(head->token, "\t%.*s", LIT(head->token.string)); // final line names the first entity again
+	tp->failure = true;
+	head->type->failure = true;
+	base_type(head->type)->failure = true;
+}
+
+// Pushes the entity of a named type onto the path, reporting a cycle for each
+// earlier occurrence of that entity. Returns false (and pushes nothing) for
+// non-named types; otherwise returns true, even when a cycle was reported.
+bool type_path_push(TypePath *tp, Type *t) {
+	GB_ASSERT(tp != nullptr);
+	if (t->kind != Type_Named) return false;
+	Entity *named = t->Named.type_name;
+	isize index = 0;
+	while (index < tp->path.count) {
+		if (tp->path[index] == named) {
+			type_path_print_illegal_cycle(tp, index);
+		}
+		index += 1;
+	}
+	array_add(&tp->path, named);
+	return true;
+}
+
+void type_path_pop(TypePath *tp) {
+	// Nothing to remove when there is no path or it is already empty.
+	if (tp == nullptr || tp->path.count <= 0) return;
+	array_pop(&tp->path);
+}
+
+
+#define FAILURE_SIZE      0
+#define FAILURE_ALIGNMENT 0
 
 void init_type_mutex(void) {
 	mutex_init(&g_type_mutex);
@@ -1251,6 +1319,42 @@ bool is_type_matrix(Type *t) {
 	return t->kind == Type_Matrix;
 }
 
+// Computes the alignment of a matrix type from its unpadded total size:
+// starts at prev_pow2(size), halves until it divides the size, then clamps
+// the result between the element alignment and build_context.max_align.
+i64 matrix_align_of(Type *t, struct TypePath *tp) {
+	t = base_type(t);
+	GB_ASSERT(t->kind == Type_Matrix);
+	
+	Type *elem = t->Matrix.elem;
+	i64 row_count = gb_max(t->Matrix.row_count, 1);
+
+	bool pop = type_path_push(tp, elem);
+	if (tp->failure) {
+		return FAILURE_ALIGNMENT;
+	}
+
+	i64 elem_align = type_align_of_internal(elem, tp);
+	if (pop) type_path_pop(tp);
+	
+	i64 elem_size = type_size_of(elem);
+	// NOTE(bill, 2021-10-25): The alignment strategy here is to have zero padding
+	// It would be better for performance to pad each column so that each column
+	// could be maximally aligned but as a compromise, having no padding will be
+	// beneficial to third-party libraries that assume no padding
+	
+	i64 total_expected_size = row_count*t->Matrix.column_count*elem_size;
+	i64 min_alignment = prev_pow2(total_expected_size);
+	// The `> 1` guard avoids a modulo by zero should prev_pow2 return 0; the size
+	// can be 0 because column_count is not clamped here and elem_size may be 0.
+	while (min_alignment > 1 && (total_expected_size % min_alignment) != 0) {
+		min_alignment >>= 1;
+	}
+	min_alignment = gb_max(min_alignment, elem_align); // never align below the element itself
+	i64 align = gb_min(min_alignment, build_context.max_align);
+	return align;
+}
+
+
 i64 matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) {
 	t = base_type(t);
 	GB_ASSERT(t->kind == Type_Matrix);
@@ -1266,21 +1370,16 @@ i64 matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) {
 	} else {
 		elem_size = type_size_of(t->Matrix.elem);
 	}
-	
 
 	i64 stride_in_bytes = 0;
 	
+	// NOTE(bill, 2021-10-25): The alignment strategy here is to have zero padding
+	// It would be better for performance to pad each column so that each column
+	// could be maximally aligned but as a compromise, having no padding will be
+	// beneficial to third-party libraries that assume no padding
 	i64 row_count = t->Matrix.row_count;
-#if 0	
-	if (row_count == 1) {
-		stride_in_bytes = elem_size;
-	} else {	
-		i64 matrix_alignment = type_align_of(t);
-		stride_in_bytes = align_formula(elem_size*row_count, matrix_alignment);
-	}
-#else
 	stride_in_bytes = elem_size*row_count;
-#endif
+	
 	t->Matrix.stride_in_bytes = stride_in_bytes;
 	return stride_in_bytes;
 }
@@ -2969,71 +3068,6 @@ Slice<i32> struct_fields_index_by_increasing_offset(gbAllocator allocator, Type
 
 
-
-// IMPORTANT TODO(bill): SHould this TypePath code be removed since type cycle checking is handled much earlier on?
-
-struct TypePath {
-	Array<Entity *> path; // Entity_TypeName;
-	bool failure;
-};
-
-
-void type_path_init(TypePath *tp) {
-	tp->path.allocator = heap_allocator();
-}
-
-void type_path_free(TypePath *tp) {
-	array_free(&tp->path);
-}
-
-void type_path_print_illegal_cycle(TypePath *tp, isize start_index) {
-	GB_ASSERT(tp != nullptr);
-
-	GB_ASSERT(start_index < tp->path.count);
-	Entity *e = tp->path[start_index];
-	GB_ASSERT(e != nullptr);
-	error(e->token, "Illegal type declaration cycle of `%.*s`", LIT(e->token.string));
-	// NOTE(bill): Print cycle, if it's deep enough
-	for (isize j = start_index; j < tp->path.count; j++) {
-		Entity *e = tp->path[j];
-		error(e->token, "\t%.*s refers to", LIT(e->token.string));
-	}
-	// NOTE(bill): This will only print if the path count > 1
-	error(e->token, "\t%.*s", LIT(e->token.string));
-	tp->failure = true;
-	e->type->failure = true;
-	base_type(e->type)->failure = true;
-}
-
-bool type_path_push(TypePath *tp, Type *t) {
-	GB_ASSERT(tp != nullptr);
-	if (t->kind != Type_Named) {
-		return false;
-	}
-	Entity *e = t->Named.type_name;
-
-	for (isize i = 0; i < tp->path.count; i++) {
-		Entity *p = tp->path[i];
-		if (p == e) {
-			type_path_print_illegal_cycle(tp, i);
-		}
-	}
-
-	array_add(&tp->path, e);
-	return true;
-}
-
-void type_path_pop(TypePath *tp) {
-	if (tp != nullptr && tp->path.count > 0) {
-		array_pop(&tp->path);
-	}
-}
-
-
-#define FAILURE_SIZE      0
-#define FAILURE_ALIGNMENT 0
-
-
 i64 type_size_of_internal (Type *t, TypePath *path);
 i64 type_align_of_internal(Type *t, TypePath *path);
 i64 type_size_of(Type *t);
@@ -3260,21 +3294,8 @@ i64 type_align_of_internal(Type *t, TypePath *path) {
 		return gb_clamp(next_pow2(type_size_of_internal(t, path)), 1, build_context.max_align);
 	}
 	
-	case Type_Matrix: {
-		Type *elem = t->Matrix.elem;
-		i64 row_count = gb_max(t->Matrix.row_count, 1);
-
-		bool pop = type_path_push(path, elem);
-		if (path->failure) {
-			return FAILURE_ALIGNMENT;
-		}
-		// elem align is used here rather than size as it make a little more sense
-		i64 elem_align = type_align_of_internal(elem, path);
-		if (pop) type_path_pop(path);
-		
-		i64 align = gb_min(next_pow2(elem_align * row_count), build_context.max_align);
-		return align;
-	}
+	case Type_Matrix: 
+		return matrix_align_of(t, path);
 
 	case Type_RelativePointer:
 		return type_align_of_internal(t->RelativePointer.base_integer, path);
-- 
cgit v1.2.3