Move `core:runtime` to `base:runtime`; keep alias around

author: gingerBill <bill@gingerbill.org> 2024-01-28 21:05:53 +0000
committer: gingerBill <bill@gingerbill.org> 2024-01-28 21:05:53 +0000
commit: 09fa1c29cd014b4560b3c79c72db68af20ef8187 (patch)
tree: 45095630fb03a50df20e0249f98879cf27d94397 /base/runtime
parent: ddcaa0de5395bfb1a2b004e6a6cb5e2ba1e2eed1 (diff)
38 files changed, 6774 insertions, 0 deletions
diff --git a/base/runtime/core.odin b/base/runtime/core.odin
new file mode 100644
index 000000000..740482493
--- /dev/null
+++ b/base/runtime/core.odin
@@ -0,0 +1,681 @@
+// This is the runtime code required by the compiler
+// IMPORTANT NOTE(bill): Do not change the order of any of this data
+// The compiler relies upon this _exact_ order
+//
+// Naming Conventions:
+// In general, Ada_Case for types and snake_case for values
+//
+// Package Name:       snake_case (but prefer single word)
+// Import Name:        snake_case (but prefer single word)
+// Types:              Ada_Case
+// Enum Values:        Ada_Case
+// Procedures:         snake_case
+// Local Variables:    snake_case
+// Constant Variables: SCREAMING_SNAKE_CASE
+//
+// IMPORTANT NOTE(bill): `type_info_of` cannot be used within a
+// #shared_global_scope due to the internals of the compiler.
+// This could change at a later date if all these data structures are
+// implemented within the compiler rather than in this "preload" file
+//
+//+no-instrumentation
+package runtime
+
+import "core:intrinsics"
+
+// NOTE(bill): This must match the compiler's
+Calling_Convention :: enum u8 {
+	Invalid     = 0,
+	Odin        = 1,
+	Contextless = 2,
+	CDecl       = 3,
+	Std_Call    = 4,
+	Fast_Call   = 5,
+
+	None        = 6,
+	Naked       = 7,
+
+	_           = 8, // reserved
+
+	Win64       = 9,
+	SysV        = 10,
+}
+
+Type_Info_Enum_Value :: distinct i64
+
+Platform_Endianness :: enum u8 {
+	Platform = 0,
+	Little   = 1,
+	Big      = 2,
+}
+
+// Procedure type to test whether two values of the same type are equal
+Equal_Proc :: distinct proc "contextless" (rawptr, rawptr) -> bool
+// Procedure type to hash a value, default seed value is 0
+Hasher_Proc :: distinct proc "contextless" (data: rawptr, seed: uintptr = 0) -> uintptr
+
+Type_Info_Struct_Soa_Kind :: enum u8 {
+	None    = 0,
+	Fixed   = 1,
+	Slice   = 2,
+	Dynamic = 3,
+}
+
+// Variant Types
+Type_Info_Named :: struct {
+	name: string,
+	base: ^Type_Info,
+	pkg:  string,
+	loc:  Source_Code_Location,
+}
+Type_Info_Integer    :: struct {signed: bool, endianness: Platform_Endianness}
+Type_Info_Rune       :: struct {}
+Type_Info_Float      :: struct {endianness: Platform_Endianness}
+Type_Info_Complex    :: struct {}
+Type_Info_Quaternion :: struct {}
+Type_Info_String     :: struct {is_cstring: bool}
+Type_Info_Boolean    :: struct {}
+Type_Info_Any        :: struct {}
+Type_Info_Type_Id    :: struct {}
+Type_Info_Pointer :: struct {
+	elem: ^Type_Info, // nil -> rawptr
+}
+Type_Info_Multi_Pointer :: struct {
+	elem: ^Type_Info,
+}
+Type_Info_Procedure :: struct {
+	params:     ^Type_Info, // Type_Info_Parameters
+	results:    ^Type_Info, // Type_Info_Parameters
+	variadic:   bool,
+	convention: Calling_Convention,
+}
+Type_Info_Array :: struct {
+	elem:      ^Type_Info,
+	elem_size: int,
+	count:     int,
+}
+Type_Info_Enumerated_Array :: struct {
+	elem:      ^Type_Info,
+	index:     ^Type_Info,
+	elem_size: int,
+	count:     int,
+	min_value: Type_Info_Enum_Value,
+	max_value: Type_Info_Enum_Value,
+	is_sparse: bool,
+}
+Type_Info_Dynamic_Array :: struct {elem: ^Type_Info, elem_size: int}
+Type_Info_Slice         :: struct {elem: ^Type_Info, elem_size: int}
+
+Type_Info_Parameters :: struct { // Only used for procedures parameters and results
+	types:        []^Type_Info,
+	names:        []string,
+}
+Type_Info_Tuple :: Type_Info_Parameters // Will be removed eventually
+
+Type_Info_Struct :: struct {
+	types:        []^Type_Info,
+	names:        []string,
+	offsets:      []uintptr,
+	usings:       []bool,
+	tags:         []string,
+	is_packed:    bool,
+	is_raw_union: bool,
+	is_no_copy:   bool,
+	custom_align: bool,
+
+	equal: Equal_Proc, // set only when the struct has .Comparable set but does not have .Simple_Compare set
+
+	// These are only set iff this structure is an SOA structure
+	soa_kind:      Type_Info_Struct_Soa_Kind,
+	soa_base_type: ^Type_Info,
+	soa_len:       int,
+}
+Type_Info_Union :: struct {
+	variants:     []^Type_Info,
+	tag_offset:   uintptr,
+	tag_type:     ^Type_Info,
+
+	equal: Equal_Proc, // set only when the union has .Comparable set but does not have .Simple_Compare set
+
+	custom_align: bool,
+	no_nil:       bool,
+	shared_nil:   bool,
+}
+Type_Info_Enum :: struct {
+	base:      ^Type_Info,
+	names:     []string,
+	values:    []Type_Info_Enum_Value,
+}
+Type_Info_Map :: struct {
+	key:      ^Type_Info,
+	value:    ^Type_Info,
+	map_info: ^Map_Info,
+}
+Type_Info_Bit_Set :: struct {
+	elem:       ^Type_Info,
+	underlying: ^Type_Info, // Possibly nil
+	lower:      i64,
+	upper:      i64,
+}
+Type_Info_Simd_Vector :: struct {
+	elem:       ^Type_Info,
+	elem_size:  int,
+	count:      int,
+}
+Type_Info_Relative_Pointer :: struct {
+	pointer:      ^Type_Info, // ^T
+	base_integer: ^Type_Info,
+}
+Type_Info_Relative_Multi_Pointer :: struct {
+	pointer:      ^Type_Info, // [^]T
+	base_integer: ^Type_Info,
+}
+Type_Info_Matrix :: struct {
+	elem:         ^Type_Info,
+	elem_size:    int,
+	elem_stride:  int, // elem_stride >= row_count
+	row_count:    int,
+	column_count: int,
+	// Total element count = column_count * elem_stride
+}
+Type_Info_Soa_Pointer :: struct {
+	elem: ^Type_Info,
+}
+
+Type_Info_Flag :: enum u8 {
+	Comparable     = 0,
+	Simple_Compare = 1,
+}
+Type_Info_Flags :: distinct bit_set[Type_Info_Flag; u32]
+
+Type_Info :: struct {
+	size:  int,
+	align: int,
+	flags: Type_Info_Flags,
+	id:    typeid,
+
+	variant: union {
+		Type_Info_Named,
+		Type_Info_Integer,
+		Type_Info_Rune,
+		Type_Info_Float,
+		Type_Info_Complex,
+		Type_Info_Quaternion,
+		Type_Info_String,
+		Type_Info_Boolean,
+		Type_Info_Any,
+		Type_Info_Type_Id,
+		Type_Info_Pointer,
+		Type_Info_Multi_Pointer,
+		Type_Info_Procedure,
+		Type_Info_Array,
+		Type_Info_Enumerated_Array,
+		Type_Info_Dynamic_Array,
+		Type_Info_Slice,
+		Type_Info_Parameters,
+		Type_Info_Struct,
+		Type_Info_Union,
+		Type_Info_Enum,
+		Type_Info_Map,
+		Type_Info_Bit_Set,
+		Type_Info_Simd_Vector,
+		Type_Info_Relative_Pointer,
+		Type_Info_Relative_Multi_Pointer,
+		Type_Info_Matrix,
+		Type_Info_Soa_Pointer,
+	},
+}
+
+// NOTE(bill): This must match the compiler's
+Typeid_Kind :: enum u8 {
+	Invalid,
+	Integer,
+	Rune,
+	Float,
+	Complex,
+	Quaternion,
+	String,
+	Boolean,
+	Any,
+	Type_Id,
+	Pointer,
+	Multi_Pointer,
+	Procedure,
+	Array,
+	Enumerated_Array,
+	Dynamic_Array,
+	Slice,
+	Tuple,
+	Struct,
+	Union,
+	Enum,
+	Map,
+	Bit_Set,
+	Simd_Vector,
+	Relative_Pointer,
+	Relative_Multi_Pointer,
+	Matrix,
+	Soa_Pointer,
+}
+#assert(len(Typeid_Kind) < 32)
+
+// Typeid_Bit_Field :: bit_field #align(align_of(uintptr)) {
+// 	index:    8*size_of(uintptr) - 8,
+// 	kind:     5, // Typeid_Kind
+// 	named:    1,
+// 	special:  1, // signed, cstring, etc
+// 	reserved: 1,
+// }
+// #assert(size_of(Typeid_Bit_Field) == size_of(uintptr));
+
+// NOTE(bill): only the ones that are needed (not all types)
+// This will be set by the compiler
+type_table: []Type_Info
+
+args__: []cstring
+
+when ODIN_OS == .Windows {
+	// NOTE(Jeroen): If we're a Windows DLL, fwdReason will be populated.
+	// This tells a DLL if it's first loaded, about to be unloaded, or a thread is joining/exiting.
+
+	DLL_Forward_Reason :: enum u32 {
+		Process_Detach = 0, // About to unload DLL
+		Process_Attach = 1, // Entry point
+		Thread_Attach  = 2,
+		Thread_Detach  = 3,
+	}
+	dll_forward_reason: DLL_Forward_Reason
+}
+
+// IMPORTANT NOTE(bill): Must be in this order (as the compiler relies upon it)
+
+
+Source_Code_Location :: struct {
+	file_path:    string,
+	line, column: i32,
+	procedure:    string,
+}
+
+Assertion_Failure_Proc :: #type proc(prefix, message: string, loc: Source_Code_Location) -> !
+
+// Allocation Stuff
+Allocator_Mode :: enum byte {
+	Alloc,
+	Free,
+	Free_All,
+	Resize,
+	Query_Features,
+	Query_Info,
+	Alloc_Non_Zeroed,
+	Resize_Non_Zeroed,
+}
+
+Allocator_Mode_Set :: distinct bit_set[Allocator_Mode]
+
+Allocator_Query_Info :: struct {
+	pointer:   rawptr,
+	size:      Maybe(int),
+	alignment: Maybe(int),
+}
+
+Allocator_Error :: enum byte {
+	None                 = 0,
+	Out_Of_Memory        = 1,
+	Invalid_Pointer      = 2,
+	Invalid_Argument     = 3,
+	Mode_Not_Implemented = 4,
+}
+
+Allocator_Proc :: #type proc(allocator_data: rawptr, mode: Allocator_Mode,
+                             size, alignment: int,
+                             old_memory: rawptr, old_size: int,
+                             location: Source_Code_Location = #caller_location) -> ([]byte, Allocator_Error)
+Allocator :: struct {
+	procedure: Allocator_Proc,
+	data:      rawptr,
+}
+
+Byte     :: 1
+Kilobyte :: 1024 * Byte
+Megabyte :: 1024 * Kilobyte
+Gigabyte :: 1024 * Megabyte
+Terabyte :: 1024 * Gigabyte
+Petabyte :: 1024 * Terabyte
+Exabyte  :: 1024 * Petabyte
+
+// Logging stuff
+
+Logger_Level :: enum uint {
+	Debug   = 0,
+	Info    = 10,
+	Warning = 20,
+	Error   = 30,
+	Fatal   = 40,
+}
+
+Logger_Option :: enum {
+	Level,
+	Date,
+	Time,
+	Short_File_Path,
+	Long_File_Path,
+	Line,
+	Procedure,
+	Terminal_Color,
+	Thread_Id,
+}
+
+Logger_Options :: bit_set[Logger_Option]
+Logger_Proc :: #type proc(data: rawptr, level: Logger_Level, text: string, options: Logger_Options, location := #caller_location)
+
+Logger :: struct {
+	procedure:    Logger_Proc,
+	data:         rawptr,
+	lowest_level: Logger_Level,
+	options:      Logger_Options,
+}
+
+Context :: struct {
+	allocator:              Allocator,
+	temp_allocator:         Allocator,
+	assertion_failure_proc: Assertion_Failure_Proc,
+	logger:                 Logger,
+
+	user_ptr:   rawptr,
+	user_index: int,
+
+	// Internal use only
+	_internal: rawptr,
+}
+
+
+Raw_String :: struct {
+	data: [^]byte,
+	len:  int,
+}
+
+Raw_Slice :: struct {
+	data: rawptr,
+	len:  int,
+}
+
+Raw_Dynamic_Array :: struct {
+	data:      rawptr,
+	len:       int,
+	cap:       int,
+	allocator: Allocator,
+}
+
+// The raw, type-erased representation of a map.
+//
+// 32-bytes on 64-bit
+// 16-bytes on 32-bit
+Raw_Map :: struct {
+	// A single allocation spanning all keys, values, and hashes.
+	// {
+	//   k: Map_Cell(K) * (capacity / ks_per_cell)
+	//   v: Map_Cell(V) * (capacity / vs_per_cell)
+	//   h: Map_Cell(H) * (capacity / hs_per_cell)
+	// }
+	//
+	// The data is allocated assuming 64-byte alignment, meaning the address is
+	// always a multiple of 64. This means we have 6 bits of zeros in the pointer
+	// to store the capacity. We can store a value as large as 2^6-1 or 63 in
+	// there. This conveniently is the maximum log2 capacity we can have for a map
+	// as Odin uses signed integers to represent capacity.
+	//
+	// Since the hashes are backed by Map_Hash, which is just a 64-bit unsigned
+	// integer, the cell structure for hashes is unnecessary because 64/8 is 8 and
+	// requires no padding, meaning it can be indexed as a regular array of
+	// Map_Hash directly, though for consistency's sake it's written as if it were
+	// an array of Map_Cell(Map_Hash).
+	data:      uintptr,   // 8-bytes on 64-bits, 4-bytes on 32-bits
+	len:       uintptr,   // 8-bytes on 64-bits, 4-bytes on 32-bits
+	allocator: Allocator, // 16-bytes on 64-bits, 8-bytes on 32-bits
+}
+
+Raw_Any :: struct {
+	data: rawptr,
+	id:   typeid,
+}
+
+Raw_Cstring :: struct {
+	data: [^]byte,
+}
+
+Raw_Soa_Pointer :: struct {
+	data:  rawptr,
+	index: int,
+}
+
+
+
+/*
+	// Defined internally by the compiler
+	Odin_OS_Type :: enum int {
+		Unknown,
+		Windows,
+		Darwin,
+		Linux,
+		Essence,
+		FreeBSD,
+		OpenBSD,
+		WASI,
+		JS,
+		Freestanding,
+	}
+*/
+Odin_OS_Type :: type_of(ODIN_OS)
+
+/*
+	// Defined internally by the compiler
+	Odin_Arch_Type :: enum int {
+		Unknown,
+		amd64,
+		i386,
+		arm32,
+		arm64,
+		wasm32,
+		wasm64p32,
+	}
+*/
+Odin_Arch_Type :: type_of(ODIN_ARCH)
+
+/*
+	// Defined internally by the compiler
+	Odin_Build_Mode_Type :: enum int {
+		Executable,
+		Dynamic,
+		Object,
+		Assembly,
+		LLVM_IR,
+	}
+*/
+Odin_Build_Mode_Type :: type_of(ODIN_BUILD_MODE)
+
+/*
+	// Defined internally by the compiler
+	Odin_Endian_Type :: enum int {
+		Unknown,
+		Little,
+		Big,
+	}
+*/
+Odin_Endian_Type :: type_of(ODIN_ENDIAN)
+
+
+/*
+	// Defined internally by the compiler
+	Odin_Platform_Subtarget_Type :: enum int {
+		Default,
+		iOS,
+	}
+*/
+Odin_Platform_Subtarget_Type :: type_of(ODIN_PLATFORM_SUBTARGET)
+
+/*
+	// Defined internally by the compiler
+	Odin_Sanitizer_Flag :: enum u32 {
+		Address = 0,
+		Memory  = 1,
+		Thread  = 2,
+	}
+	Odin_Sanitizer_Flags :: distinct bit_set[Odin_Sanitizer_Flag; u32]
+
+	ODIN_SANITIZER_FLAGS // is a constant
+*/
+Odin_Sanitizer_Flags :: type_of(ODIN_SANITIZER_FLAGS)
+
+
+/////////////////////////////
+// Init Startup Procedures //
+/////////////////////////////
+
+// IMPORTANT NOTE(bill): Do not call this unless you want to explicitly set up the entry point and how it gets called
+// This is probably only useful for freestanding targets
+foreign {
+	@(link_name="__$startup_runtime")
+	_startup_runtime :: proc "odin" () ---
+	@(link_name="__$cleanup_runtime")
+	_cleanup_runtime :: proc "odin" () ---
+}
+
+_cleanup_runtime_contextless :: proc "contextless" () {
+	context = default_context() // a "contextless" procedure has no implicit context, so a default one is set up first
+	_cleanup_runtime() // declared above with the "odin" calling convention, hence the context set on the previous line
+}
+
+
+/////////////////////////////
+/////////////////////////////
+/////////////////////////////
+
+
+// Follows `Type_Info_Named.base` links until a type info that is not a named type is reached.
+type_info_base :: proc "contextless" (info: ^Type_Info) -> ^Type_Info {
+	if info == nil { return nil }
+
+	current := info
+	for {
+		named, is_named := current.variant.(Type_Info_Named)
+		if !is_named {
+			break
+		}
+		current = named.base
+	}
+	return current
+}
+
+
+// Strips both named and enum layers by following their `base` links until neither variant applies.
+type_info_core :: proc "contextless" (info: ^Type_Info) -> ^Type_Info {
+	if info == nil { return nil }
+	current := info
+	for {
+		if named, is_named := current.variant.(Type_Info_Named); is_named {
+			current = named.base
+		} else if enumerated, is_enum := current.variant.(Type_Info_Enum); is_enum {
+			current = enumerated.base
+		} else {
+			break
+		}
+	}
+	return current
+}
+type_info_base_without_enum :: type_info_core
+
+__type_info_of :: proc "contextless" (id: typeid) -> ^Type_Info #no_bounds_check {
+	MASK :: 1<<(8*size_of(typeid) - 8) - 1 // every bit set except the top 8 bits of a typeid
+	data := transmute(uintptr)id
+	n := int(data & MASK) // the masked low bits are used as an index into `type_table`
+	if n < 0 || n >= len(type_table) {
+		n = 0 // out-of-range indices fall back to entry 0
+	}
+	return &type_table[n]
+}
+
+when !ODIN_NO_RTTI {
+	// Both procedures look up the type info of `id`, strip wrapper layers (`type_info_base` / `type_info_core`) and return the resulting `id`.
+	typeid_base :: proc "contextless" (id: typeid) -> typeid {
+		return type_info_base(type_info_of(id)).id
+	}
+	typeid_core :: proc "contextless" (id: typeid) -> typeid {
+		info := type_info_of(id)
+		info = type_info_core(info)
+		return info.id
+	}
+	typeid_base_without_enum :: typeid_core
+}
+
+
+
+debug_trap         :: intrinsics.debug_trap
+trap               :: intrinsics.trap
+read_cycle_counter :: intrinsics.read_cycle_counter
+
+
+
+default_logger_proc :: proc(data: rawptr, level: Logger_Level, text: string, options: Logger_Options, location := #caller_location) {
+	// Intentionally empty: the default logger discards every message it is given.
+}
+
+default_logger :: proc() -> Logger {
+	return Logger{procedure = default_logger_proc, data = nil, lowest_level = .Debug, options = nil}
+}
+
+
+default_context :: proc "contextless" () -> Context {
+	c: Context // starts zero-valued; `__init_context` then fills in the default procedures
+	__init_context(&c)
+	return c
+}
+
+@private
+__init_context_from_ptr :: proc "contextless" (c: ^Context, other: ^Context) {
+	if c == nil {
+		return
+	}
+	c^ = other^ // NOTE(review): `other` is dereferenced without a nil check — callers must pass a valid pointer
+	__init_context(c) // overwrites the copied allocator, temp allocator and logger procedure/data with the defaults; the remaining fields keep the values copied from `other`
+}
+
+@private
+__init_context :: proc "contextless" (c: ^Context) {
+	if c == nil {
+		return
+	}
+
+	// NOTE(bill): Do not initialize these procedures with a call as they are not defined with the "contextless" calling convention
+	c.allocator.procedure = default_allocator_proc
+	c.allocator.data = nil
+
+	c.temp_allocator.procedure = default_temp_allocator_proc
+	when !NO_DEFAULT_TEMP_ALLOCATOR { // when the default temp allocator is disabled, `temp_allocator.data` is left as it was
+		c.temp_allocator.data = &global_default_temp_allocator_data
+	}
+	
+	when !ODIN_DISABLE_ASSERT { // when assertions are disabled, `assertion_failure_proc` is left as it was
+		c.assertion_failure_proc = default_assertion_failure_proc
+	}
+
+	c.logger.procedure = default_logger_proc // `lowest_level` and `options` of the logger are not touched here
+	c.logger.data = nil
+}
+
+// Handler that `__init_context` installs as `assertion_failure_proc`: prints the failure, then traps.
+default_assertion_failure_proc :: proc(prefix, message: string, loc: Source_Code_Location) -> ! {
+	// Freestanding targets print nothing and go straight to the trap.
+	when ODIN_OS != .Freestanding {
+		when !ODIN_DISABLE_ASSERT {
+			print_caller_location(loc)
+			print_string(" ")
+		}
+		print_string(prefix)
+		if len(message) != 0 {
+			print_string(": ")
+			print_string(message)
+		}
+		print_byte('\n')
+	}
+	trap()
+}
diff --git a/base/runtime/core_builtin.odin b/base/runtime/core_builtin.odin
new file mode 100644
index 000000000..3f4ebbc74
--- /dev/null
+++ b/base/runtime/core_builtin.odin
@@ -0,0 +1,915 @@
+package runtime
+
+import "core:intrinsics"
+
+@builtin
+Maybe :: union($T: typeid) {T} // single-variant union: holds either a `T` or nil
+
+
+@(builtin, require_results)
+container_of :: #force_inline proc "contextless" (ptr: $P/^$Field_Type, $T: typeid, $field_name: string) -> ^T
+	where intrinsics.type_has_field(T, field_name),
+	      intrinsics.type_field_type(T, field_name) == Field_Type { // compile-time check: `T` has a field `field_name` whose type is what `ptr` points to
+	offset :: offset_of_by_string(T, field_name)
+	return (^T)(uintptr(ptr) - offset) if ptr != nil else nil // step back from the field address to the start of the enclosing `T`; nil stays nil
+}
+
+
+when !NO_DEFAULT_TEMP_ALLOCATOR {
+	@thread_local global_default_temp_allocator_data: Default_Temp_Allocator
+}
+
+@(builtin, disabled=NO_DEFAULT_TEMP_ALLOCATOR)
+init_global_temporary_allocator :: proc(size: int, backup_allocator := context.allocator) {
+	when !NO_DEFAULT_TEMP_ALLOCATOR { // `global_default_temp_allocator_data` is only declared when this flag is false
+		default_temp_allocator_init(&global_default_temp_allocator_data, size, backup_allocator)
+	}
+}
+
+
+// `copy_slice` copies elements from the source slice `src` into the destination slice `dst`.
+// The two slices may overlap. The result is the number of elements copied, which is the
+// smaller of len(dst) and len(src).
+// Prefer the procedure group `copy`.
+@builtin
+copy_slice :: proc "contextless" (dst, src: $T/[]$E) -> int {
+	count := min(len(dst), len(src))
+	if count <= 0 {
+		return 0
+	}
+	intrinsics.mem_copy(raw_data(dst), raw_data(src), count*size_of(E))
+	return count
+}
+// `copy_from_string` is a built-in procedure that copies bytes from a source string `src` to a destination byte slice `dst`.
+// The source and destination may overlap. Copy returns the number of bytes copied, which will be the minimum
+// of len(src) and len(dst).
+//
+// Prefer the procedure group `copy`.
+@builtin
+copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int {
+	n := max(0, min(len(dst), len(src)))
+	if n > 0 {
+		intrinsics.mem_copy(raw_data(dst), raw_data(src), n)
+	}
+	return n
+}
+// `copy` is a built-in procedure that copies elements from a source slice or string `src` to a destination slice `dst`.
+// The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum
+// of len(src) and len(dst).
+@builtin
+copy :: proc{copy_slice, copy_from_string}
+
+
+
+// `unordered_remove` removes the element at the specified `index`. It does so by overwriting that
+// element with the current end value, and reducing the length of the dynamic array by 1.
+//
+// Note: This is an O(1) operation.
+// Note: If you want the elements to remain in their order, use `ordered_remove`.
+// Note: If the index is out of bounds, this procedure will panic.
+@builtin
+unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
+	bounds_check_error_loc(loc, index, len(array))
+	last := len(array)-1
+	if last != index {
+		array[index] = array[last]
+	}
+	(^Raw_Dynamic_Array)(array).len = last
+}
+// `ordered_remove` removes the element at the specified `index` whilst keeping the order of the other elements.
+//
+// Note: This is an O(N) operation.
+// Note: If the elements do not have to remain in their order, prefer `unordered_remove`.
+// Note: If the index is out of bounds, this procedure will panic.
+@builtin
+ordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
+	bounds_check_error_loc(loc, index, len(array))
+	if index+1 < len(array) {
+		copy(array[index:], array[index+1:]) // shift every later element down by one position
+	}
+	(^Raw_Dynamic_Array)(array).len -= 1
+}
+
+// `remove_range` removes the `hi-lo` elements starting at index `lo`, keeping the remaining elements in order.
+// Note: This is an O(N) operation.
+// Note: If the range is out of bounds, this procedure will panic.
+@builtin
+remove_range :: proc(array: ^$D/[dynamic]$T, lo, hi: int, loc := #caller_location) #no_bounds_check {
+	slice_expr_error_lo_hi_loc(loc, lo, hi, len(array))
+	count := hi-lo
+	if count <= 0 {
+		return
+	}
+	if hi != len(array) {
+		copy(array[lo:], array[hi:])
+	}
+	(^Raw_Dynamic_Array)(array).len -= count
+}
+
+
+// `pop` removes and returns the end value of dynamic array `array` and reduces the length of `array` by 1.
+//
+// Note: If the dynamic array has no elements (`len(array) == 0`), this procedure will panic.
+@builtin
+pop :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (res: E) #no_bounds_check {
+	assert(len(array) > 0, loc=loc)
+	res = array[len(array)-1]
+	(^Raw_Dynamic_Array)(array).len -= 1 // only the length field is changed here; `cap` is not modified
+	return res
+}
+
+
+// `pop_safe` tries to remove and return the end value of dynamic array `array` and reduces the length of `array` by 1.
+// If the array is empty, it returns the zero value of `E` together with `false`.
+@builtin
+pop_safe :: proc(array: ^$T/[dynamic]$E) -> (res: E, ok: bool) #no_bounds_check {
+	if len(array) == 0 {
+		return
+	}
+	res, ok = array[len(array)-1], true
+	(^Raw_Dynamic_Array)(array).len -= 1
+	return
+}
+
+// `pop_front` removes and returns the first value of dynamic array `array` and reduces the length of `array` by 1.
+//
+// Note: If the dynamic array has no elements (`len(array) == 0`), this procedure will panic.
+@builtin
+pop_front :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (res: E) #no_bounds_check {
+	assert(len(array) > 0, loc=loc)
+	res = array[0]
+	if len(array) > 1 {
+		copy(array[0:], array[1:]) // shift the remaining elements down by one position
+	}
+	(^Raw_Dynamic_Array)(array).len -= 1
+	return res
+}
+
+// `pop_front_safe` tries to return and remove the first value of dynamic array `array` and reduces the length of `array` by 1.
+// If the array is empty, it returns the zero value of `E` together with `false`.
+@builtin
+pop_front_safe :: proc(array: ^$T/[dynamic]$E) -> (res: E, ok: bool) #no_bounds_check {
+	if len(array) == 0 {
+		return
+	}
+	res, ok = array[0], true
+	if len(array) > 1 {
+		copy(array[0:], array[1:]) // shift the remaining elements down by one position
+	}
+	(^Raw_Dynamic_Array)(array).len -= 1
+	return
+}
+
+
+// `clear` will set the length of a passed dynamic array or map to `0`
+@builtin
+clear :: proc{clear_dynamic_array, clear_map}
+
+// `reserve` will try to reserve memory of a passed dynamic array or map to the requested element count (setting the `cap`).
+@builtin
+reserve :: proc{reserve_dynamic_array, reserve_map}
+
+@builtin
+non_zero_reserve :: proc{non_zero_reserve_dynamic_array}
+
+// `resize` will try to resize memory of a passed dynamic array to the requested element count (setting the `len`, and possibly `cap`).
+@builtin
+resize :: proc{resize_dynamic_array}
+
+@builtin
+non_zero_resize :: proc{non_zero_resize_dynamic_array}
+
+// Shrinks the capacity of a dynamic array or map down to the current length, or the given capacity.
+@builtin
+shrink :: proc{shrink_dynamic_array, shrink_map}
+
+// `free` will try to free the passed pointer, with the given `allocator` if the allocator supports this operation.
+@builtin
+free :: proc{mem_free}
+
+// `free_all` will try to free/reset all of the memory of the given `allocator` if the allocator supports this operation.
+@builtin
+free_all :: proc{mem_free_all}
+
+
+
+// `delete_string` will try to free the underlying data of the passed string, with the given `allocator` if the allocator supports this operation.
+//
+// Note: Prefer the procedure group `delete`.
+@builtin
+delete_string :: proc(str: string, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	return mem_free_with_size(raw_data(str), len(str), allocator, loc)
+}
+// `delete_cstring` will try to free the underlying data of the passed cstring, with the given `allocator` if the allocator supports this operation.
+//
+// Note: Prefer the procedure group `delete`.
+@builtin
+delete_cstring :: proc(str: cstring, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	return mem_free((^byte)(str), allocator, loc)
+}
+// `delete_dynamic_array` will try to free the underlying data of the passed dynamic array, using the allocator stored in the array itself (`array.allocator`), if that allocator supports this operation.
+//
+// Note: Prefer the procedure group `delete`.
+@builtin
+delete_dynamic_array :: proc(array: $T/[dynamic]$E, loc := #caller_location) -> Allocator_Error {
+	return mem_free_with_size(raw_data(array), cap(array)*size_of(E), array.allocator, loc)
+}
+// `delete_slice` will try to free the underlying data of the passed slice, with the given `allocator` if the allocator supports this operation.
+//
+// Note: Prefer the procedure group `delete`.
+@builtin
+delete_slice :: proc(array: $T/[]$E, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	return mem_free_with_size(raw_data(array), len(array)*size_of(E), allocator, loc)
+}
+// `delete_map` will try to free the underlying data of the passed map. It takes no allocator parameter; presumably the allocator stored in the map is used (confirm in `map_free_dynamic`).
+//
+// Note: Prefer the procedure group `delete`.
+@builtin
+delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error {
+	return map_free_dynamic(transmute(Raw_Map)m, map_info(T), loc)
+}
+
+
+// `delete` will try to free the underlying data of the passed built-in data structure (string, cstring, dynamic array, slice, map, #soa slice, or #soa dynamic array), if the allocator involved supports this operation.
+//
+// Note: Prefer `delete` over the specific `delete_*` procedures where possible.
+@builtin
+delete :: proc{
+	delete_string,
+	delete_cstring,
+	delete_dynamic_array,
+	delete_slice,
+	delete_map,
+	delete_soa_slice,
+	delete_soa_dynamic_array,
+}
+
+
+// The `new` built-in procedure allocates memory. The first argument is a type, not a value, and the value
+// returned is a pointer to a newly allocated value of that type, aligned to `align_of(T)`, using the specified allocator (default: context.allocator).
+@(builtin, require_results)
+new :: proc($T: typeid, allocator := context.allocator, loc := #caller_location) -> (^T, Allocator_Error) #optional_allocator_error {
+	return new_aligned(T, align_of(T), allocator, loc)
+}
+@(require_results)
+new_aligned :: proc($T: typeid, alignment: int, allocator := context.allocator, loc := #caller_location) -> (t: ^T, err: Allocator_Error) {
+	bytes, alloc_err := mem_alloc_bytes(size_of(T), alignment, allocator, loc)
+	if alloc_err != .None { return nil, alloc_err } // allocation failed: report the error with a nil pointer
+	return (^T)(raw_data(bytes)), nil
+}
+
+@(builtin, require_results)
+new_clone :: proc(data: $T, allocator := context.allocator, loc := #caller_location) -> (t: ^T, err: Allocator_Error) #optional_allocator_error {
+	t_data := mem_alloc_bytes(size_of(T), align_of(T), allocator, loc) or_return // on an allocation error, returns early with that error
+	t = (^T)(raw_data(t_data))
+	if t != nil { // NOTE(review): a nil result without an error presumably happens for zero-sized `T` — confirm
+		t^ = data // initialise the new allocation with a copy of `data`
+	}
+	return
+}
+
+DEFAULT_RESERVE_CAPACITY :: 16
+
+@(require_results)
+make_aligned :: proc($T: typeid/[]$E, #any_int len: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
+	make_slice_error_loc(loc, len) // NOTE(review): presumably reports an invalid (e.g. negative) `len` — confirm in its definition
+	data, err := mem_alloc_bytes(size_of(E)*len, alignment, allocator, loc)
+	if data == nil && size_of(E) != 0 { // no memory obtained for a non-zero-sized element type: return a nil slice plus whatever error was reported
+		return nil, err
+	}
+	s := Raw_Slice{raw_data(data), len} // for zero-sized elements the requested `len` is kept even if `data` is nil
+	return transmute(T)s, err
+}
+
+// `make_slice` allocates and initializes a slice. Like `new`, the first argument is a type, not a value.
+// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
+//
+// Note: Prefer using the procedure group `make`.
+@(builtin, require_results)
+make_slice :: proc($T: typeid/[]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
+	return make_aligned(T, len, align_of(E), allocator, loc)
+}
+// `make_dynamic_array` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
+// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
+//
+// Note: Prefer using the procedure group `make`.
+@(builtin, require_results)
+make_dynamic_array :: proc($T: typeid/[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
+	return make_dynamic_array_len_cap(T, 0, DEFAULT_RESERVE_CAPACITY, allocator, loc)
+}
+// `make_dynamic_array_len` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
+// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
+//
+// Note: Prefer using the procedure group `make`.
+@(builtin, require_results)
+make_dynamic_array_len :: proc($T: typeid/[dynamic]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
+	return make_dynamic_array_len_cap(T, len, len, allocator, loc)
+}
+// `make_dynamic_array_len_cap` allocates a dynamic array with the requested length and capacity, bound to `allocator`.
+// As with `new`, the first argument is a type, not a value; unlike `new`, the result is a value of that type rather than a pointer to it.
+// If no memory is obtained for a non-zero-sized element type, the returned array has both `len` and `cap` set to 0.
+// Note: Prefer using the procedure group `make`.
+@(builtin, require_results)
+make_dynamic_array_len_cap :: proc($T: typeid/[dynamic]$E, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	make_dynamic_array_error_loc(loc, len, cap)
+	bytes := mem_alloc_bytes(size_of(E)*cap, align_of(E), allocator, loc) or_return
+	raw := Raw_Dynamic_Array{data = raw_data(bytes), len = len, cap = cap, allocator = allocator}
+	if bytes == nil && size_of(E) != 0 {
+		raw.len = 0
+		raw.cap = 0
+	}
+	return transmute(T)raw, nil
+}
+// `make_map` allocates and initializes a map. Like `new`, the first argument is a type, not a value.
+// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
+//
+// Note: Prefer using the procedure group `make`.
+@(builtin, require_results)
+make_map :: proc($T: typeid/map[$K]$E, #any_int capacity: int = 1<<MAP_MIN_LOG2_CAPACITY, allocator := context.allocator, loc := #caller_location) -> (m: T, err: Allocator_Error) #optional_allocator_error {
+	make_map_expr_error_loc(loc, capacity)
+	context.allocator = allocator // NOTE(review): presumably the reservation below picks its allocator up from the context — confirm in `__dynamic_map_reserve`
+
+	err = reserve_map(&m, capacity, loc)
+	return
+}
+// `make_multi_pointer` allocates and initializes a multi-pointer. Like `new`, the first argument is a type, not a value.
+// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
+//
+// This is "similar" to doing `raw_data(make([]E, len, allocator))`.
+//
+// Note: Prefer using the procedure group `make`.
+@(builtin, require_results)
+make_multi_pointer :: proc($T: typeid/[^]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (mp: T, err: Allocator_Error) #optional_allocator_error {
+	make_slice_error_loc(loc, len)
+	data := mem_alloc_bytes(size_of(E)*len, align_of(E), allocator, loc) or_return
+	if data == nil && size_of(E) != 0 { // no memory obtained for a non-zero-sized element type: `mp` stays nil
+		return
+	}
+	mp = cast(T)raw_data(data)
+	return
+}
+
+
+// The `make` built-in procedure allocates and initializes a value of type slice, dynamic array, map, or multi-pointer (only).
+//
+// Similar to `new`, the first argument is a type, not a value. Unlike `new`, `make`'s return type is the same as the
+// type of its argument, not a pointer to it.
+// `make` uses the specified allocator; the default is `context.allocator`.
+@builtin
+make :: proc{
+	make_slice,
+	make_dynamic_array,
+	make_dynamic_array_len,
+	make_dynamic_array_len_cap,
+	make_map,
+	make_multi_pointer,
+}
+
+
+
+// `clear_map` will set the length of a passed map to `0`. A nil map pointer is ignored.
+//
+// Note: Prefer the procedure group `clear`
+@builtin
+clear_map :: proc "contextless" (m: ^$T/map[$K]$V) {
+	if m != nil {
+		map_clear_dynamic((^Raw_Map)(m), map_info(T))
+	}
+}
+
+// `reserve_map` will try to reserve memory of a passed map to the requested element count (setting the `cap`).
+// A nil map pointer is a no-op that reports no error.
+//
+// Note: Prefer the procedure group `reserve`
+@builtin
+reserve_map :: proc(m: ^$T/map[$K]$V, capacity: int, loc := #caller_location) -> Allocator_Error {
+	if m == nil {
+		return nil
+	}
+	return __dynamic_map_reserve((^Raw_Map)(m), map_info(T), uint(capacity), loc)
+}
+
+// Shrinks the capacity of a map down to the current length.
+// A nil map pointer yields `false, nil`.
+//
+// Note: Prefer the procedure group `shrink`
+@builtin
+shrink_map :: proc(m: ^$T/map[$K]$V, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
+	if m == nil {
+		return
+	}
+	return map_shrink_dynamic((^Raw_Map)(m), map_info(T), loc)
+}
+
+// The delete_key built-in procedure deletes the element with the specified key (m[key]) from the map.
+// If m is nil, or there is no such element, this procedure is a no-op and the zero values are returned.
+// Otherwise the removed key and value are returned.
+@builtin
+delete_key :: proc(m: ^$T/map[$K]$V, key: K) -> (deleted_key: K, deleted_value: V) {
+	if m == nil {
+		return
+	}
+	// Take a local copy so the key has an address to hand to the untyped map routine.
+	lookup := key
+	key_ptr, value_ptr, found := map_erase_dynamic((^Raw_Map)(m), map_info(T), uintptr(&lookup))
+	if found {
+		deleted_key   = (^K)(key_ptr)^
+		deleted_value = (^V)(value_ptr)^
+	}
+	return
+}
+
+// Shared implementation of `append_elem` / `non_zero_append_elem`.
+//
+// Appends `arg` to `array`, growing the capacity to `2*cap + 8` when there is no free slot.
+// `should_zero` selects between `reserve` and `non_zero_reserve` for that growth.
+// Returns the number of elements appended (0 or 1) together with any error reported by the reserve.
+_append_elem :: #force_inline proc(array: ^$T/[dynamic]$E, arg: E, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	if array == nil {
+		return 0, nil
+	}
+	when size_of(E) == 0 {
+		// Zero-sized elements need no storage; only the length changes.
+		raw := (^Raw_Dynamic_Array)(array)
+		raw.len += 1
+		return 1, nil
+	} else {
+		if len(array)+1 > cap(array) {
+			wanted := 2 * cap(array) + max(8, 1)
+
+			// do not 'or_return' here as it could be a partial success
+			if should_zero {
+				err = reserve(array, wanted, loc)
+			} else {
+				err = non_zero_reserve(array, wanted, loc)
+			}
+		}
+
+		raw := (^Raw_Dynamic_Array)(array)
+		if raw.cap-raw.len <= 0 {
+			return 0, err
+		}
+
+		// `E` is known to be non-zero-sized in this branch, so the store is unconditional.
+		elems := ([^]E)(raw.data)
+		assert(elems != nil, loc=loc)
+		elems[raw.len] = arg
+		raw.len += 1
+		return 1, err
+	}
+}
+
+// `append_elem` appends a single element to a dynamic array via `_append_elem` with `should_zero = true`.
+//
+// Note: Prefer the procedure group `append`.
+@builtin
+append_elem :: proc(array: ^$T/[dynamic]$E, arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_elem(array, arg, true, loc=loc)
+}
+
+// `non_zero_append_elem` is the same as `append_elem` but passes `should_zero = false`,
+// so any growth goes through `non_zero_reserve`.
+//
+// Note: Prefer the procedure group `non_zero_append`.
+@builtin
+non_zero_append_elem :: proc(array: ^$T/[dynamic]$E, arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_elem(array, arg, false, loc=loc)
+}
+
+// Shared implementation of `append_elems` / `non_zero_append_elems`.
+//
+// Appends as many of `args` as fit after (optionally) growing the capacity to
+// `2*cap + max(8, len(args))`. `should_zero` selects between `reserve` and `non_zero_reserve`.
+// Returns the number of elements actually appended and any error reported by the reserve.
+_append_elems :: #force_inline proc(array: ^$T/[dynamic]$E, should_zero: bool, loc := #caller_location, args: ..E) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	if array == nil {
+		return 0, nil
+	}
+
+	count := len(args)
+	if count <= 0 {
+		return 0, nil
+	}
+
+	when size_of(E) == 0 {
+		// Zero-sized elements need no storage; only the length changes.
+		raw := (^Raw_Dynamic_Array)(array)
+		raw.len += count
+		return count, nil
+	} else {
+		if len(array)+count > cap(array) {
+			wanted := 2 * cap(array) + max(8, count)
+
+			// do not 'or_return' here as it could be a partial success
+			if should_zero {
+				err = reserve(array, wanted, loc)
+			} else {
+				err = non_zero_reserve(array, wanted, loc)
+			}
+		}
+
+		// Clamp to the space that is really available after the (possibly failed) reserve.
+		raw := (^Raw_Dynamic_Array)(array)
+		count = min(raw.cap-raw.len, count)
+		if count <= 0 {
+			return count, err
+		}
+
+		elems := ([^]E)(raw.data)
+		assert(elems != nil, loc=loc)
+		intrinsics.mem_copy(&elems[raw.len], raw_data(args), size_of(E) * count)
+		raw.len += count
+		return count, err
+	}
+}
+
+// `append_elems` appends multiple elements to a dynamic array via `_append_elems` with `should_zero = true`.
+//
+// Note: Prefer the procedure group `append`.
+@builtin
+append_elems :: proc(array: ^$T/[dynamic]$E, args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_elems(array, true, loc, ..args)
+}
+
+// `non_zero_append_elems` is the same as `append_elems` but passes `should_zero = false`.
+//
+// Note: Prefer the procedure group `non_zero_append`.
+@builtin
+non_zero_append_elems :: proc(array: ^$T/[dynamic]$E, args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_elems(array, false, loc, ..args)
+}
+
+// Shared implementation of `append_elem_string` / `non_zero_append_elem_string`:
+// reinterprets the string's bytes as `[]E` and appends them to a [dynamic]u8 like type.
+_append_elem_string :: proc(array: ^$T/[dynamic]$E/u8, arg: $A/string, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	bytes := transmute([]E)arg
+	if !should_zero {
+		return non_zero_append_elems(array, ..bytes, loc=loc)
+	}
+	return append_elems(array, ..bytes, loc=loc)
+}
+
+// `append_elem_string` appends the bytes of a string to a [dynamic]u8 like type (`should_zero = true`).
+//
+// Note: Prefer the procedure group `append`.
+@builtin
+append_elem_string :: proc(array: ^$T/[dynamic]$E/u8, arg: $A/string, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_elem_string(array, arg, true, loc)
+}
+// `non_zero_append_elem_string` is the same as `append_elem_string` but passes `should_zero = false`.
+//
+// Note: Prefer the procedure group `non_zero_append`.
+@builtin
+non_zero_append_elem_string :: proc(array: ^$T/[dynamic]$E/u8, arg: $A/string, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	return _append_elem_string(array, arg, false, loc)
+}
+
+
+// The append_string built-in procedure appends multiple strings to the end of a [dynamic]u8 like type.
+// Strings are appended in order; the first error stops the loop. `n` is the total number of
+// bytes appended, including those of the string that reported the error.
+@builtin
+append_string :: proc(array: ^$T/[dynamic]$E/u8, args: ..string, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	for str in args {
+		appended: int
+		appended, err = append(array, ..transmute([]E)(str), loc=loc)
+		n += appended
+		if err != nil {
+			break
+		}
+	}
+	return
+}
+
+// The append built-in procedure appends elements to the end of a dynamic array
+@builtin append :: proc{append_elem, append_elems, append_elem_string}
+// The non_zero_append built-in procedure is the `should_zero = false` counterpart of `append`
+@builtin non_zero_append :: proc{non_zero_append_elem, non_zero_append_elems, non_zero_append_elem_string}
+
+
+// `append_nothing` grows a dynamic array by one element through `resize`, without storing a value.
+// Returns how many elements the length actually grew by.
+@builtin
+append_nothing :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	if array == nil {
+		return 0, nil
+	}
+	old_len := len(array)
+	resize(array, old_len+1, loc) or_return
+	return len(array)-old_len, nil
+}
+
+
+// `inject_at_elem` inserts `arg` at `index`, shifting the elements from `index` onwards up by one.
+// If `index` is past the current length the array is first resized to `index + 1`.
+// A nil `array` returns `ok = false` with no error. Bounds checks are disabled (`#no_bounds_check`).
+//
+// Note: Prefer the procedure group `inject_at`.
+@builtin
+inject_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	if array == nil {
+		return
+	}
+	// The new length is one past whichever is larger: the current length or the insertion index.
+	n := max(len(array), index)
+	m :: 1
+	new_size := n + m
+
+	resize(array, new_size, loc) or_return
+	when size_of(E) != 0 {
+		// Move the tail up by `m`, then write the new element into the gap.
+		copy(array[index + m:], array[index:])
+		array[index] = arg
+	}
+	ok = true
+	return
+}
+
+// `inject_at_elems` inserts all of `args` at `index`, shifting the elements from `index` onwards up by `len(args)`.
+// An empty `args` succeeds without touching the array; a nil `array` returns `ok = false`.
+// Bounds checks are disabled (`#no_bounds_check`).
+//
+// Note: Prefer the procedure group `inject_at`.
+@builtin
+inject_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	if array == nil {
+		return
+	}
+	if len(args) == 0 {
+		ok = true
+		return
+	}
+
+	// The new length is `len(args)` past whichever is larger: the current length or the insertion index.
+	n := max(len(array), index)
+	m := len(args)
+	new_size := n + m
+
+	resize(array, new_size, loc) or_return
+	when size_of(E) != 0 {
+		// Move the tail up by `m`, then copy the new elements into the gap.
+		copy(array[index + m:], array[index:])
+		copy(array[index:], args)
+	}
+	ok = true
+	return
+}
+
+// `inject_at_elem_string` inserts the bytes of `arg` at `index` of a [dynamic]u8 like type,
+// shifting the bytes from `index` onwards up by `len(arg)`.
+// An empty string succeeds without touching the array; a nil `array` returns `ok = false`.
+// Bounds checks are disabled (`#no_bounds_check`).
+//
+// Note: Prefer the procedure group `inject_at`.
+@builtin
+inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	if array == nil {
+		return
+	}
+	if len(arg) == 0 {
+		ok = true
+		return
+	}
+
+	// The new length is `len(arg)` past whichever is larger: the current length or the insertion index.
+	n := max(len(array), index)
+	m := len(arg)
+	new_size := n + m
+
+	resize(array, new_size, loc) or_return
+	// Move the tail up by `m`, then copy the string bytes into the gap.
+	copy(array[index+m:], array[index:])
+	copy(array[index:], arg)
+	ok = true
+	return
+}
+
+// The inject_at built-in procedure inserts an element, several elements, or a string at an index of a dynamic array
+@builtin inject_at :: proc{inject_at_elem, inject_at_elems, inject_at_elem_string}
+
+
+
+// `assign_at_elem` stores `arg` at `index`, resizing the array to `index + 1` first when
+// `index` is at or past the current length.
+//
+// A nil `array` returns `ok = false` with no error (the same convention as `inject_at_elem`)
+// instead of dereferencing the nil pointer. Bounds checks are disabled (`#no_bounds_check`),
+// so `index` is expected to be non-negative.
+//
+// Note: Prefer the procedure group `assign_at`.
+@builtin
+assign_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	if array == nil {
+		return
+	}
+	if index >= len(array) {
+		resize(array, index+1, loc) or_return
+	}
+	array[index] = arg
+	ok = true
+	return
+}
+
+
+// `assign_at_elems` overwrites the elements starting at `index` with `args`, resizing the
+// array to `index + len(args)` first when that exceeds the current length.
+//
+// An empty `args` succeeds without touching the array. Otherwise a nil `array` returns
+// `ok = false` with no error instead of dereferencing the nil pointer.
+// Bounds checks are disabled (`#no_bounds_check`), so `index` is expected to be non-negative.
+//
+// Note: Prefer the procedure group `assign_at`.
+@builtin
+assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	if len(args) == 0 {
+		ok = true
+		return
+	}
+	if array == nil {
+		return
+	}
+
+	new_size := index + len(args)
+	if new_size > len(array) {
+		resize(array, new_size, loc) or_return
+	}
+	copy(array[index:], args)
+	ok = true
+	return
+}
+
+
+// `assign_at_elem_string` overwrites the bytes starting at `index` of a [dynamic]u8 like type
+// with the bytes of `arg`, resizing the array to `index + len(arg)` first when that exceeds
+// the current length.
+//
+// An empty string succeeds without touching the array. Otherwise a nil `array` returns
+// `ok = false` with no error instead of dereferencing the nil pointer.
+// Bounds checks are disabled (`#no_bounds_check`), so `index` is expected to be non-negative.
+//
+// Note: Prefer the procedure group `assign_at`.
+@builtin
+assign_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	if len(arg) == 0 {
+		ok = true
+		return
+	}
+	if array == nil {
+		return
+	}
+
+	new_size := index + len(arg)
+	if new_size > len(array) {
+		resize(array, new_size, loc) or_return
+	}
+	copy(array[index:], arg)
+	ok = true
+	return
+}
+
+// The assign_at built-in procedure overwrites an element, several elements, or a string's bytes at an index of a dynamic array, growing it if needed
+@builtin assign_at :: proc{assign_at_elem, assign_at_elems, assign_at_elem_string}
+
+
+
+
+// `clear_dynamic_array` will set the length of a passed dynamic array to `0`.
+// The capacity and the backing memory are left untouched; a nil pointer is ignored.
+//
+// Note: Prefer the procedure group `clear`.
+@builtin
+clear_dynamic_array :: proc "contextless" (array: ^$T/[dynamic]$E) {
+	if array == nil {
+		return
+	}
+	raw := (^Raw_Dynamic_Array)(array)
+	raw.len = 0
+}
+
+// `_reserve_dynamic_array` is the shared implementation of `reserve_dynamic_array` and
+// `non_zero_reserve_dynamic_array`. It will try to reserve memory of a passed dynamic array
+// to the requested element count (setting the `cap`); the length is never changed.
+//
+// `should_zero` selects `mem_resize` over `non_zero_mem_resize` for the reallocation.
+// A nil `array`, or a `capacity` that does not exceed the current capacity, is a no-op.
+//
+// Note: Prefer the procedure group `reserve`.
+_reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
+	if array == nil {
+		return nil
+	}
+	a := (^Raw_Dynamic_Array)(array)
+
+	if capacity <= a.cap {
+		return nil
+	}
+
+	// An array that never had an allocator assigned adopts the current context allocator.
+	if a.allocator.procedure == nil {
+		a.allocator = context.allocator
+	}
+	assert(a.allocator.procedure != nil)
+
+	old_size  := a.cap * size_of(E)
+	new_size  := capacity * size_of(E)
+	allocator := a.allocator
+
+	new_data: []byte
+	if should_zero {
+		new_data = mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
+	} else {
+		new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
+	}
+	// A nil block without an error is still treated as a failure when bytes were requested.
+	if new_data == nil && new_size > 0 {
+		return .Out_Of_Memory
+	}
+
+	a.data = raw_data(new_data)
+	a.cap = capacity
+	return nil
+}
+
+// `reserve_dynamic_array` reserves capacity via `_reserve_dynamic_array` with `should_zero = true`.
+//
+// Note: Prefer the procedure group `reserve`.
+@builtin
+reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
+	return _reserve_dynamic_array(array, capacity, true, loc)
+}
+
+// `non_zero_reserve_dynamic_array` reserves capacity via `_reserve_dynamic_array` with `should_zero = false`.
+//
+// Note: Prefer the procedure group `non_zero_reserve`.
+@builtin
+non_zero_reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
+	return _reserve_dynamic_array(array, capacity, false, loc)
+}
+
+// `_resize_dynamic_array` is the shared implementation of `resize_dynamic_array` and
+// `non_zero_resize_dynamic_array`. It will try to resize memory of a passed dynamic array to
+// the requested element count (setting the `len`, and possibly `cap`).
+//
+// When `should_zero` is true, slots that become visible because `len` grows into already
+// reserved capacity are zeroed here, so a shrink/clear followed by a grow never exposes stale
+// values; any reallocation goes through `mem_resize`. When `should_zero` is false nothing is
+// cleared and `non_zero_mem_resize` is used.
+//
+// A nil `array` is a no-op. A negative `length` that fits the capacity is clamped to `0`.
+//
+// Note: Prefer the procedure group `resize`
+_resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
+	if array == nil {
+		return nil
+	}
+	a := (^Raw_Dynamic_Array)(array)
+
+	when size_of(E) != 0 {
+		// Slots in [len, min(cap, length)) are reused as-is by both paths below and may still
+		// hold old element values; clear them before they become part of the array.
+		if should_zero && a.data != nil && a.len < length {
+			reused := min(a.cap, length) - a.len
+			if reused > 0 {
+				intrinsics.mem_zero(&([^]E)(a.data)[a.len], size_of(E) * reused)
+			}
+		}
+	}
+
+	if length <= a.cap {
+		a.len = max(length, 0)
+		return nil
+	}
+
+	// An array that never had an allocator assigned adopts the current context allocator.
+	if a.allocator.procedure == nil {
+		a.allocator = context.allocator
+	}
+	assert(a.allocator.procedure != nil)
+
+	old_size  := a.cap * size_of(E)
+	new_size  := length * size_of(E)
+	allocator := a.allocator
+
+	new_data : []byte
+	if should_zero {
+		new_data = mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
+	} else {
+		new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
+	}
+	// A nil block without an error is still treated as a failure when bytes were requested.
+	if new_data == nil && new_size > 0 {
+		return .Out_Of_Memory
+	}
+
+	a.data = raw_data(new_data)
+	a.len = length
+	a.cap = length
+	return nil
+}
+
+// `resize_dynamic_array` resizes via `_resize_dynamic_array` with `should_zero = true`.
+//
+// Note: Prefer the procedure group `resize`.
+@builtin
+resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
+	return _resize_dynamic_array(array, length, true, loc=loc)
+}
+
+// `non_zero_resize_dynamic_array` resizes via `_resize_dynamic_array` with `should_zero = false`.
+//
+// Note: Prefer the procedure group `non_zero_resize`.
+@builtin
+non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
+	return _resize_dynamic_array(array, length, false, loc=loc)
+}
+
+/*
+	Shrinks the capacity of a dynamic array down to the current length, or the given capacity.
+
+	If `new_cap` is negative, then `len(array)` is used.
+
+	Returns false if `array` is nil or `cap(array) < new_cap`, or if the allocator reports failure
+	(in which case the error is returned as well).
+
+	If `len(array) < new_cap`, then `len(array)` will be left unchanged;
+	otherwise the length is truncated to `new_cap`.
+
+	Note: Prefer the procedure group `shrink`
+*/
+shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
+	if array == nil {
+		return
+	}
+	a := (^Raw_Dynamic_Array)(array)
+
+	// Shadow the parameter with the effective target capacity.
+	new_cap := new_cap if new_cap >= 0 else a.len
+
+	if new_cap > a.cap {
+		return
+	}
+
+	// An array that never had an allocator assigned adopts the current context allocator.
+	if a.allocator.procedure == nil {
+		a.allocator = context.allocator
+	}
+	assert(a.allocator.procedure != nil)
+
+	old_size := a.cap * size_of(E)
+	new_size := new_cap * size_of(E)
+
+	new_data := mem_resize(a.data, old_size, new_size, align_of(E), a.allocator, loc) or_return
+
+	a.data = raw_data(new_data)
+	a.len = min(new_cap, a.len)
+	a.cap = new_cap
+	return true, nil
+}
+
+// `map_insert` stores `value` under `key` through `__dynamic_map_set_without_hash` and
+// returns that call's result cast to `^V`.
+@builtin
+map_insert :: proc(m: ^$T/map[$K]$V, key: K, value: V, loc := #caller_location) -> (ptr: ^V) {
+	// Local copies give the key and value addresses to pass to the untyped map routine.
+	k := key
+	v := value
+	slot := __dynamic_map_set_without_hash((^Raw_Map)(m), map_info(T), rawptr(&k), rawptr(&v), loc)
+	return (^V)(slot)
+}
+
+
+// `incl_elem` adds a single element to the bit_set pointed to by `s`.
+@builtin
+incl_elem :: proc(s: ^$S/bit_set[$E; $U], elem: E) {
+	s^ = s^ | S{elem}
+}
+// `incl_elems` adds every element of `elems` to the bit_set pointed to by `s`.
+@builtin
+incl_elems :: proc(s: ^$S/bit_set[$E; $U], elems: ..E) {
+	for e in elems {
+		s^ = s^ | S{e}
+	}
+}
+// `incl_bit_set` adds every element of `other` to the bit_set pointed to by `s` (set union).
+@builtin
+incl_bit_set :: proc(s: ^$S/bit_set[$E; $U], other: S) {
+	s^ = s^ | other
+}
+// `excl_elem` removes a single element from the bit_set pointed to by `s`.
+@builtin
+excl_elem :: proc(s: ^$S/bit_set[$E; $U], elem: E) {
+	s^ = s^ &~ S{elem}
+}
+// `excl_elems` removes every element of `elems` from the bit_set pointed to by `s`.
+@builtin
+excl_elems :: proc(s: ^$S/bit_set[$E; $U], elems: ..E) {
+	for e in elems {
+		s^ = s^ &~ S{e}
+	}
+}
+// `excl_bit_set` removes every element of `other` from the bit_set pointed to by `s` (set difference).
+@builtin
+excl_bit_set :: proc(s: ^$S/bit_set[$E; $U], other: S) {
+	s^ = s^ &~ other
+}
+
+// `incl` adds, and `excl` removes, an element, several elements, or a whole bit_set to/from a bit_set
+@builtin incl :: proc{incl_elem, incl_elems, incl_bit_set}
+@builtin excl :: proc{excl_elem, excl_elems, excl_bit_set}
+
+
+// `card` returns the cardinality of a bit_set, i.e. the number of bits that are set.
+//
+// The set is transmuted to the unsigned integer of the same size (1, 2, 4, 8 or 16 bytes)
+// and its set bits are counted with `intrinsics.count_ones`. Any other size reaches the
+// `#panic` directive.
+@builtin
+card :: proc(s: $S/bit_set[$E; $U]) -> int {
+	when size_of(S) == 1 {
+		return int(intrinsics.count_ones(transmute(u8)s))
+	} else when size_of(S) == 2 {
+		return int(intrinsics.count_ones(transmute(u16)s))
+	} else when size_of(S) == 4 {
+		return int(intrinsics.count_ones(transmute(u32)s))
+	} else when size_of(S) == 8 {
+		return int(intrinsics.count_ones(transmute(u64)s))
+	} else when size_of(S) == 16 {
+		return int(intrinsics.count_ones(transmute(u128)s))
+	} else {
+		#panic("Unhandled card bit_set size")
+	}
+}
+
+
+
+// `assert` reports a "runtime assertion" failure when `condition` is false.
+//
+// The failure is dispatched to `context.assertion_failure_proc`, falling back to
+// `default_assertion_failure_proc` when none is set. The procedure is disabled
+// when `ODIN_DISABLE_ASSERT` is true.
+@builtin
+@(disabled=ODIN_DISABLE_ASSERT)
+assert :: proc(condition: bool, message := "", loc := #caller_location) {
+	if !condition {
+		// NOTE(bill): This is wrapped in a procedure call
+		// to improve performance to make the CPU not
+		// execute speculatively, making it about an order of
+		// magnitude faster
+		@(cold)
+		internal :: proc(message: string, loc: Source_Code_Location) {
+			p := context.assertion_failure_proc
+			if p == nil {
+				p = default_assertion_failure_proc
+			}
+			p("runtime assertion", message, loc)
+		}
+		internal(message, loc)
+	}
+}
+
+// `panic` unconditionally reports a "panic" failure with `message` and never returns.
+// It uses `context.assertion_failure_proc`, or `default_assertion_failure_proc` when none is set.
+@builtin
+panic :: proc(message: string, loc := #caller_location) -> ! {
+	handler := context.assertion_failure_proc
+	if handler == nil {
+		handler = default_assertion_failure_proc
+	}
+	handler("panic", message, loc)
+}
+
+// `unimplemented` unconditionally reports a "not yet implemented" failure and never returns.
+// It uses `context.assertion_failure_proc`, or `default_assertion_failure_proc` when none is set.
+@builtin
+unimplemented :: proc(message := "", loc := #caller_location) -> ! {
+	handler := context.assertion_failure_proc
+	if handler == nil {
+		handler = default_assertion_failure_proc
+	}
+	handler("not yet implemented", message, loc)
+}
diff --git a/base/runtime/core_builtin_matrix.odin b/base/runtime/core_builtin_matrix.odin
new file mode 100644
index 000000000..7d60d625c
--- /dev/null
+++ b/base/runtime/core_builtin_matrix.odin
@@ -0,0 +1,274 @@
+package runtime
+
+import "core:intrinsics"
+_ :: intrinsics
+
+
+// `determinant` computes the determinant of a square matrix of size 1x1 to 4x4.
+@(builtin)
+determinant :: proc{
+	matrix1x1_determinant,
+	matrix2x2_determinant,
+	matrix3x3_determinant,
+	matrix4x4_determinant,
+}
+
+// `adjugate` is the procedure group over the `matrixNxN_adjugate` procedures (1x1 to 4x4).
+// NOTE(review): as implemented in this file, element [i, j] of the result is the signed minor
+// of row i / column j (the cofactor layout); `inverse` reads it transposed as `a[j, i]`.
+@(builtin)
+adjugate :: proc{
+	matrix1x1_adjugate,
+	matrix2x2_adjugate,
+	matrix3x3_adjugate,
+	matrix4x4_adjugate,
+}
+
+// `inverse_transpose` computes the transpose of the inverse of a square matrix of size 1x1 to 4x4.
+@(builtin)
+inverse_transpose :: proc{
+	matrix1x1_inverse_transpose,
+	matrix2x2_inverse_transpose,
+	matrix3x3_inverse_transpose,
+	matrix4x4_inverse_transpose,
+}
+
+
+// `inverse` computes the inverse of a square matrix of size 1x1 to 4x4.
+@(builtin)
+inverse :: proc{
+	matrix1x1_inverse,
+	matrix2x2_inverse,
+	matrix3x3_inverse,
+	matrix4x4_inverse,
+}
+
+// `hermitian_adjoint` returns the conjugate transpose of a square matrix with complex elements.
+@(builtin, require_results)
+hermitian_adjoint :: proc "contextless" (m: $M/matrix[$N, N]$T) -> M where intrinsics.type_is_complex(T), N >= 1 {
+	transposed := transpose(m)
+	return conj(transposed)
+}
+
+// `matrix_trace` returns the sum of the elements on the main diagonal of a square matrix.
+@(builtin, require_results)
+matrix_trace :: proc "contextless" (m: $M/matrix[$N, N]$T) -> (trace: T) {
+	for diag in 0..<N {
+		trace = trace + m[diag, diag]
+	}
+	return trace
+}
+
+// `matrix_minor` returns the determinant of the (N-1)x(N-1) matrix obtained by removing
+// row `row` and column `column` from `m`.
+@(builtin, require_results)
+matrix_minor :: proc "contextless" (m: $M/matrix[$N, N]$T, row, column: int) -> (minor: T) where N > 1 {
+	K :: N-1
+	sub: matrix[K, K]T
+	for c in 0..<K {
+		// Source indices skip over the removed column/row.
+		src_col := c if c < column else c+1
+		for r in 0..<K {
+			src_row := r if r < row else r+1
+			sub[r, c] = m[src_row, src_col]
+		}
+	}
+	return determinant(sub)
+}
+
+
+
+// Determinant of a 1x1 matrix: its only element.
+@(builtin, require_results)
+matrix1x1_determinant :: proc "contextless" (m: $M/matrix[1, 1]$T) -> (det: T) {
+	det = m[0, 0]
+	return
+}
+
+// Determinant of a 2x2 matrix: product of the main diagonal minus product of the anti-diagonal.
+@(builtin, require_results)
+matrix2x2_determinant :: proc "contextless" (m: $M/matrix[2, 2]$T) -> (det: T) {
+	main_diag := m[0, 0]*m[1, 1]
+	anti_diag := m[0, 1]*m[1, 0]
+	return main_diag - anti_diag
+}
+// Determinant of a 3x3 matrix by cofactor expansion along the first row.
+@(builtin, require_results)
+matrix3x3_determinant :: proc "contextless" (m: $M/matrix[3, 3]$T) -> (det: T) {
+	// 2x2 minors obtained by deleting row 0 and column 0, 1, 2 respectively.
+	minor0 := m[1, 1] * m[2, 2] - m[1, 2] * m[2, 1]
+	minor1 := m[1, 0] * m[2, 2] - m[1, 2] * m[2, 0]
+	minor2 := m[1, 0] * m[2, 1] - m[1, 1] * m[2, 0]
+
+	term0 := +m[0, 0] * minor0
+	term1 := -m[0, 1] * minor1
+	term2 := +m[0, 2] * minor2
+	det = term0 + term1 + term2
+	return
+}
+// Determinant of a 4x4 matrix: the first row of `m` multiplied element-wise with the
+// first row of `adjugate(m)` and summed.
+@(builtin, require_results)
+matrix4x4_determinant :: proc "contextless" (m: $M/matrix[4, 4]$T) -> (det: T) {
+	cofactors := adjugate(m)
+	#no_bounds_check for col in 0..<4 {
+		det += m[0, col] * cofactors[0, col]
+	}
+	return det
+}
+
+
+
+
+// "Adjugate" of a 1x1 matrix as defined in this file: the input is returned unchanged.
+@(builtin, require_results)
+matrix1x1_adjugate :: proc "contextless" (x: $M/matrix[1, 1]$T) -> (y: M) {
+	return x
+}
+
+// "Adjugate" of a 2x2 matrix as used by `determinant`/`inverse` in this file.
+// Element [i, j] of the result is the signed minor of row i / column j of `x`
+// (NOTE(review): that is the cofactor layout, not its transpose - confirm this is intended).
+@(builtin, require_results)
+matrix2x2_adjugate :: proc "contextless" (x: $M/matrix[2, 2]$T) -> (y: M) {
+	y[0, 0] = +x[1, 1]
+	y[0, 1] = -x[1, 0]
+	y[1, 0] = -x[0, 1]
+	y[1, 1] = +x[0, 0]
+	return
+}
+
+// "Adjugate" of a 3x3 matrix as used by `inverse`/`inverse_transpose` in this file.
+// Element [i, j] of the result is the signed 2x2 minor obtained by deleting row i and
+// column j of `m` (NOTE(review): that is the cofactor layout, not its transpose - confirm).
+@(builtin, require_results)
+matrix3x3_adjugate :: proc "contextless" (m: $M/matrix[3, 3]$T) -> (y: M) {
+	y[0, 0] = +(m[1, 1] * m[2, 2] - m[2, 1] * m[1, 2])
+	y[0, 1] = -(m[1, 0] * m[2, 2] - m[2, 0] * m[1, 2])
+	y[0, 2] = +(m[1, 0] * m[2, 1] - m[2, 0] * m[1, 1])
+	y[1, 0] = -(m[0, 1] * m[2, 2] - m[2, 1] * m[0, 2])
+	y[1, 1] = +(m[0, 0] * m[2, 2] - m[2, 0] * m[0, 2])
+	y[1, 2] = -(m[0, 0] * m[2, 1] - m[2, 0] * m[0, 1])
+	y[2, 0] = +(m[0, 1] * m[1, 2] - m[1, 1] * m[0, 2])
+	y[2, 1] = -(m[0, 0] * m[1, 2] - m[1, 0] * m[0, 2])
+	y[2, 2] = +(m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1])
+	return
+}
+
+
+// "Adjugate" of a 4x4 matrix as used by `determinant`/`inverse` in this file:
+// element [row, col] is `matrix_minor(x, row, col)` with a checkerboard sign
+// (+ when row+col is even, - when odd).
+@(builtin, require_results)
+matrix4x4_adjugate :: proc "contextless" (x: $M/matrix[4, 4]$T) -> (y: M) {
+	for row in 0..<4 {
+		for col in 0..<4 {
+			is_even := (row + col) % 2 == 0
+			sign: T = 1 if is_even else -1
+			y[row, col] = sign * matrix_minor(x, row, col)
+		}
+	}
+	return
+}
+
+// Inverse transpose of a 1x1 matrix: the reciprocal of its only element.
+@(builtin, require_results)
+matrix1x1_inverse_transpose :: proc "contextless" (x: $M/matrix[1, 1]$T) -> (y: M) {
+	reciprocal := 1/x[0, 0]
+	y[0, 0] = reciprocal
+	return y
+}
+
+// Inverse transpose of a 2x2 matrix, written out element by element.
+// Integer element types divide each term by the determinant `d`; other types multiply by
+// the reciprocal `1/d`. No check is made for `d == 0`.
+@(builtin, require_results)
+matrix2x2_inverse_transpose :: proc "contextless" (x: $M/matrix[2, 2]$T) -> (y: M) {
+	d := x[0, 0]*x[1, 1] - x[0, 1]*x[1, 0]
+	when intrinsics.type_is_integer(T) {
+		y[0, 0] = +x[1, 1] / d
+		y[1, 0] = -x[0, 1] / d
+		y[0, 1] = -x[1, 0] / d
+		y[1, 1] = +x[0, 0] / d
+	} else {
+		id := 1 / d
+		y[0, 0] = +x[1, 1] * id
+		y[1, 0] = -x[0, 1] * id
+		y[0, 1] = -x[1, 0] * id
+		y[1, 1] = +x[0, 0] * id
+	}
+	return
+}
+
+// Inverse transpose of a 3x3 matrix: every element of `adjugate(x)` scaled by `1/determinant(x)`.
+// Integer element types divide by the determinant directly; other types multiply by its reciprocal.
+// No check is made for a zero determinant.
+@(builtin, require_results)
+matrix3x3_inverse_transpose :: proc "contextless" (x: $M/matrix[3, 3]$T) -> (y: M) #no_bounds_check {
+	cof := adjugate(x)
+	det := determinant(x)
+	when intrinsics.type_is_integer(T) {
+		for row in 0..<3 {
+			for col in 0..<3 {
+				y[row, col] = cof[row, col] / det
+			}
+		}
+	} else {
+		inv_det := 1/det
+		for row in 0..<3 {
+			for col in 0..<3 {
+				y[row, col] = cof[row, col] * inv_det
+			}
+		}
+	}
+	return
+}
+
+// Inverse transpose of a 4x4 matrix: every element of `adjugate(x)` scaled by the reciprocal
+// of the determinant, which is accumulated from the first rows of `x` and `adjugate(x)`.
+// Integer element types divide by the determinant directly. No check is made for a zero determinant.
+@(builtin, require_results)
+matrix4x4_inverse_transpose :: proc "contextless" (x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check {
+	cof := adjugate(x)
+	det: T
+	for col in 0..<4 {
+		det += x[0, col] * cof[0, col]
+	}
+	when intrinsics.type_is_integer(T) {
+		for row in 0..<4 {
+			for col in 0..<4 {
+				y[row, col] = cof[row, col] / det
+			}
+		}
+	} else {
+		inv_det := 1/det
+		for row in 0..<4 {
+			for col in 0..<4 {
+				y[row, col] = cof[row, col] * inv_det
+			}
+		}
+	}
+	return
+}
+
+// Inverse of a 1x1 matrix: the reciprocal of its only element.
+@(builtin, require_results)
+matrix1x1_inverse :: proc "contextless" (x: $M/matrix[1, 1]$T) -> (y: M) {
+	reciprocal := 1/x[0, 0]
+	y[0, 0] = reciprocal
+	return y
+}
+
+// Inverse of a 2x2 matrix, written out element by element: the diagonal elements are
+// swapped, the off-diagonal elements negated, and everything is scaled by `1/d`.
+// Integer element types divide each term by the determinant `d`; other types multiply by
+// the reciprocal. No check is made for `d == 0`.
+@(builtin, require_results)
+matrix2x2_inverse :: proc "contextless" (x: $M/matrix[2, 2]$T) -> (y: M) {
+	d := x[0, 0]*x[1, 1] - x[0, 1]*x[1, 0]
+	when intrinsics.type_is_integer(T) {
+		y[0, 0] = +x[1, 1] / d
+		y[0, 1] = -x[0, 1] / d
+		y[1, 0] = -x[1, 0] / d
+		y[1, 1] = +x[0, 0] / d
+	} else {
+		id := 1 / d
+		y[0, 0] = +x[1, 1] * id
+		y[0, 1] = -x[0, 1] * id
+		y[1, 0] = -x[1, 0] * id
+		y[1, 1] = +x[0, 0] * id
+	}
+	return
+}
+
+// Inverse of a 3x3 matrix: the transpose of `adjugate(x)` (read as `cof[col, row]`) scaled by
+// `1/determinant(x)`. Integer element types divide by the determinant directly; other types
+// multiply by its reciprocal. No check is made for a zero determinant.
+@(builtin, require_results)
+matrix3x3_inverse :: proc "contextless" (x: $M/matrix[3, 3]$T) -> (y: M) #no_bounds_check {
+	cof := adjugate(x)
+	det := determinant(x)
+	when intrinsics.type_is_integer(T) {
+		for row in 0..<3 {
+			for col in 0..<3 {
+				y[row, col] = cof[col, row] / det
+			}
+		}
+	} else {
+		inv_det := 1/det
+		for row in 0..<3 {
+			for col in 0..<3 {
+				y[row, col] = cof[col, row] * inv_det
+			}
+		}
+	}
+	return
+}
+
+// Inverse of a 4x4 matrix: the transpose of `adjugate(x)` (read as `cof[col, row]`) scaled by
+// the reciprocal of the determinant, which is accumulated from the first rows of `x` and
+// `adjugate(x)`. Integer element types divide by the determinant directly.
+// No check is made for a zero determinant.
+@(builtin, require_results)
+matrix4x4_inverse :: proc "contextless" (x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check {
+	cof := adjugate(x)
+	det: T
+	for col in 0..<4 {
+		det += x[0, col] * cof[0, col]
+	}
+	when intrinsics.type_is_integer(T) {
+		for row in 0..<4 {
+			for col in 0..<4 {
+				y[row, col] = cof[col, row] / det
+			}
+		}
+	} else {
+		inv_det := 1/det
+		for row in 0..<4 {
+			for col in 0..<4 {
+				y[row, col] = cof[col, row] * inv_det
+			}
+		}
+	}
+	return
+}
diff --git a/base/runtime/core_builtin_soa.odin b/base/runtime/core_builtin_soa.odin
new file mode 100644
index 000000000..6313a28f5
--- /dev/null
+++ b/base/runtime/core_builtin_soa.odin
@@ -0,0 +1,428 @@
+package runtime
+
+import "core:intrinsics"
+_ :: intrinsics
+
+/*
+
+	SOA types are implemented with this sort of layout:
+
+	SOA Fixed Array
+	struct {
+		f0: [N]T0,
+		f1: [N]T1,
+		f2: [N]T2,
+	}
+
+	SOA Slice
+	struct {
+		f0: ^T0,
+		f1: ^T1,
+		f2: ^T2,
+
+		len: int,
+	}
+
+	SOA Dynamic Array
+	struct {
+		f0: ^T0,
+		f1: ^T1,
+		f2: ^T2,
+
+		len: int,
+		cap: int,
+		allocator: Allocator,
+	}
+
+	A footer is used rather than a header purely to simplify access to the fields internally
+	i.e. field index of the AOS == SOA
+
+*/
+
+
+// Trailing part of an #soa slice, located after the per-field data pointers.
+Raw_SOA_Footer_Slice :: struct {
+	len: int, // number of elements
+}
+
+// Trailing part of an #soa dynamic array, located after the per-field data pointers.
+Raw_SOA_Footer_Dynamic_Array :: struct {
+	len: int,             // number of elements in use
+	cap: int,             // number of elements the backing memory can hold
+	allocator: Allocator, // allocator used to grow and free the backing memory
+}
+
+// `raw_soa_footer_slice` returns a pointer to the footer (`len`) of an #soa slice, which sits
+// directly after the per-field data pointers. Returns nil for a nil `array`.
+//
+// The number of data pointers is `len(E)` when the element type is an array and the struct
+// field count otherwise, matching `raw_soa_footer_dynamic_array`.
+@(builtin, require_results)
+raw_soa_footer_slice :: proc(array: ^$T/#soa[]$E) -> (footer: ^Raw_SOA_Footer_Slice) {
+	if array == nil {
+		return nil
+	}
+	field_count: uintptr
+	when intrinsics.type_is_array(E) {
+		field_count = len(E)
+	} else {
+		field_count = uintptr(intrinsics.type_struct_field_count(E))
+	}
+	footer = (^Raw_SOA_Footer_Slice)(uintptr(array) + field_count*size_of(rawptr))
+	return
+}
+// `raw_soa_footer_dynamic_array` returns a pointer to the footer (`len`, `cap`, `allocator`) of
+// an #soa dynamic array, which sits directly after the per-field data pointers.
+// Returns nil for a nil `array`.
+@(builtin, require_results)
+raw_soa_footer_dynamic_array :: proc(array: ^$T/#soa[dynamic]$E) -> (footer: ^Raw_SOA_Footer_Dynamic_Array) {
+	if array == nil {
+		return nil
+	}
+	// One data pointer per array element when `E` is an array, otherwise one per struct field.
+	field_count: uintptr
+	when intrinsics.type_is_array(E) {
+		field_count = len(E)
+	} else {
+		field_count = uintptr(intrinsics.type_struct_field_count(E))
+	}
+	footer = (^Raw_SOA_Footer_Dynamic_Array)(uintptr(array) + field_count*size_of(rawptr))
+	return
+}
+// `raw_soa_footer` returns a pointer to the footer of an #soa slice or #soa dynamic array.
+raw_soa_footer :: proc{
+	raw_soa_footer_slice,
+	raw_soa_footer_dynamic_array,
+}
+
+
+
+// `make_soa_aligned` allocates an #soa slice of `length` elements in a single allocation.
+//
+// Each field gets its own run of `length` values inside that allocation, and every run starts
+// at a multiple of `max(alignment, align_of(E))`. A `length <= 0` returns an empty slice.
+// For a zero-sized `E` only the length is recorded and nothing is allocated.
+// On allocation failure the returned slice is left empty and `err` carries the allocator's error.
+//
+// The number of fields is `len(E)` when the element type is an array and the struct field
+// count otherwise, matching the #soa dynamic array procedures in this file.
+@(builtin, require_results)
+make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	if length <= 0 {
+		return
+	}
+
+	footer := raw_soa_footer(&array)
+	if size_of(E) == 0 {
+		footer.len = length
+		return
+	}
+
+	max_align := max(alignment, align_of(E))
+
+	// The #soa type is described as a struct whose leading fields are pointers to each field's data.
+	ti := type_info_of(typeid_of(T))
+	ti = type_info_base(ti)
+	si := &ti.variant.(Type_Info_Struct)
+
+	field_count: uintptr
+	when intrinsics.type_is_array(E) {
+		field_count = len(E)
+	} else {
+		field_count = uintptr(intrinsics.type_struct_field_count(E))
+	}
+
+	// First pass: total bytes needed, with every field's run padded up to `max_align`.
+	total_size := 0
+	for i in 0..<field_count {
+		type := si.types[i].variant.(Type_Info_Pointer).elem
+		total_size += type.size * length
+		total_size = align_forward_int(total_size, max_align)
+	}
+
+	allocator := allocator
+	if allocator.procedure == nil {
+		allocator = context.allocator
+	}
+	assert(allocator.procedure != nil)
+
+	new_bytes: []byte
+	new_bytes, err = allocator.procedure(
+		allocator.data, .Alloc, total_size, max_align,
+		nil, 0, loc,
+	)
+	if new_bytes == nil || err != nil {
+		return
+	}
+	new_data := raw_data(new_bytes)
+
+	// Second pass: point each field's data pointer at its run inside the allocation.
+	data := uintptr(&array)
+	offset := 0
+	for i in 0..<field_count {
+		type := si.types[i].variant.(Type_Info_Pointer).elem
+
+		offset = align_forward_int(offset, max_align)
+
+		(^uintptr)(data)^ = uintptr(new_data) + uintptr(offset)
+		data += size_of(rawptr)
+		offset += type.size * length
+	}
+	footer.len = length
+
+	return
+}
+
+// `make_soa_slice` allocates an #soa slice of `length` elements using `align_of(E)` as the alignment.
+//
+// Note: Prefer the procedure group `make_soa`.
+@(builtin, require_results)
+make_soa_slice :: proc($T: typeid/#soa[]$E, length: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	return make_soa_aligned(T, length, align_of(E), allocator, loc)
+}
+
+// `make_soa_dynamic_array` creates an empty #soa dynamic array with `DEFAULT_RESERVE_CAPACITY`
+// reserved, using `allocator` (installed as `context.allocator` for the reservation).
+//
+// Note: Prefer the procedure group `make_soa`.
+@(builtin, require_results)
+make_soa_dynamic_array :: proc($T: typeid/#soa[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	context.allocator = allocator
+	err = reserve_soa(&array, DEFAULT_RESERVE_CAPACITY, loc)
+	return
+}
+
+// `make_soa_dynamic_array_len` creates an #soa dynamic array resized to `length` elements,
+// using `allocator` (installed as `context.allocator` for the resize).
+//
+// Note: Prefer the procedure group `make_soa`.
+@(builtin, require_results)
+make_soa_dynamic_array_len :: proc($T: typeid/#soa[dynamic]$E, #any_int length: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	context.allocator = allocator
+	err = resize_soa(&array, length, loc)
+	return
+}
+
+// `make_soa_dynamic_array_len_cap` creates an #soa dynamic array with `capacity` reserved and
+// then resized to `length` elements, using `allocator` (installed as `context.allocator`).
+// The resize is skipped when the reservation fails.
+//
+// Note: Prefer the procedure group `make_soa`.
+@(builtin, require_results)
+make_soa_dynamic_array_len_cap :: proc($T: typeid/#soa[dynamic]$E, #any_int length, capacity: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	context.allocator = allocator
+	err = reserve_soa(&array, capacity, loc)
+	if err == nil {
+		err = resize_soa(&array, length, loc)
+	}
+	return
+}
+
+
+// The `make_soa` built-in procedure allocates and initializes an #soa slice or #soa dynamic array
+@builtin
+make_soa :: proc{
+	make_soa_slice,
+	make_soa_dynamic_array,
+	make_soa_dynamic_array_len,
+	make_soa_dynamic_array_len_cap,
+}
+
+
+// `resize_soa` sets the length of an #soa dynamic array to `length`, reserving capacity first
+// through `reserve_soa`. A nil `array` is a no-op; a failed reservation leaves the length unchanged.
+@builtin
+resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
+	if array == nil {
+		return nil
+	}
+	reserve_soa(array, length, loc) or_return
+	raw_soa_footer(array).len = length
+	return nil
+}
+
+// `reserve_soa` grows the capacity of an #soa dynamic array to `capacity` elements.
+//
+// All fields live in one allocation, so growing allocates a new block, copies each field's
+// run of `old_cap` values to its new position, repoints the per-field data pointers and then
+// frees the old block. A nil `array`, or a `capacity` not larger than the current one, is a
+// no-op. For a zero-sized `E` only the recorded capacity changes.
+@builtin
+reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
+	if array == nil {
+		return nil
+	}
+
+	old_cap := cap(array)
+	if capacity <= old_cap {
+		return nil
+	}
+
+	// An array that never had an allocator assigned adopts the current context allocator.
+	if array.allocator.procedure == nil {
+		array.allocator = context.allocator
+	}
+	assert(array.allocator.procedure != nil)
+
+	footer := raw_soa_footer(array)
+	if size_of(E) == 0 {
+		footer.cap = capacity
+		return nil
+	}
+
+	// The #soa type is described as a struct whose leading fields are pointers to each field's data.
+	ti := type_info_of(typeid_of(T))
+	ti = type_info_base(ti)
+	si := &ti.variant.(Type_Info_Struct)
+
+	field_count: uintptr
+	when intrinsics.type_is_array(E) {
+		field_count = len(E)
+	} else {
+		field_count = uintptr(intrinsics.type_struct_field_count(E))
+	}
+	assert(footer.cap == old_cap)
+
+	old_size := 0
+	new_size := 0
+
+	// Total byte sizes of the old and new blocks, with every field's run padded up to `max_align`.
+	max_align :: align_of(E)
+	for i in 0..<field_count {
+		type := si.types[i].variant.(Type_Info_Pointer).elem
+
+		old_size += type.size * old_cap
+		new_size += type.size * capacity
+
+		old_size = align_forward_int(old_size, max_align)
+		new_size = align_forward_int(new_size, max_align)
+	}
+
+	// The first data pointer is the start of the old block.
+	old_data := (^rawptr)(array)^
+
+	new_bytes := array.allocator.procedure(
+		array.allocator.data, .Alloc, new_size, max_align,
+		nil, old_size, loc,
+	) or_return
+	new_data := raw_data(new_bytes)
+
+
+	footer.cap = capacity
+
+	// Copy each field's run into the new block and repoint its data pointer.
+	old_offset := 0
+	new_offset := 0
+	for i in 0..<field_count {
+		type := si.types[i].variant.(Type_Info_Pointer).elem
+
+		old_offset = align_forward_int(old_offset, max_align)
+		new_offset = align_forward_int(new_offset, max_align)
+
+		new_data_elem := rawptr(uintptr(new_data) + uintptr(new_offset))
+		old_data_elem := rawptr(uintptr(old_data) + uintptr(old_offset))
+
+		mem_copy(new_data_elem, old_data_elem, type.size * old_cap)
+
+		(^rawptr)(uintptr(array) + i*size_of(rawptr))^ = new_data_elem
+
+		old_offset += type.size * old_cap
+		new_offset += type.size * capacity
+	}
+
+	// NOTE(review): an error from `.Free` is returned even though the array already uses the
+	// new block at this point - confirm that is the intended behaviour for allocators that
+	// do not support freeing.
+	array.allocator.procedure(
+		array.allocator.data, .Free, 0, max_align,
+		old_data, old_size, loc,
+	) or_return
+
+	return nil
+}
+
+// `append_soa_elem` appends a single element to an #soa dynamic array by scattering each
+// field of `arg` into that field's run of values.
+// Returns the number of elements appended (0 or 1) and any error from the reservation.
+//
+// NOTE(review): growth is triggered when `cap <= len + 1`, i.e. while one free slot is still
+// left, and for a zero-sized `E` this returns 0 without changing the length (unlike
+// `append_soa_elems`) - confirm both are intended.
+//
+// Note: Prefer the procedure group `append_soa`.
+@builtin
+append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	if array == nil {
+		return 0, nil
+	}
+
+	if cap(array) <= len(array) + 1 {
+		cap := 2 * cap(array) + 8
+		err = reserve_soa(array, cap, loc) // do not 'or_return' here as it could be a partial success
+	}
+
+	footer := raw_soa_footer(array)
+
+	if size_of(E) > 0 && cap(array)-len(array) > 0 {
+		// The #soa type is described as a struct whose leading fields are pointers to each field's data.
+		ti := type_info_of(T)
+		ti = type_info_base(ti)
+		si := &ti.variant.(Type_Info_Struct)
+		field_count: uintptr
+		when intrinsics.type_is_array(E) {
+			field_count = len(E)
+		} else {
+			field_count = uintptr(intrinsics.type_struct_field_count(E))
+		}
+
+		// The first data pointer is the start of the backing block.
+		data := (^rawptr)(array)^
+
+		soa_offset := 0  // byte offset of the current field's run inside the backing block
+		item_offset := 0 // byte offset of the current field inside `arg`
+
+		arg_copy := arg
+		arg_ptr := &arg_copy
+
+		max_align :: align_of(E)
+		for i in 0..<field_count {
+			type := si.types[i].variant.(Type_Info_Pointer).elem
+
+			soa_offset  = align_forward_int(soa_offset, max_align)
+			item_offset = align_forward_int(item_offset, type.align)
+
+			// Destination is slot `len` of this field's run; source is the field inside `arg`.
+			dst := rawptr(uintptr(data) + uintptr(soa_offset) + uintptr(type.size * footer.len))
+			src := rawptr(uintptr(arg_ptr) + uintptr(item_offset))
+			mem_copy(dst, src, type.size)
+
+			soa_offset  += type.size * cap(array)
+			item_offset += type.size
+		}
+		footer.len += 1
+		return 1, err
+	}
+	return 0, err
+}
+
+// `append_soa_elems` appends multiple elements to an #soa dynamic array by scattering each
+// field of every argument into that field's run of values.
+// Returns the number of elements actually appended and any error from the reservation;
+// if the reservation fails, only as many elements as still fit are appended.
+//
+// The number of fields is `len(E)` when the element type is an array and the struct field
+// count otherwise, matching `append_soa_elem`.
+//
+// Note: Prefer the procedure group `append_soa`.
+@builtin
+append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+	if array == nil {
+		return
+	}
+
+	arg_len := len(args)
+	if arg_len == 0 {
+		return
+	}
+
+	if cap(array) <= len(array)+arg_len {
+		cap := 2 * cap(array) + max(8, arg_len)
+		err = reserve_soa(array, cap, loc) // do not 'or_return' here as it could be a partial success
+	}
+	arg_len = min(cap(array)-len(array), arg_len)
+
+	footer := raw_soa_footer(array)
+	if size_of(E) > 0 && arg_len > 0 {
+		// The #soa type is described as a struct whose leading fields are pointers to each field's data.
+		ti := type_info_of(typeid_of(T))
+		ti = type_info_base(ti)
+		si := &ti.variant.(Type_Info_Struct)
+		field_count: uintptr
+		when intrinsics.type_is_array(E) {
+			field_count = len(E)
+		} else {
+			field_count = uintptr(intrinsics.type_struct_field_count(E))
+		}
+
+		// The first data pointer is the start of the backing block.
+		data := (^rawptr)(array)^
+
+		soa_offset := 0  // byte offset of the current field's run inside the backing block
+		item_offset := 0 // byte offset of the current field inside one argument
+
+		args_ptr := &args[0]
+
+		max_align :: align_of(E)
+		for i in 0..<field_count {
+			type := si.types[i].variant.(Type_Info_Pointer).elem
+
+			soa_offset  = align_forward_int(soa_offset, max_align)
+			item_offset = align_forward_int(item_offset, type.align)
+
+			// Destination starts at slot `len` of this field's run; source is this field of args[0].
+			dst := uintptr(data) + uintptr(soa_offset) + uintptr(type.size * footer.len)
+			src := uintptr(args_ptr) + uintptr(item_offset)
+			for j in 0..<arg_len {
+				// Destination values are packed; source values are `size_of(E)` apart.
+				d := rawptr(dst + uintptr(j*type.size))
+				s := rawptr(src + uintptr(j*size_of(E)))
+				mem_copy(d, s, type.size)
+			}
+
+			soa_offset  += type.size * cap(array)
+			item_offset += type.size
+		}
+	}
+	footer.len += arg_len
+	return arg_len, err
+}
+
+
+// The append_soa built-in procedure appends one or more elements to the end of an #soa dynamic array
+@builtin
+append_soa :: proc{
+	append_soa_elem,
+	append_soa_elems,
+}
+
+
+// `delete_soa_slice` frees the backing memory of an #soa slice with `allocator`.
+//
+// All fields share one allocation whose start is the first data pointer, so that single
+// pointer is what gets freed. Element types without any fields own no memory and are skipped.
+// The field count is `len(E)` for array element types and the struct field count otherwise.
+delete_soa_slice :: proc(array: $T/#soa[]$E, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
+	when field_count != 0 {
+		array := array
+		ptr := (^rawptr)(&array)^
+		free(ptr, allocator, loc) or_return
+	}
+	return nil
+}
+
+// `delete_soa_dynamic_array` frees the backing memory of an #soa dynamic array with the
+// allocator stored in its footer.
+//
+// All fields share one allocation whose start is the first data pointer, so that single
+// pointer is what gets freed. Element types without any fields own no memory and are skipped.
+// The field count is `len(E)` for array element types and the struct field count otherwise.
+delete_soa_dynamic_array :: proc(array: $T/#soa[dynamic]$E, loc := #caller_location) -> Allocator_Error {
+	field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
+	when field_count != 0 {
+		array := array
+		ptr := (^rawptr)(&array)^
+		footer := raw_soa_footer(&array)
+		free(ptr, footer.allocator, loc) or_return
+	}
+	return nil
+}
+
+
+// The delete_soa built-in procedure frees the backing memory of an #soa slice or #soa dynamic array
+@builtin
+delete_soa :: proc{
+	delete_soa_slice,
+	delete_soa_dynamic_array,
+}
+
+
+// `clear_soa_dynamic_array` sets the length of an #soa dynamic array to `0`, keeping its
+// capacity and backing memory. A nil `array` is ignored.
+//
+// The field count is `len(E)` for array element types and the struct field count otherwise;
+// element types without any fields are left untouched.
+clear_soa_dynamic_array :: proc(array: ^$T/#soa[dynamic]$E) {
+	field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
+	when field_count != 0 {
+		// `raw_soa_footer` returns nil for a nil array; do not write through it.
+		footer := raw_soa_footer(array)
+		if footer != nil {
+			footer.len = 0
+		}
+	}
+}
+
+// The clear_soa built-in procedure sets the length of an #soa dynamic array to `0`
+@builtin
+clear_soa :: proc{
+	clear_soa_dynamic_array,
+}
+\ No newline at end of file
diff --git a/base/runtime/default_allocators_arena.odin b/base/runtime/default_allocators_arena.odin
new file mode 100644
index 000000000..1fe3c6cfc
--- /dev/null
+++ b/base/runtime/default_allocators_arena.odin
@@ -0,0 +1,304 @@
+package runtime
+
+import "core:intrinsics"
+
+// Minimum block size `arena_alloc` falls back to when `Arena.minimum_block_size` is zero.
+// It is `DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE` converted to `uint`.
+DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: uint(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE)
+
+// Header placed at the start of every allocation made by `memory_block_alloc`.
+// The usable bytes of the block follow the header within the same allocation.
+Memory_Block :: struct {
+	// Block that was current before this one was pushed (set by `arena_alloc`); nil for the oldest block.
+	prev:      ^Memory_Block,
+	// Allocator the block was obtained from; `memory_block_dealloc` frees the block with it.
+	allocator: Allocator,
+	// First usable byte of the block (located after this header).
+	base:      [^]byte,
+	// Number of bytes handed out from `base` so far, including alignment padding.
+	used:      uint,
+	// Number of usable bytes starting at `base`.
+	capacity:  uint,
+}
+
+// Growing arena made of a singly linked list of `Memory_Block`s.
+// The zero value is usable: `arena_alloc` fills in the missing defaults on first use.
+Arena :: struct {
+	// Allocator used for new blocks; `arena_alloc` sets it to `default_allocator()` when its procedure is nil.
+	backing_allocator:  Allocator,
+	// Most recently allocated block; allocations are served from it.
+	curr_block:         ^Memory_Block,
+	// Running total of bytes consumed by `arena_alloc` (including alignment padding).
+	total_used:         uint,
+	// Sum of the capacities of the blocks currently owned by the arena.
+	total_capacity:     uint,
+	// Smallest capacity requested for a new block; zero means `DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE`.
+	minimum_block_size: uint,
+	// Number of `arena_temp_begin` calls not yet matched by `arena_temp_end`/`arena_temp_ignore`.
+	temp_count:         uint,
+}
+
+// Adds `x` and `y` and reports whether the result is valid.
+// The boolean is `true` when the addition did NOT overflow.
+@(private, require_results)
+safe_add :: #force_inline proc "contextless" (x, y: uint) -> (uint, bool) {
+	sum, overflowed := intrinsics.overflow_add(x, y)
+	ok := !overflowed
+	return sum, ok
+}
+
+// Allocates a `Memory_Block` header followed by at least `capacity` usable bytes from `allocator`.
+//
+// Inputs:
+// - `allocator`: allocator the block is taken from; it is recorded in the block so it can be freed later
+// - `capacity`:  minimum number of usable bytes after the header
+// - `alignment`: required alignment of `block.base`; expected to be zero or a power of two
+//
+// Returns the new block, or an error if the size computation overflows or the allocation fails.
+@(require_results)
+memory_block_alloc :: proc(allocator: Allocator, capacity: uint, alignment: uint, loc := #caller_location) -> (block: ^Memory_Block, err: Allocator_Error) {
+	// Round the header size up to a multiple of `alignment` so that `block.base` is itself aligned.
+	// Using `max(alignment, size_of(Memory_Block))` is not enough: when the header is larger than
+	// the alignment but not a multiple of it, the base would be misaligned and an exactly sized
+	// block could not satisfy the request it was created for.
+	header_size := uint(size_of(Memory_Block))
+	base_offset := header_size
+	if alignment > 1 {
+		base_offset = (header_size + alignment-1) &~ (alignment-1)
+	}
+
+	total_size, size_ok := safe_add(capacity, base_offset)
+	if !size_ok {
+		return nil, .Out_Of_Memory
+	}
+
+	min_alignment: int = max(16, align_of(Memory_Block), int(alignment))
+	data := mem_alloc(int(total_size), min_alignment, allocator, loc) or_return
+	block = (^Memory_Block)(raw_data(data))
+	end := uintptr(raw_data(data)[len(data):])
+
+	block.allocator = allocator
+	block.base = ([^]byte)(uintptr(block) + uintptr(base_offset))
+	// Everything between the base and the end of the allocation is usable.
+	block.capacity = uint(end - uintptr(block.base))
+
+	// Should be zeroed
+	assert(block.used == 0)
+	assert(block.prev == nil)
+	return
+}
+
+// Returns `block_to_free` to the allocator recorded in its header. A nil block is ignored.
+memory_block_dealloc :: proc(block_to_free: ^Memory_Block, loc := #caller_location) {
+	if block_to_free == nil {
+		return
+	}
+	// The allocator lives inside the block itself, so copy it out before freeing.
+	owner := block_to_free.allocator
+	mem_free(block_to_free, owner, loc)
+}
+
+// Carves `min_size` bytes aligned to `alignment` out of the unused tail of `block`.
+//
+// Returns `.Out_Of_Memory` when `block` is nil, when the size computation overflows,
+// or when the padded request does not fit in the remaining capacity. On failure the
+// block is left untouched.
+@(require_results)
+alloc_from_memory_block :: proc(block: ^Memory_Block, min_size, alignment: uint) -> (data: []byte, err: Allocator_Error) {
+	// Number of padding bytes needed so that the next free byte of `block` is aligned to `alignment`.
+	calc_alignment_offset :: proc "contextless" (block: ^Memory_Block, alignment: uintptr) -> uint {
+		misalignment := uintptr(block.base[block.used:]) & (alignment-1)
+		if misalignment == 0 {
+			return 0
+		}
+		return uint(alignment - misalignment)
+	}
+
+	if block == nil {
+		return nil, .Out_Of_Memory
+	}
+
+	padding := calc_alignment_offset(block, uintptr(alignment))
+
+	padded_size, padded_ok := safe_add(min_size, padding)
+	if !padded_ok {
+		return nil, .Out_Of_Memory
+	}
+
+	new_used, used_ok := safe_add(block.used, padded_size)
+	if !used_ok || new_used > block.capacity {
+		return nil, .Out_Of_Memory
+	}
+
+	// Skip the padding, then hand out exactly `min_size` bytes.
+	data = block.base[block.used+padding:][:min_size]
+	block.used = new_used
+	return
+}
+
+// Allocates `size` bytes aligned to `alignment` from `arena`, growing it by a new block when needed.
+//
+// Inputs:
+// - `size`:      number of bytes requested; a request of zero bytes returns `nil, nil`
+// - `alignment`: must be a power of two; zero is treated as one (no alignment requirement)
+//
+// A zero-valued arena is set up lazily: a missing minimum block size and a missing backing
+// allocator are replaced by their defaults the first time a block has to be allocated.
+//
+// Returns the allocated bytes, or the error reported while obtaining a new block.
+@(require_results)
+arena_alloc :: proc(arena: ^Arena, size, alignment: uint, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	assert(alignment & (alignment-1) == 0, "non-power of two alignment", loc)
+
+	if size == 0 {
+		return
+	}
+
+	// Zero passes the power-of-two check above but would turn the alignment mask into all ones.
+	alignment := max(alignment, 1)
+
+	// Try the current block first. `alloc_from_memory_block` accounts for the padding required at
+	// the block's current position and leaves the block untouched on failure, so it is the
+	// authoritative "does it fit" check.
+	if block := arena.curr_block; block != nil {
+		prev_used := block.used
+		if fitted, fit_err := alloc_from_memory_block(block, size, alignment); fit_err == nil {
+			arena.total_used += block.used - prev_used
+			return fitted, nil
+		}
+	}
+
+	if arena.minimum_block_size == 0 {
+		arena.minimum_block_size = DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE
+	}
+
+	// Leave room for the worst-case padding so the request is guaranteed to fit in the new block
+	// even if the block's base address is not aligned to `alignment`.
+	needed, needed_ok := safe_add(size, alignment-1)
+	if !needed_ok {
+		return nil, .Out_Of_Memory
+	}
+	block_size := max(needed, arena.minimum_block_size)
+
+	if arena.backing_allocator.procedure == nil {
+		arena.backing_allocator = default_allocator()
+	}
+
+	new_block := memory_block_alloc(arena.backing_allocator, block_size, alignment, loc) or_return
+	new_block.prev = arena.curr_block
+	arena.curr_block = new_block
+	arena.total_capacity += new_block.capacity
+
+	prev_used := new_block.used
+	data, err = alloc_from_memory_block(new_block, size, alignment)
+	arena.total_used += new_block.used - prev_used
+	return
+}
+
+// `arena_init` resets `arena` and gives it one usable block up front.
+// Calling it is optional: a zero-valued Arena also works, because `arena_alloc` sets things up on first use.
+// The block size is `size`, but never less than 4 KiB.
+@(require_results)
+arena_init :: proc(arena: ^Arena, size: uint, backing_allocator: Allocator, loc := #caller_location) -> Allocator_Error {
+	MINIMUM_INITIAL_BLOCK_SIZE :: 1<<12 // 4 KiB
+
+	arena^ = Arena{
+		backing_allocator  = backing_allocator,
+		minimum_block_size = max(size, MINIMUM_INITIAL_BLOCK_SIZE),
+	}
+
+	first_block := memory_block_alloc(arena.backing_allocator, arena.minimum_block_size, 0, loc) or_return
+	arena.curr_block = first_block
+	arena.total_capacity += first_block.capacity
+	return nil
+}
+
+
+// Pops the current (most recently allocated) block off the arena and frees it.
+// The arena's `total_used` and `total_capacity` are reduced by the block's share.
+// Does nothing when the arena has no blocks.
+arena_free_last_memory_block :: proc(arena: ^Arena, loc := #caller_location) {
+	if free_block := arena.curr_block; free_block != nil {
+		arena.curr_block = free_block.prev
+
+		// The bytes handed out from this block no longer exist, so they must stop counting as used.
+		// Clamp so the unsigned counter can never wrap.
+		arena.total_used -= min(arena.total_used, free_block.used)
+		arena.total_capacity -= free_block.capacity
+		memory_block_dealloc(free_block, loc)
+	}
+}
+
+// `arena_free_all` frees every block except the oldest one, then zeroes the used part of
+// that remaining block and marks it as empty.
+arena_free_all :: proc(arena: ^Arena, loc := #caller_location) {
+	for {
+		newest := arena.curr_block
+		if newest == nil || newest.prev == nil {
+			break
+		}
+		arena_free_last_memory_block(arena, loc)
+	}
+
+	if first := arena.curr_block; first != nil {
+		intrinsics.mem_zero(first.base, first.used)
+		first.used = 0
+	}
+	arena.total_used = 0
+}
+
+// Frees every block owned by `arena` and resets its usage and capacity counters to zero.
+arena_destroy :: proc(arena: ^Arena, loc := #caller_location) {
+	block := arena.curr_block
+	for block != nil {
+		// Read the link before the block is freed.
+		older := block.prev
+		arena.curr_block = older
+
+		arena.total_capacity -= block.capacity
+		memory_block_dealloc(block, loc)
+		block = older
+	}
+	arena.total_used = 0
+	arena.total_capacity = 0
+}
+
+// Wraps `arena` in an `Allocator` whose procedure is `arena_allocator_proc`.
+arena_allocator :: proc(arena: ^Arena) -> Allocator {
+	return Allocator{
+		procedure = arena_allocator_proc,
+		data      = arena,
+	}
+}
+
+// Allocator procedure backed by an `Arena` (passed through `allocator_data`).
+//
+// Supported modes: `.Alloc`, `.Alloc_Non_Zeroed`, `.Free_All`, `.Resize`, `.Resize_Non_Zeroed`
+// and `.Query_Features`. `.Free` and `.Query_Info` report `.Mode_Not_Implemented`, as does a
+// resize of existing memory to zero bytes.
+//
+// Resizing never grows in place: unless the request can be served by returning (a prefix of)
+// the old memory, a new region is allocated and the old contents are copied into it.
+arena_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
+                             size, alignment: int,
+                             old_memory: rawptr, old_size: int,
+                             location := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	arena := (^Arena)(allocator_data)
+
+	new_size  := uint(size)
+	align     := uint(alignment)
+	prev_size := uint(old_size)
+
+	switch mode {
+	case .Alloc, .Alloc_Non_Zeroed:
+		return arena_alloc(arena, new_size, align, location)
+
+	case .Free:
+		err = .Mode_Not_Implemented
+
+	case .Free_All:
+		arena_free_all(arena, location)
+
+	case .Resize, .Resize_Non_Zeroed:
+		old_bytes := ([^]byte)(old_memory)
+
+		if old_bytes == nil {
+			// Nothing to resize: behaves like a plain allocation.
+			return arena_alloc(arena, new_size, align, location)
+		}
+		if new_size == prev_size {
+			// Same size: return the old memory.
+			return old_bytes[:new_size], nil
+		}
+		if new_size == 0 {
+			return nil, .Mode_Not_Implemented
+		}
+		if new_size < prev_size && (uintptr(old_bytes) & uintptr(align-1)) == 0 {
+			// Shrink in place when the old memory already satisfies the alignment.
+			return old_bytes[:new_size], nil
+		}
+
+		moved := arena_alloc(arena, new_size, align, location) or_return
+		if moved == nil {
+			return
+		}
+		copy(moved, old_bytes[:prev_size])
+		return moved, nil
+
+	case .Query_Features:
+		features := (^Allocator_Mode_Set)(old_memory)
+		if features != nil {
+			features^ = {.Alloc, .Alloc_Non_Zeroed, .Free_All, .Resize, .Query_Features}
+		}
+
+	case .Query_Info:
+		err = .Mode_Not_Implemented
+	}
+
+	return
+}
+
+
+
+
+// Snapshot of an arena's position taken by `arena_temp_begin`.
+// Passing it to `arena_temp_end` rewinds the arena to this position.
+Arena_Temp :: struct {
+	// Arena the snapshot belongs to; nil in a zero-valued Arena_Temp, which `arena_temp_end` ignores.
+	arena: ^Arena,
+	// The arena's current block when the snapshot was taken (nil if it had none).
+	block: ^Memory_Block,
+	// `block.used` when the snapshot was taken.
+	used:  uint,
+}
+
+// Records the current position of `arena` and increments its count of outstanding temporaries.
+// The returned value is meant to be passed to `arena_temp_end` (or `arena_temp_ignore`).
+@(require_results)
+arena_temp_begin :: proc(arena: ^Arena, loc := #caller_location) -> (temp: Arena_Temp) {
+	assert(arena != nil, "nil arena", loc)
+
+	temp = Arena_Temp{
+		arena = arena,
+		block = arena.curr_block,
+	}
+	if block := temp.block; block != nil {
+		temp.used = block.used
+	}
+
+	arena.temp_count += 1
+	return
+}
+
+// Rewinds the arena to the position recorded in `temp` by `arena_temp_begin`.
+//
+// Blocks allocated after the snapshot are freed, the bytes handed out from the snapshot's block
+// since then are zeroed again, and the arena's count of outstanding temporaries is decremented.
+// A zero-valued `temp` (nil arena) is accepted and ignored.
+// If the snapshot was taken while the arena had no block, no memory is released.
+arena_temp_end :: proc(temp: Arena_Temp, loc := #caller_location) {
+	if temp.arena == nil {
+		assert(temp.block == nil)
+		assert(temp.used == 0)
+		return
+	}
+	arena := temp.arena
+
+	if temp.block != nil {
+		memory_block_found := false
+		for block := arena.curr_block; block != nil; block = block.prev {
+			if block == temp.block {
+				memory_block_found = true
+				break
+			}
+		}
+		assert(memory_block_found, "memory block stored within Arena_Temp not owned by Arena", loc)
+
+		// Only rewind when the block really belongs to this arena. Without this guard a build
+		// with assertions disabled would loop forever looking for a block it can never reach.
+		if memory_block_found {
+			for arena.curr_block != temp.block {
+				arena_free_last_memory_block(arena, loc)
+			}
+
+			block := arena.curr_block
+			assert(block.used >= temp.used, "out of order use of arena_temp_end", loc)
+			if block.used >= temp.used {
+				// Zero everything handed out since the snapshot so the memory is clean when reused.
+				amount_to_zero := block.used - temp.used
+				intrinsics.mem_zero(block.base[temp.used:], amount_to_zero)
+				block.used = temp.used
+				// Clamp so the unsigned counter can never wrap.
+				arena.total_used -= min(arena.total_used, amount_to_zero)
+			}
+		}
+	}
+
+	assert(arena.temp_count > 0, "double-use of arena_temp_end", loc)
+	arena.temp_count -= 1
+}
+
+// Discards a snapshot taken by `arena_temp_begin` without rewinding the arena:
+// only the arena's count of outstanding temporaries is decremented.
+arena_temp_ignore :: proc(temp: Arena_Temp, loc := #caller_location) {
+	assert(temp.arena != nil, "nil arena", loc)
+	owner := temp.arena
+
+	assert(owner.temp_count > 0, "double-use of arena_temp_end", loc)
+	owner.temp_count -= 1
+}
+
+// Asserts that every `arena_temp_begin` on `arena` has been matched by an
+// `arena_temp_end` or `arena_temp_ignore` (i.e. `temp_count` is zero).
+arena_check_temp :: proc(arena: ^Arena, loc := #caller_location) {
+	assert(arena.temp_count == 0, "Arena_Temp not been ended", loc)
+}
diff --git a/base/runtime/default_allocators_general.odin b/base/runtime/default_allocators_general.odin
new file mode 100644
index 000000000..994a672b0
--- /dev/null
+++ b/base/runtime/default_allocators_general.odin
@@ -0,0 +1,23 @@
+//+build !windows
+//+build !freestanding
+//+build !wasi
+//+build !js
+package runtime
+
+// TODO(bill): reimplement these procedures in the os_specific stuff
+import "core:os"
+
+// Selects the default allocator for targets covered by this file's build tags.
+// With ODIN_DEFAULT_TO_NIL_ALLOCATOR the nil allocator is used; otherwise the
+// heap allocator from the `os` package is used.
+when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
+	// Reference the import so it is not reported as unused in this configuration.
+	_ :: os
+
+	// mem.nil_allocator reimplementation
+	default_allocator_proc :: nil_allocator_proc
+	default_allocator :: nil_allocator
+} else {
+
+	default_allocator_proc :: os.heap_allocator_proc
+
+	// Returns the `os` package's heap allocator.
+	default_allocator :: proc() -> Allocator {
+		return os.heap_allocator()
+	}
+}
diff --git a/base/runtime/default_allocators_js.odin b/base/runtime/default_allocators_js.odin
new file mode 100644
index 000000000..715073f08
--- /dev/null
+++ b/base/runtime/default_allocators_js.odin
@@ -0,0 +1,5 @@
+//+build js
+package runtime
+
+// On `js` targets the default allocator is the panic allocator.
+default_allocator_proc :: panic_allocator_proc
+default_allocator :: panic_allocator
diff --git a/base/runtime/default_allocators_nil.odin b/base/runtime/default_allocators_nil.odin
new file mode 100644
index 000000000..c882f5196
--- /dev/null
+++ b/base/runtime/default_allocators_nil.odin
@@ -0,0 +1,88 @@
+package runtime
+
+// Allocator procedure that never provides memory.
+//
+// - `.Alloc`, `.Alloc_Non_Zeroed`: `.Out_Of_Memory`
+// - `.Resize`, `.Resize_Non_Zeroed`: `.None` when the requested size is zero, otherwise `.Out_Of_Memory`
+// - `.Free`: `.None`
+// - `.Free_All`, `.Query_Features`, `.Query_Info`: `.Mode_Not_Implemented`
+//
+// The returned slice is always nil.
+nil_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
+                               size, alignment: int,
+                               old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	result: Allocator_Error = .None
+
+	switch mode {
+	case .Alloc, .Alloc_Non_Zeroed:
+		result = .Out_Of_Memory
+	case .Free:
+		result = .None
+	case .Resize, .Resize_Non_Zeroed:
+		// Resizing to nothing trivially succeeds; anything else cannot be satisfied.
+		if size != 0 {
+			result = .Out_Of_Memory
+		}
+	case .Free_All, .Query_Features, .Query_Info:
+		result = .Mode_Not_Implemented
+	}
+
+	return nil, result
+}
+
+// Returns an `Allocator` that uses `nil_allocator_proc` and carries no data.
+nil_allocator :: proc() -> Allocator {
+	return Allocator{nil_allocator_proc, nil}
+}
+
+
+
+// On freestanding targets the default allocator is the nil allocator.
+when ODIN_OS == .Freestanding {
+	default_allocator_proc :: nil_allocator_proc
+	default_allocator :: nil_allocator
+}
+
+
+
+// Allocator procedure that panics whenever it is asked to do real work.
+//
+// - `.Alloc`, `.Alloc_Non_Zeroed`, `.Resize`, `.Resize_Non_Zeroed`: panic when `size > 0`
+// - `.Free`: panics when `old_memory` is not nil
+// - `.Free_All`, `.Query_Info`: always panic
+// - `.Query_Features`: reports `{.Query_Features}` through `old_memory` when it is not nil
+//
+// Requests that do not panic return `nil, nil`. The panic message names the offending mode.
+panic_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
+                             size, alignment: int,
+                             old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	switch mode {
+	case .Alloc:
+		if size > 0 {
+			panic("panic allocator, .Alloc called", loc=loc)
+		}
+	case .Alloc_Non_Zeroed:
+		if size > 0 {
+			panic("panic allocator, .Alloc_Non_Zeroed called", loc=loc)
+		}
+	case .Resize:
+		if size > 0 {
+			panic("panic allocator, .Resize called", loc=loc)
+		}
+	case .Resize_Non_Zeroed:
+		if size > 0 {
+			// Name the mode that was actually requested (this used to report .Alloc_Non_Zeroed).
+			panic("panic allocator, .Resize_Non_Zeroed called", loc=loc)
+		}
+	case .Free:
+		if old_memory != nil {
+			panic("panic allocator, .Free called", loc=loc)
+		}
+	case .Free_All:
+		panic("panic allocator, .Free_All called", loc=loc)
+
+	case .Query_Features:
+		set := (^Allocator_Mode_Set)(old_memory)
+		if set != nil {
+			set^ = {.Query_Features}
+		}
+		return nil, nil
+
+	case .Query_Info:
+		panic("panic allocator, .Query_Info called", loc=loc)
+	}
+
+	return nil, nil
+}
+
+// Returns an `Allocator` that uses `panic_allocator_proc` and carries no data.
+panic_allocator :: proc() -> Allocator {
+	return Allocator{panic_allocator_proc, nil}
+}
diff --git a/base/runtime/default_allocators_wasi.odin b/base/runtime/default_allocators_wasi.odin
new file mode 100644
index 000000000..a7e6842a6
--- /dev/null
+++ b/base/runtime/default_allocators_wasi.odin
@@ -0,0 +1,5 @@
+//+build wasi
+package runtime
+
+// On `wasi` targets the default allocator is the panic allocator.
+default_allocator_proc :: panic_allocator_proc
+default_allocator :: panic_allocator
diff --git a/base/runtime/default_allocators_windows.odin b/base/runtime/default_allocators_windows.odin
new file mode 100644
index 000000000..1b0f78428
--- /dev/null
+++ b/base/runtime/default_allocators_windows.odin
@@ -0,0 +1,44 @@
+//+build windows
+package runtime
+
+// Selects the default allocator on Windows.
+// With ODIN_DEFAULT_TO_NIL_ALLOCATOR the nil allocator is used; otherwise requests are
+// forwarded to the `_windows_default_*` helpers.
+when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
+	// mem.nil_allocator reimplementation
+	default_allocator_proc :: nil_allocator_proc
+	default_allocator :: nil_allocator
+} else {
+	// Allocator procedure for the Windows default allocator.
+	// `.Free_All` and `.Query_Info` report `.Mode_Not_Implemented`.
+	default_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
+	                                size, alignment: int,
+	                                old_memory: rawptr, old_size: int, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+		switch mode {
+		case .Alloc, .Alloc_Non_Zeroed:
+			// The last argument is true only for `.Alloc`
+			// (presumably requesting zeroed memory - confirm against `_windows_default_alloc`).
+			data, err = _windows_default_alloc(size, alignment, mode == .Alloc)
+
+		case .Free:
+			_windows_default_free(old_memory)
+
+		case .Free_All:
+			return nil, .Mode_Not_Implemented
+
+		case .Resize, .Resize_Non_Zeroed:
+			// Both resize modes take the same path.
+			data, err = _windows_default_resize(old_memory, old_size, size, alignment)
+
+		case .Query_Features:
+			// The caller passes a pointer to the set to fill in through `old_memory`.
+			set := (^Allocator_Mode_Set)(old_memory)
+			if set != nil {
+				set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Query_Features}
+			}
+
+		case .Query_Info:
+			return nil, .Mode_Not_Implemented
+		}
+
+		return
+	}
+
+	// Returns an `Allocator` that uses `default_allocator_proc` and carries no data.
+	default_allocator :: proc() -> Allocator {
+		return Allocator{
+			procedure = default_allocator_proc,
+			data = nil,
+		}
+	}
+}
diff --git a/base/runtime/default_temporary_allocator.odin b/base/runtime/default_temporary_allocator.odin
new file mode 100644
index 000000000..c90f0388d
--- /dev/null
+++ b/base/runtime/default_temporary_allocator.odin
@@ -0,0 +1,79 @@
+package runtime
+
+// Size in bytes used for the default temporary allocator's backing memory.
+// Can be overridden at build time through `#config`; defaults to 4 Megabytes.
+DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 4 * Megabyte)
+// True when no arena-backed temporary allocator is provided: on freestanding and JS targets,
+// or when the nil allocator is the default allocator.
+NO_DEFAULT_TEMP_ALLOCATOR: bool : ODIN_OS == .Freestanding || ODIN_OS == .JS || ODIN_DEFAULT_TO_NIL_ALLOCATOR
+
+// Definition of the default temporary allocator.
+// When NO_DEFAULT_TEMP_ALLOCATOR is set, every entity below is an empty stand-in so that
+// code referring to them still compiles; otherwise the allocator is backed by an `Arena`.
+when NO_DEFAULT_TEMP_ALLOCATOR {
+	// Stand-in with no state.
+	Default_Temp_Allocator :: struct {}
+	
+	// No-op.
+	default_temp_allocator_init :: proc(s: ^Default_Temp_Allocator, size: int, backing_allocator := context.allocator) {}
+	
+	// No-op.
+	default_temp_allocator_destroy :: proc(s: ^Default_Temp_Allocator) {}
+	
+	// Temporary allocations go through the nil allocator procedure.
+	default_temp_allocator_proc :: nil_allocator_proc
+
+	// Returns a zero-valued `Arena_Temp`.
+	@(require_results)
+	default_temp_allocator_temp_begin :: proc(loc := #caller_location) -> (temp: Arena_Temp) {
+		return
+	}
+
+	// No-op.
+	default_temp_allocator_temp_end :: proc(temp: Arena_Temp, loc := #caller_location) {
+	}
+} else {
+	// Temporary allocator state: a single growing arena.
+	Default_Temp_Allocator :: struct {
+		arena: Arena,
+	}
+	
+	// Initializes the arena with a first block of `size` bytes taken from `backing_allocator`.
+	// The error returned by `arena_init` is deliberately discarded.
+	default_temp_allocator_init :: proc(s: ^Default_Temp_Allocator, size: int, backing_allocator := context.allocator) {
+		_ = arena_init(&s.arena, uint(size), backing_allocator)
+	}
+
+	// Frees all memory owned by the arena and resets `s` to its zero value. A nil `s` is ignored.
+	default_temp_allocator_destroy :: proc(s: ^Default_Temp_Allocator) {
+		if s != nil {
+			arena_destroy(&s.arena)
+			s^ = {}
+		}
+	}
+
+	// Allocator procedure: forwards every request to `arena_allocator_proc` on the arena
+	// stored in the `Default_Temp_Allocator` passed through `allocator_data`.
+	default_temp_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
+	                                    size, alignment: int,
+	                                    old_memory: rawptr, old_size: int, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+
+		s := (^Default_Temp_Allocator)(allocator_data)
+		return arena_allocator_proc(&s.arena, mode, size, alignment, old_memory, old_size, loc)
+	}
+
+	// Takes a snapshot of the global default temporary allocator's arena, but only when
+	// `context.temp_allocator` is that global allocator. Otherwise a zero-valued
+	// `Arena_Temp` is returned, which `arena_temp_end` ignores.
+	@(require_results)
+	default_temp_allocator_temp_begin :: proc(loc := #caller_location) -> (temp: Arena_Temp) {
+		if context.temp_allocator.data == &global_default_temp_allocator_data {
+			temp = arena_temp_begin(&global_default_temp_allocator_data.arena, loc)
+		}
+		return
+	}
+
+	// Rewinds the arena to the snapshot in `temp` (see `arena_temp_end`).
+	default_temp_allocator_temp_end :: proc(temp: Arena_Temp, loc := #caller_location) {
+		arena_temp_end(temp, loc)
+	}
+
+	// Destroys the global default temporary allocator; marked `@(fini)`.
+	@(fini, private)
+	_destroy_temp_allocator_fini :: proc() {
+		default_temp_allocator_destroy(&global_default_temp_allocator_data)
+	}
+}
+
+// Scope guard for the default temporary allocator.
+// Unless `ignore` is set, a snapshot is taken with `default_temp_allocator_temp_begin`.
+// The returned values are passed to `default_temp_allocator_temp_end` through the
+// `deferred_out` attribute; with `ignore` the snapshot is zero-valued.
+@(deferred_out=default_temp_allocator_temp_end)
+DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD :: #force_inline proc(ignore := false, loc := #caller_location) -> (Arena_Temp, Source_Code_Location) {
+	snapshot: Arena_Temp
+	if !ignore {
+		snapshot = default_temp_allocator_temp_begin(loc)
+	}
+	return snapshot, loc
+}
+
+
+// Wraps `allocator` in an `Allocator` whose procedure is `default_temp_allocator_proc`.
+default_temp_allocator :: proc(allocator: ^Default_Temp_Allocator) -> Allocator {
+	return Allocator{default_temp_allocator_proc, allocator}
+}
diff --git a/base/runtime/docs.odin b/base/runtime/docs.odin
new file mode 100644
index 000000000..a520584c5
--- /dev/null
+++ b/base/runtime/docs.odin
@@ -0,0 +1,179 @@
+package runtime
+
+/*
+
+package runtime has numerous entities (declarations) which are required by the compiler to function.
+
+
+## Basic types and calls (and anything they rely on)
+
+Source_Code_Location
+Context
+Allocator
+Logger
+
+__init_context
+_cleanup_runtime
+
+
+## cstring calls
+
+cstring_to_string
+cstring_len
+
+
+
+## Required when RTTI is enabled (the vast majority of targets)
+
+Type_Info
+
+type_table
+__type_info_of
+
+
+## Hashing
+
+default_hasher
+default_hasher_cstring
+default_hasher_string
+
+
+## Pseudo-CRT required procedures due to LLVM but useful in general
+memset
+memcpy
+memmove
+
+
+## Procedures required by the LLVM backend
+umodti3
+udivti3
+modti3
+divti3
+fixdfti
+fixunsdfti
+fixunsdfdi
+floattidf
+floattidf_unsigned
+truncsfhf2
+truncdfhf2
+gnu_h2f_ieee
+gnu_f2h_ieee
+extendhfsf2
+__ashlti3 // wasm specific
+__multi3  // wasm specific
+
+
+
+## Required when an entry point is defined (i.e. 'main')
+
+args__
+
+
+## When -no-crt is defined (and not a wasm target) (mostly due to LLVM)
+_tls_index
+_fltused
+
+
+## Bounds checking procedures (when not disabled with -no-bounds-check)
+
+bounds_check_error
+matrix_bounds_check_error
+slice_expr_error_hi
+slice_expr_error_lo_hi
+multi_pointer_slice_expr_error
+
+
+## Type assertion check
+
+type_assertion_check
+type_assertion_check2 // takes in typeid
+
+
+## Arithmetic
+
+quo_complex32
+quo_complex64
+quo_complex128
+
+mul_quaternion64
+mul_quaternion128
+mul_quaternion256
+
+quo_quaternion64
+quo_quaternion128
+quo_quaternion256
+
+abs_complex32
+abs_complex64
+abs_complex128
+
+abs_quaternion64
+abs_quaternion128
+abs_quaternion256
+
+
+## Comparison
+
+memory_equal
+memory_compare
+memory_compare_zero
+
+cstring_eq
+cstring_ne
+cstring_lt
+cstring_gt
+cstring_le
+cstring_ge
+
+string_eq
+string_ne
+string_lt
+string_gt
+string_le
+string_ge
+
+complex32_eq
+complex32_ne
+complex64_eq
+complex64_ne
+complex128_eq
+complex128_ne
+
+quaternion64_eq
+quaternion64_ne
+quaternion128_eq
+quaternion128_ne
+quaternion256_eq
+quaternion256_ne
+
+
+## Map specific calls
+
+map_seed_from_map_data
+__dynamic_map_check_grow // static map calls
+map_insert_hash_dynamic  // static map calls
+__dynamic_map_get // dynamic map calls
+__dynamic_map_set // dynamic map calls
+
+
+## Dynamic literals ([dynamic]T and map[K]V) (can be disabled with -no-dynamic-literals)
+
+__dynamic_array_reserve
+__dynamic_array_append
+
+__dynamic_map_reserve
+
+
+## Objective-C specific
+
+objc_lookUpClass
+sel_registerName
+objc_allocateClassPair
+
+
+## for-in `string` type
+
+string_decode_rune
+string_decode_last_rune // #reverse for
+
+*/
+\ No newline at end of file
diff --git a/base/runtime/dynamic_array_internal.odin b/base/runtime/dynamic_array_internal.odin
new file mode 100644
index 000000000..267ee0785
--- /dev/null
+++ b/base/runtime/dynamic_array_internal.odin
@@ -0,0 +1,138 @@
+package runtime
+
+// Initializes the `Raw_Dynamic_Array` behind `array_` with the context allocator and,
+// when `cap > 0`, reserves room for `cap` elements and sets the length to `len`.
+//
+// Inputs:
+// - `array_`:     pointer to a `Raw_Dynamic_Array`
+// - `elem_size`:  size of one element in bytes
+// - `elem_align`: alignment of one element
+// - `len`, `cap`: requested length and capacity in elements
+//
+// The length is only set when the reservation succeeded, so a failed allocation can
+// never leave the array with a length larger than its storage.
+__dynamic_array_make :: proc(array_: rawptr, elem_size, elem_align: int, len, cap: int, loc := #caller_location) {
+	array := (^Raw_Dynamic_Array)(array_)
+	array.allocator = context.allocator
+	assert(array.allocator.procedure != nil)
+
+	if cap > 0 {
+		if __dynamic_array_reserve(array_, elem_size, elem_align, cap, loc) {
+			array.len = len
+		}
+	}
+}
+
+// Ensures the `Raw_Dynamic_Array` behind `array_` has room for at least `cap` elements.
+//
+// Returns `true` when the capacity is already sufficient or the storage was resized,
+// and `false` when the resize failed or yielded no memory for a non-zero element size.
+__dynamic_array_reserve :: proc(array_: rawptr, elem_size, elem_align: int, cap: int, loc := #caller_location) -> bool {
+	array := (^Raw_Dynamic_Array)(array_)
+
+	// NOTE(tetra, 2020-01-26): The allocator is set before the early-out below, because user code is
+	// usually written assuming that appending/reserving will set the allocator, if it is not already set.
+	if array.allocator.procedure == nil {
+		array.allocator = context.allocator
+	}
+	assert(array.allocator.procedure != nil)
+
+	if array.cap >= cap {
+		return true
+	}
+
+	old_bytes := array.cap * elem_size
+	new_bytes := cap * elem_size
+
+	resized, err := mem_resize(array.data, old_bytes, new_bytes, elem_align, array.allocator, loc)
+
+	switch {
+	case err != nil:
+		return false
+	case elem_size == 0:
+		// Zero-sized elements need no storage; just record the new capacity.
+		array.data = raw_data(resized)
+		array.cap = cap
+		return true
+	case resized != nil:
+		array.data = raw_data(resized)
+		// Never claim more capacity than the returned memory can hold.
+		array.cap = min(cap, len(resized)/elem_size)
+		return true
+	}
+	return false
+}
+
+// Shrinks the capacity of the `Raw_Dynamic_Array` behind `array_` to `new_cap` elements
+// (negative values are treated as zero), truncating the length if necessary.
+//
+// Returns `false` without changing the array when `new_cap` exceeds the current capacity
+// or when the resize fails.
+__dynamic_array_shrink :: proc(array_: rawptr, elem_size, elem_align: int, new_cap: int, loc := #caller_location) -> (did_shrink: bool) {
+	array := (^Raw_Dynamic_Array)(array_)
+
+	// NOTE(tetra, 2020-01-26): The allocator is set before the early-out below, because user code is
+	// usually written assuming that appending/reserving will set the allocator, if it is not already set.
+	if array.allocator.procedure == nil {
+		array.allocator = context.allocator
+	}
+	assert(array.allocator.procedure != nil)
+
+	if new_cap > array.cap {
+		return false
+	}
+
+	target_cap := max(new_cap, 0)
+	old_bytes  := array.cap * elem_size
+	new_bytes  := target_cap * elem_size
+
+	resized, err := mem_resize(array.data, old_bytes, new_bytes, elem_align, array.allocator, loc)
+	if err != nil {
+		return false
+	}
+
+	array.data = raw_data(resized)
+	array.len = min(target_cap, array.len)
+	array.cap = target_cap
+	return true
+}
+
+// Sets the length of the `Raw_Dynamic_Array` behind `array_` to `len`, reserving storage first.
+// Returns `false` and leaves the length unchanged when the reservation fails.
+__dynamic_array_resize :: proc(array_: rawptr, elem_size, elem_align: int, len: int, loc := #caller_location) -> bool {
+	if !__dynamic_array_reserve(array_, elem_size, elem_align, len, loc) {
+		return false
+	}
+
+	array := (^Raw_Dynamic_Array)(array_)
+	array.len = len
+	return true
+}
+
+
+// Appends `item_count` elements read from `items` to the `Raw_Dynamic_Array` behind `array_`.
+//
+// Returns 0 when there is nothing to append (`items` is nil or `item_count <= 0`),
+// otherwise the length of the array after the call. If growing the array fails, the
+// array is left as it was and its unchanged length is returned.
+__dynamic_array_append :: proc(array_: rawptr, elem_size, elem_align: int,
+                               items: rawptr, item_count: int, loc := #caller_location) -> int {
+	array := (^Raw_Dynamic_Array)(array_)
+
+	if items == nil || item_count <= 0 {
+		return 0
+	}
+
+	if array.cap < array.len+item_count {
+		// Grow to twice the capacity plus room for the new items (at least 8 extra slots).
+		grown_cap := 2 * array.cap + max(8, item_count)
+		// TODO(bill): Better error handling for failed reservation
+		if !__dynamic_array_reserve(array, elem_size, elem_align, grown_cap, loc) {
+			return array.len
+		}
+	}
+
+	assert(array.data != nil)
+	dst := rawptr(uintptr(array.data) + uintptr(elem_size*array.len))
+
+	mem_copy(dst, items, elem_size * item_count)
+	array.len += item_count
+	return array.len
+}
+
+// Appends one zeroed element to the `Raw_Dynamic_Array` behind `array_`.
+//
+// Returns the length of the array after the call. If growing the array fails, the
+// array is left as it was and its unchanged length is returned.
+__dynamic_array_append_nothing :: proc(array_: rawptr, elem_size, elem_align: int, loc := #caller_location) -> int {
+	array := (^Raw_Dynamic_Array)(array_)
+
+	if array.cap < array.len+1 {
+		// Same growth policy as `__dynamic_array_append` with a single item.
+		grown_cap := 2 * array.cap + max(8, 1)
+		// TODO(bill): Better error handling for failed reservation
+		if !__dynamic_array_reserve(array, elem_size, elem_align, grown_cap, loc) {
+			return array.len
+		}
+	}
+
+	assert(array.data != nil)
+	slot := rawptr(uintptr(array.data) + uintptr(elem_size*array.len))
+	mem_zero(slot, elem_size)
+	array.len += 1
+	return array.len
+}
diff --git a/base/runtime/dynamic_map_internal.odin b/base/runtime/dynamic_map_internal.odin
new file mode 100644
index 000000000..491a7974d
--- /dev/null
+++ b/base/runtime/dynamic_map_internal.odin
@@ -0,0 +1,924 @@
+package runtime
+
+import "core:intrinsics"
+_ :: intrinsics
+
+// High performance, cache-friendly, open-addressed Robin Hood hashing hash map
+// data structure with various optimizations for Odin.
+//
+// Copyright 2022 (c) Dale Weiler
+//
+// The core of the hash map data structure is the Raw_Map struct which is a
+// type-erased representation of the map. This type-erased representation is
+// used in two ways: static and dynamic. When static type information is known,
+// the procedures suffixed with _static should be used instead of _dynamic. The
+// static procedures are optimized since they have type information. Hashing of
+// keys, comparison of keys, and data lookup are all optimized. When type
+// information is not known, the procedures suffixed with _dynamic should be
+// used. The representation of the map is the same for both static and dynamic,
+// and procedures of each can be mixed and matched. The purpose of the dynamic
+// representation is to enable reflection and runtime manipulation of the map.
+// The dynamic procedures all take an additional Map_Info structure parameter
+// which carries runtime values describing the size, alignment, and offset of
+// various traits of a given key and value type pair. The Map_Info value can
+// be created by calling map_info(K, V) with the key and value typeids.
+//
+// This map implementation makes extensive use of uintptr for representing
+// sizes, lengths, capacities, masks, pointers, offsets, and addresses to avoid
+// expensive sign extension and masking that would be generated if types were
+// cast all over. The only place regular ints show up is in the cap() and
+// len() implementations.
+//
+// To make this map cache-friendly it uses a novel strategy to ensure keys and
+// values of the map are always cache-line aligned and that no single key or
+// value of any type ever straddles a cache-line. This cache efficiency makes
+// for quick lookups because the linear-probe always addresses data in a cache
+// friendly way. This is enabled through the use of a special meta-type called
+// a Map_Cell which packs as many values of a given type into a local array adding
+// internal padding to round to MAP_CACHE_LINE_SIZE. One other benefit to storing
+// the internal data in this manner is false sharing no longer occurs when using
+// a map, enabling efficient concurrent access of the map data structure with
+// minimal locking if desired.
+
+// Maximum load factor, expressed as an integer percentage (map_load_factor
+// multiplies by this and divides by 100). With Robin Hood hashing a maximum
+// load factor of 75% is ideal.
+MAP_LOAD_FACTOR :: 75
+
+// Minimum log2 capacity used when allocating a map.
+MAP_MIN_LOG2_CAPACITY :: 3 // 8 elements
+
+// The load factor has to be less than 100% though.
+#assert(MAP_LOAD_FACTOR < 100)
+
+// This is safe to change. The log2 size of a cache-line. At minimum it has to
+// be six though. Higher cache line sizes are permitted.
+MAP_CACHE_LINE_LOG2 :: 6
+
+// The size of a cache-line in bytes.
+MAP_CACHE_LINE_SIZE :: 1 << MAP_CACHE_LINE_LOG2
+
+// The minimum cache-line size allowed by this implementation is 64 bytes since
+// we need 6 bits in the base pointer to store the integer log2 capacity, which
+// at maximum is 63. Odin uses signed integers to represent length and capacity,
+// so only 63 bits are needed in the maximum case.
+#assert(MAP_CACHE_LINE_SIZE >= 64)
+
+// Map_Cell type that packs multiple T in such a way to ensure that each T stays
+// aligned by align_of(T) and such that align_of(Map_Cell(T)) % MAP_CACHE_LINE_SIZE == 0
+//
+// This means a value of type T will never straddle a cache-line.
+//
+// When multiple Ts can fit in a single cache-line the data array will have more
+// than one element. When it cannot, the data array will have one element and
+// an array of Map_Cell(T) will be padded to stay a multiple of MAP_CACHE_LINE_SIZE.
+//
+// We rely on the type system to do all the arithmetic and padding for us here.
+//
+// The usual array[index] indexing for []T backed by a []Map_Cell(T) becomes a bit
+// more involved as there now may be internal padding. The indexing now becomes
+//
+//  N :: len(Map_Cell(T){}.data)
+//  i := index / N
+//  j := index % N
+//  cell[i].data[j]
+//
+// However, since len(Map_Cell(T){}.data) is a compile-time constant, there are some
+// optimizations we can do to eliminate the need for any divisions as N will
+// be bounded by [1, MAP_CACHE_LINE_SIZE] (N == MAP_CACHE_LINE_SIZE only when
+// size_of(T) == 1).
+//
+// In the optimal case, len(Map_Cell(T){}.data) = 1 so the cell array can be treated
+// as a regular array of T, which is the case for hashes.
+Map_Cell :: struct($T: typeid) #align(MAP_CACHE_LINE_SIZE) {
+	// Holds MAP_CACHE_LINE_SIZE / size_of(T) elements when T is non-empty and
+	// strictly smaller than a cache line; otherwise exactly one element.
+	data: [MAP_CACHE_LINE_SIZE / size_of(T) when 0 < size_of(T) && size_of(T) < MAP_CACHE_LINE_SIZE else 1]T,
+}
+
+// So we can operate on a cell data structure at runtime without any type
+// information, we have a simple table that stores some traits about the cell.
+// These are the values map_cell_index_dynamic uses to turn a flat element
+// index into a byte address.
+//
+// 32-bytes on 64-bit
+// 16-bytes on 32-bit
+Map_Cell_Info :: struct {
+	size_of_type:      uintptr, // byte size of one element;   8-bytes on 64-bit, 4-bytes on 32-bits
+	align_of_type:     uintptr, // alignment of one element;   8-bytes on 64-bit, 4-bytes on 32-bits
+	size_of_cell:      uintptr, // byte stride between cells;  8-bytes on 64-bit, 4-bytes on 32-bits
+	elements_per_cell: uintptr, // elements stored per cell;   8-bytes on 64-bit, 4-bytes on 32-bits
+}
+
+// Alias for the compiler intrinsic that yields the Map_Cell_Info table for a
+// type. Its effective signature is:
+// map_cell_info :: proc "contextless" ($T: typeid) -> ^Map_Cell_Info {...}
+map_cell_info :: intrinsics.type_map_cell_info
+
+// Runtime (type-erased) cell indexing: given the base address of a cell array
+// and the Map_Cell_Info describing it, returns the address of the element at
+// flat position `index`, accounting for any padding at the end of each cell.
+@(require_results)
+map_cell_index_dynamic :: #force_inline proc "contextless" (base: uintptr, #no_alias info: ^Map_Cell_Info, index: uintptr) -> uintptr {
+	per_cell  := info.elements_per_cell
+	cell_size := info.size_of_cell
+
+	// One element per cell: the cell array is a plain strided array.
+	if per_cell == 1 {
+		return base + (index * cell_size)
+	}
+
+	cell, slot: uintptr
+	if per_cell == 2 {
+		// Common case, avoid the integer division.
+		cell = index >> 1
+		slot = index & 1
+	} else {
+		cell = index / per_cell
+		slot = index % per_cell
+	}
+	return base + (cell * cell_size) + (slot * info.size_of_type)
+}
+
+// Variant of map_cell_index_dynamic whose flat index is a compile-time
+// constant. Always uses the general divide/modulo form.
+@(require_results)
+map_cell_index_dynamic_const :: proc "contextless" (base: uintptr, #no_alias info: ^Map_Cell_Info, $INDEX: uintptr) -> uintptr {
+	per_cell := info.elements_per_cell
+	cell     := INDEX / per_cell
+	slot     := INDEX % per_cell
+	return base + (cell * info.size_of_cell) + (slot * info.size_of_type)
+}
+
+// Compile-time (statically typed) cell indexing.
+//
+// The capacity of a map is always a power of two, so a logical [16]Foo with
+// four Foo per cell is stored as [4]Map_Cell(Foo). This procedure indexes such
+// a [N]Map_Cell(T) structure as if it were a flat array of T, accounting for
+// the internal padding introduced by the Map_Cell structure.
+//
+// Inputs:
+// - cells: pointer to the first cell
+// - index: flat element index (no bounds checking is performed)
+//
+// Returns a pointer to the `index`-th T. The result must agree with
+// map_cell_index_dynamic for the same type, since both views are used on the
+// same backing memory.
+@(require_results)
+map_cell_index_static :: #force_inline proc "contextless" (cells: [^]Map_Cell($T), index: uintptr) -> ^T #no_bounds_check {
+	// Number of T stored in each cell.
+	N :: size_of(Map_Cell(T){}.data) / size_of(T) when size_of(T) > 0 else 1
+
+	#assert(N <= MAP_CACHE_LINE_SIZE)
+
+	when size_of(Map_Cell(T)) == size_of([N]T) {
+		// No padding case, can treat as a regular array of []T.
+
+		return &([^]T)(cells)[index]
+	} else when (N & (N - 1)) == 0 && N <= 8*size_of(uintptr) {
+		// N is a power of two but the cell still carries trailing padding.
+
+		when N == 1 {
+			// Unique case: one element per cell, so the flat index IS the cell
+			// index and there is nothing to select inside the cell.
+			return &cells[index].data[0]
+		} else {
+			// Compute the integer log 2 of N, this is the shift amount to index the
+			// correct cell. Odin's intrinsics.count_leading_zeros does not produce a
+			// constant, hence this approach. We only need to check up to N = 64.
+			//
+			// NOTE: the table must map N to exactly log2(N); using `N < k`
+			// thresholds here yields log2(N)+1 and addresses the wrong cell.
+			SHIFT :: 1 when N == 2  else
+			         2 when N == 4  else
+			         3 when N == 8  else
+			         4 when N == 16 else
+			         5 when N == 32 else 6
+			#assert(SHIFT <= MAP_CACHE_LINE_LOG2)
+			#assert((1 << SHIFT) == N)
+			return &cells[index >> SHIFT].data[index & (N - 1)]
+		}
+	} else {
+		// Least likely (and worst case), we pay for a division operation but we
+		// assume the compiler does not actually generate a division. N will be in the
+		// range [1, CACHE_LINE_SIZE) and not a power of two.
+		return &cells[index / N].data[index % N]
+	}
+}
+
+// Implementation of len() for maps: the number of stored entries.
+@(require_results)
+map_len :: #force_inline proc "contextless" (m: Raw_Map) -> int {
+	count := m.len
+	return int(count)
+}
+
+// Implementation of cap() for maps.
+//
+// The data uintptr stores the capacity in its lower six bits, which gives a
+// maximum value of 2^6-1, or 63. What is stored is the integer log2 of the
+// capacity since the capacity is always a power of two. Only 63 bits are
+// needed as Odin represents length and capacity as a signed integer.
+// A map without a backing allocation reports a capacity of 0.
+@(require_results)
+map_cap :: #force_inline proc "contextless" (m: Raw_Map) -> int {
+	if m.data == 0 {
+		return 0
+	}
+	return 1 << map_log2_cap(m)
+}
+
+// Returns the number of entries at which a map of the given log2 capacity has
+// reached its maximum load. The load factor itself is not configurable; it is
+// applied as a fixed point percentage (MAP_LOAD_FACTOR / 100) to avoid
+// floating point operations. The division can be optimized out by
+// multiplying by the multiplicative inverse of 100.
+@(require_results)
+map_load_factor :: #force_inline proc "contextless" (log2_capacity: uintptr) -> uintptr {
+	capacity := uintptr(1) << log2_capacity
+	return (capacity * MAP_LOAD_FACTOR) / 100
+}
+
+// Entry count at which the given map is considered full for its current
+// capacity (see map_load_factor).
+@(require_results)
+map_resize_threshold :: #force_inline proc "contextless" (m: Raw_Map) -> uintptr {
+	log2_capacity := map_log2_cap(m)
+	return map_load_factor(log2_capacity)
+}
+
+// Extracts the log2 capacity stored in the lower six bits of the data
+// pointer. This is primarily used in the implementation rather than map_cap
+// since the check for data = 0 isn't necessary in the implementation. cap()
+// on the other hand needs to work when called on an empty map.
+@(require_results)
+map_log2_cap :: #force_inline proc "contextless" (m: Raw_Map) -> uintptr {
+	TAG_MASK :: uintptr(64 - 1)
+	return m.data & TAG_MASK
+}
+
+// Returns the address of the backing allocation: the data uintptr with the
+// capacity tag in its lower six bits masked off.
+@(require_results)
+map_data :: #force_inline proc "contextless" (m: Raw_Map) -> uintptr {
+	TAG_MASK :: uintptr(64 - 1)
+	return m.data &~ TAG_MASK
+}
+
+
+// Hash values are pointer-sized. A stored hash of zero marks an empty slot
+// (see map_hash_is_empty).
+Map_Hash :: uintptr
+
+// Most significant bit of a Map_Hash; when set on a stored hash the slot is a
+// tombstone (see map_hash_is_deleted).
+TOMBSTONE_MASK :: 1<<(size_of(Map_Hash)*8 - 1)
+
+// Reports whether a stored hash marks an empty slot. Empty is represented by
+// the zero value to make the zero value useful. This is a procedure just for
+// prose reasons.
+@(require_results)
+map_hash_is_empty :: #force_inline proc "contextless" (hash: Map_Hash) -> bool {
+	EMPTY :: Map_Hash(0)
+	return hash == EMPTY
+}
+
+// Reports whether a stored hash marks a deleted slot (tombstone), which is
+// indicated by the most significant bit being set.
+@(require_results)
+map_hash_is_deleted :: #force_no_inline proc "contextless" (hash: Map_Hash) -> bool {
+	tombstone_bit := hash & TOMBSTONE_MASK
+	return tombstone_bit != 0
+}
+// Reports whether a stored hash belongs to a live entry: it is neither zero
+// (empty) nor has the most significant bit set (tombstone).
+@(require_results)
+map_hash_is_valid :: #force_inline proc "contextless" (hash: Map_Hash) -> bool {
+	not_empty   := hash != 0
+	not_deleted := (hash & TOMBSTONE_MASK) == 0
+	return not_empty && not_deleted
+}
+
+// Hash seed for a map, derived from the (untagged) address of its backing
+// allocation.
+@(require_results)
+map_seed :: #force_inline proc "contextless" (m: Raw_Map) -> uintptr {
+	base := map_data(m)
+	return map_seed_from_map_data(base)
+}
+
+// Mixes a data address into a seed using a splitmix-style finalizer sized for
+// uintptr (64-bit constants on 64-bit targets, 32-bit constants otherwise).
+@(require_results)
+map_seed_from_map_data :: #force_inline proc "contextless" (data: uintptr) -> uintptr {
+	when size_of(uintptr) == size_of(u64) {
+		z := data + 0x9e3779b97f4a7c15
+		z ~= z >> 30
+		z *= 0xbf58476d1ce4e5b9
+		z ~= z >> 27
+		z *= 0x94d049bb133111eb
+		z ~= z >> 31
+		return z
+	} else {
+		z := data + 0x9e3779b9
+		z ~= z >> 16
+		z *= 0x21f0aaad
+		z ~= z >> 15
+		z *= 0x735a2d97
+		z ~= z >> 15
+		return z
+	}
+}
+
+// Computes the slot a hash would ideally occupy. This is just
+// hash % capacity, but a procedure as the capacity first has to be recovered
+// from the tagged data pointer.
+@(require_results)
+map_desired_position :: #force_inline proc "contextless" (m: Raw_Map, hash: Map_Hash) -> uintptr {
+	// map_cap is not used since the capacity is known to be non-zero here.
+	mask := (uintptr(1) << map_log2_cap(m)) - 1
+	return uintptr(hash & Map_Hash(mask))
+}
+
+// Number of slots between the ideal position of `hash` and `slot`, measured
+// forwards with wrap-around at the capacity.
+@(require_results)
+map_probe_distance :: #force_inline proc "contextless" (m: Raw_Map, hash: Map_Hash, slot: uintptr) -> uintptr {
+	// map_cap is not used since the capacity is known to be non-zero here.
+	capacity := uintptr(1) << map_log2_cap(m)
+	ideal    := map_desired_position(m, hash)
+	return (slot + capacity - ideal) & (capacity - 1)
+}
+
+// When working with the type-erased structure at runtime we need information
+// about the map to make working with it possible. This info structure stores
+// that: the cell layout of the key and value arrays plus the procedures used
+// to hash and compare keys.
+//
+// `Map_Info` and `Map_Cell_Info` are read only data structures and cannot be
+// modified after creation
+//
+// 32-bytes on 64-bit
+// 16-bytes on 32-bit
+Map_Info :: struct {
+	ks: ^Map_Cell_Info, // key cell layout;   8-bytes on 64-bit, 4-bytes on 32-bit
+	vs: ^Map_Cell_Info, // value cell layout; 8-bytes on 64-bit, 4-bytes on 32-bit
+	key_hasher: proc "contextless" (key: rawptr, seed: Map_Hash) -> Map_Hash, // 8-bytes on 64-bit, 4-bytes on 32-bit
+	key_equal:  proc "contextless" (lhs, rhs: rawptr) -> bool,                // 8-bytes on 64-bit, 4-bytes on 32-bit
+}
+
+
+// The Map_Info structure is basically a pseudo-table of information for a given K and V pair.
+// This is an alias for the compiler intrinsic producing it; its effective signature is:
+// map_info :: proc "contextless" ($T: typeid/map[$K]$V) -> ^Map_Info {...}
+map_info :: intrinsics.type_map_info
+
+// Splits the single backing allocation of a type-erased map into its regions.
+//
+// Returns, in layout order:
+// - ks: start of the key cells
+// - vs: start of the value cells
+// - hs: start of the stored hashes
+// - sk: start of the scratch key space
+// - sv: start of the scratch value space
+@(require_results)
+map_kvh_data_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info) -> (ks: uintptr, vs: uintptr, hs: [^]Map_Hash, sk: uintptr, sv: uintptr) {
+	hash_cells := intrinsics.type_map_cell_info(Map_Hash)
+	slot_count := uintptr(1) << map_log2_cap(m)
+
+	// Each region starts where indexing one-past-the-end of the previous ends.
+	ks = map_data(m)
+	vs = map_cell_index_dynamic(ks, info.ks, slot_count)
+
+	hashes := map_cell_index_dynamic(vs, info.vs, slot_count)
+	hs = ([^]Map_Hash)(hashes)
+
+	sk = map_cell_index_dynamic(hashes, hash_cells, slot_count)
+
+	// The scratch key space holds two elements; the scratch value space, which
+	// also holds two, begins right after them.
+	sv = map_cell_index_dynamic_const(sk, info.ks, 2)
+	return
+}
+
+// Returns only the start of the value cells of a type-erased map, i.e. the
+// address just past the key cells.
+@(require_results)
+map_kvh_data_values_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info) -> (vs: uintptr) {
+	slot_count := uintptr(1) << map_log2_cap(m)
+	keys_base  := map_data(m)
+	vs = map_cell_index_dynamic(keys_base, info.ks, slot_count)
+	return
+}
+
+
+// Number of bytes to allocate for a map of `capacity` slots: key cells, value
+// cells, hashes, then two scratch keys and two scratch values, with the
+// running size rounded up to a cache line after every region.
+@(private, require_results)
+map_total_allocation_size :: #force_inline proc "contextless" (capacity: uintptr, info: ^Map_Info) -> uintptr {
+	// Rounds up to the next multiple of MAP_CACHE_LINE_SIZE.
+	round_up :: #force_inline proc "contextless" (n: uintptr) -> uintptr {
+		LINE_MASK :: MAP_CACHE_LINE_SIZE - 1
+		return (n + LINE_MASK) &~ LINE_MASK
+	}
+	hash_cells := intrinsics.type_map_cell_info(Map_Hash)
+
+	total := round_up(map_cell_index_dynamic(0, info.ks, capacity))
+	total  = round_up(map_cell_index_dynamic(total, info.vs, capacity))
+	total  = round_up(map_cell_index_dynamic(total, hash_cells, capacity))
+	total  = round_up(map_cell_index_dynamic(total, info.ks, 2)) // scratch keys
+	total  = round_up(map_cell_index_dynamic(total, info.vs, 2)) // scratch values
+	return total
+}
+
+// Allocates the backing store for a map with the given log2 capacity.
+// This is the only procedure which needs access to the context.
+//
+// - The returned map always carries `allocator`, except on the overflow path
+//   below which returns a zero Raw_Map.
+// - A `log2_capacity` of 0 performs no allocation.
+// - A `log2_capacity` of 64 or more is reported as .Out_Of_Memory.
+// - Panics if the allocator hands back memory that is not cache-line aligned.
+//
+// NOTE(review): the allocation is sized for at least
+// 1 << MAP_MIN_LOG2_CAPACITY slots, but the tag stored in `result.data` is the
+// unclamped `log2_capacity`; callers passing a smaller value get a map whose
+// reported capacity is below what was allocated - confirm this is intended.
+@(require_results)
+map_alloc_dynamic :: proc "odin" (info: ^Map_Info, log2_capacity: uintptr, allocator := context.allocator, loc := #caller_location) -> (result: Raw_Map, err: Allocator_Error) {
+	CACHE_MASK :: MAP_CACHE_LINE_SIZE - 1
+
+	result.allocator = allocator // set the allocator always
+	if log2_capacity == 0 {
+		return
+	}
+	if log2_capacity >= 64 {
+		// The shift below would overflow; caused by log2_capacity >= 64
+		return {}, .Out_Of_Memory
+	}
+
+	slot_count := uintptr(1) << max(log2_capacity, MAP_MIN_LOG2_CAPACITY)
+	byte_count := map_total_allocation_size(slot_count, info)
+
+	backing := mem_alloc_non_zeroed(int(byte_count), MAP_CACHE_LINE_SIZE, allocator, loc) or_return
+	base := uintptr(raw_data(backing))
+	if base == 0 {
+		err = .Out_Of_Memory
+		return
+	}
+	if intrinsics.expect(base & CACHE_MASK != 0, false) {
+		panic("allocation not aligned to a cache line", loc)
+	}
+
+	// Tagged pointer representation for capacity.
+	result.data = base | log2_capacity
+	result.len  = 0
+
+	// The memory was not zeroed; clear the hashes so every slot reads as empty.
+	map_clear_dynamic(&result, info)
+	return
+}
+
+// Inserts the key at `ik` and the value at `iv`, with precomputed hash `h`,
+// into the type-erased map using Robin Hood hashing.
+//
+// The element being carried through the probe sequence, and the temporary
+// used while swapping it with a resident element, live in the two scratch
+// key/value slots that follow the hashes in the map's backing allocation (see
+// map_kvh_data_dynamic). Swapping can only be done with memcpy since there is
+// no type information.
+//
+// This procedure does not check for an existing equal key, does not grow the
+// map and does not modify `m.len`; callers handle those.
+//
+// This procedure returns the address of the just inserted value.
+@(require_results)
+map_insert_hash_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, h: Map_Hash, ik: uintptr, iv: uintptr) -> (result: uintptr) {
+	h        := h
+	pos      := map_desired_position(m^, h)
+	distance := uintptr(0)
+	mask     := (uintptr(1) << map_log2_cap(m^)) - 1
+
+	ks, vs, hs, sk, sv := map_kvh_data_dynamic(m^, info)
+
+	// Avoid redundant loads of these values
+	size_of_k := info.ks.size_of_type
+	size_of_v := info.vs.size_of_type
+
+	// Scratch slot 0 holds the element currently being carried; it starts out
+	// as a copy of the caller's key and value.
+	k := map_cell_index_dynamic(sk, info.ks, 0)
+	v := map_cell_index_dynamic(sv, info.vs, 0)
+	intrinsics.mem_copy_non_overlapping(rawptr(k), rawptr(ik), size_of_k)
+	intrinsics.mem_copy_non_overlapping(rawptr(v), rawptr(iv), size_of_v)
+
+	// Temporary k and v dynamic storage for swap below
+	tk := map_cell_index_dynamic(sk, info.ks, 1)
+	tv := map_cell_index_dynamic(sv, info.vs, 1)
+
+	// Phase 1: probe forwards, swapping the carried element with any resident
+	// whose probe distance is smaller, until an empty slot or a tombstone is
+	// reached.
+	swap_loop: for {
+		element_hash := hs[pos]
+
+		if map_hash_is_empty(element_hash) {
+			// Empty slot: drop the carried element here and finish.
+			k_dst := map_cell_index_dynamic(ks, info.ks, pos)
+			v_dst := map_cell_index_dynamic(vs, info.vs, pos)
+			intrinsics.mem_copy_non_overlapping(rawptr(k_dst), rawptr(k), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(v_dst), rawptr(v), size_of_v)
+			hs[pos] = h
+
+			// `result` is already set if the caller's element was placed by an
+			// earlier swap; otherwise it was placed right here.
+			return result if result != 0 else v_dst
+		}
+
+		if map_hash_is_deleted(element_hash) {
+			// Tombstone: handled by the backward shift loop below.
+			break swap_loop
+		}
+
+		if probe_distance := map_probe_distance(m^, element_hash, pos); distance > probe_distance {
+			// The first swap is where the caller's value ends up.
+			if result == 0 {
+				result = map_cell_index_dynamic(vs, info.vs, pos)
+			}
+
+			kp := map_cell_index_dynamic(ks, info.ks, pos)
+			vp := map_cell_index_dynamic(vs, info.vs, pos)
+
+			// Swap carried key <-> resident key through the temporary.
+			intrinsics.mem_copy_non_overlapping(rawptr(tk), rawptr(k), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(k),  rawptr(kp), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(kp), rawptr(tk), size_of_k)
+
+			// Swap carried value <-> resident value through the temporary.
+			intrinsics.mem_copy_non_overlapping(rawptr(tv), rawptr(v), size_of_v)
+			intrinsics.mem_copy_non_overlapping(rawptr(v),  rawptr(vp), size_of_v)
+			intrinsics.mem_copy_non_overlapping(rawptr(vp), rawptr(tv), size_of_v)
+
+			// Swap the hashes as well.
+			th := h
+			h = hs[pos]
+			hs[pos] = th
+
+			// Continue probing on behalf of the displaced element.
+			distance = probe_distance
+		}
+
+		pos = (pos + 1) & mask
+		distance += 1
+	}
+
+	// backward shift loop
+	// Phase 2: only reached via the tombstone at `pos`. The tombstone is
+	// cleared and the slots after it are examined at offset `look_ahead`.
+	hs[pos] = 0
+	look_ahead: uintptr = 1
+	for {
+		la_pos := (pos + look_ahead) & mask
+		element_hash := hs[la_pos]
+
+		if map_hash_is_deleted(element_hash) {
+			// Further tombstone: clear it and look one slot further ahead.
+			look_ahead += 1
+			hs[la_pos] = 0
+			continue
+		}
+
+		k_dst := map_cell_index_dynamic(ks, info.ks, pos)
+		v_dst := map_cell_index_dynamic(vs, info.vs, pos)
+
+		if map_hash_is_empty(element_hash) {
+			// Nothing left ahead: place the carried element at `pos`.
+			intrinsics.mem_copy_non_overlapping(rawptr(k_dst), rawptr(k), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(v_dst), rawptr(v), size_of_v)
+			hs[pos] = h
+
+			return result if result != 0 else v_dst
+		}
+
+		k_src := map_cell_index_dynamic(ks, info.ks, la_pos)
+		v_src := map_cell_index_dynamic(vs, info.vs, la_pos)
+		probe_distance := map_probe_distance(m^, element_hash, la_pos)
+
+		if probe_distance < look_ahead {
+			// probed can be made ideal while placing saved (ending condition)
+			if result == 0 {
+				result = v_dst
+			}
+			intrinsics.mem_copy_non_overlapping(rawptr(k_dst), rawptr(k), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(v_dst), rawptr(v), size_of_v)
+			hs[pos] = h
+
+			// This will be an ideal move
+			pos = (la_pos - probe_distance) & mask
+			look_ahead -= probe_distance
+
+			// shift until we hit ideal/empty
+			// Every `return` in this loop yields `result`, which was set above
+			// at the latest.
+			for probe_distance != 0 {
+				k_dst = map_cell_index_dynamic(ks, info.ks, pos)
+				v_dst = map_cell_index_dynamic(vs, info.vs, pos)
+
+				intrinsics.mem_copy_non_overlapping(rawptr(k_dst), rawptr(k_src), size_of_k)
+				intrinsics.mem_copy_non_overlapping(rawptr(v_dst), rawptr(v_src), size_of_v)
+				hs[pos] = element_hash
+				hs[la_pos] = 0
+
+				pos = (pos + 1) & mask
+				la_pos = (la_pos + 1) & mask
+				look_ahead = (la_pos - pos) & mask
+				element_hash = hs[la_pos]
+				if map_hash_is_empty(element_hash) {
+					return
+				}
+
+				probe_distance = map_probe_distance(m^, element_hash, la_pos)
+				if probe_distance == 0 {
+					return
+				}
+				// can be ideal?
+				if probe_distance < look_ahead {
+					pos = (la_pos - probe_distance) & mask
+				}
+				k_src = map_cell_index_dynamic(ks, info.ks, la_pos)
+				v_src = map_cell_index_dynamic(vs, info.vs, la_pos)
+			}
+			return
+		} else if distance < probe_distance - look_ahead {
+			// shift back probed
+			intrinsics.mem_copy_non_overlapping(rawptr(k_dst), rawptr(k_src), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(v_dst), rawptr(v_src), size_of_v)
+			hs[pos] = element_hash
+			hs[la_pos] = 0
+		} else {
+			// place saved, save probed
+			if result == 0 {
+				result = v_dst
+			}
+			intrinsics.mem_copy_non_overlapping(rawptr(k_dst), rawptr(k), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(v_dst), rawptr(v), size_of_v)
+			hs[pos] = h
+
+			// The element that was ahead becomes the carried element.
+			intrinsics.mem_copy_non_overlapping(rawptr(k), rawptr(k_src), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(v), rawptr(v_src), size_of_v)
+			h = hs[la_pos]
+			hs[la_pos] = 0
+			distance = probe_distance - look_ahead
+		}
+
+		pos = (pos + 1) & mask
+		distance += 1
+	}
+}
+
+// Grows the map to the next power-of-two capacity (never below the minimum
+// capacity) by delegating to map_reserve_dynamic.
+@(require_results)
+map_grow_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> Allocator_Error {
+	next_log2 := max(map_log2_cap(m^) + 1, MAP_MIN_LOG2_CAPACITY)
+	return map_reserve_dynamic(m, info, uintptr(1) << next_log2, loc)
+}
+
+
+// Ensures the map has a capacity of at least `new_capacity` slots.
+//
+// - Uses context.allocator when the map has no allocator yet.
+// - Does nothing when the current capacity already suffices.
+// - The requested capacity is rounded up to a power of two and is never below
+//   the minimum capacity.
+// - When a backing store already exists, every live entry is rehashed with
+//   the seed of the new store and re-inserted, after which the old store is
+//   freed. `m.len` is left unchanged.
+@(require_results)
+map_reserve_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uintptr, loc := #caller_location) -> Allocator_Error {
+	// log2 of `x` rounded up to the next power of two.
+	@(require_results)
+	log2_ceil :: #force_inline proc "contextless" (x: uintptr) -> uintptr {
+		leading := intrinsics.count_leading_zeros(x)
+		is_power_of_two := x & (x-1) == 0
+		if leading > 0 && !is_power_of_two {
+			leading -= 1
+		}
+		return size_of(uintptr)*8 - 1 - leading
+	}
+
+	if m.allocator.procedure == nil {
+		m.allocator = context.allocator
+	}
+
+	current_capacity := uintptr(map_cap(m^))
+	if new_capacity <= current_capacity {
+		return nil
+	}
+
+	target_log2 := max(MAP_MIN_LOG2_CAPACITY, log2_ceil(new_capacity))
+
+	// No backing store yet: nothing to migrate.
+	if m.data == 0 {
+		m^ = map_alloc_dynamic(info, target_log2, m.allocator, loc) or_return
+		return nil
+	}
+
+	grown := map_alloc_dynamic(info, target_log2, m.allocator, loc) or_return
+
+	old_ks, old_vs, old_hs, _, _ := map_kvh_data_dynamic(m^, info)
+
+	// Count down the live entries so the scan can stop as soon as the last one
+	// has been moved.
+	remaining := m.len
+	for slot in 0..<current_capacity {
+		// Skip empty slots and tombstones.
+		if !map_hash_is_valid(old_hs[slot]) {
+			continue
+		}
+		src_k := map_cell_index_dynamic(old_ks, info.ks, slot)
+		src_v := map_cell_index_dynamic(old_vs, info.vs, slot)
+		rehashed := info.key_hasher(rawptr(src_k), map_seed(grown))
+		_ = map_insert_hash_dynamic(&grown, info, rehashed, src_k, src_v)
+
+		remaining -= 1
+		if remaining == 0 {
+			break
+		}
+	}
+
+	map_free_dynamic(m^, info, loc) or_return
+	m.data = grown.data
+	return nil
+}
+
+
+// Attempts to halve the capacity of the map.
+//
+// Every live entry is rehashed with the seed of the new backing store and
+// re-inserted; the old store is then freed. `m.len` is left unchanged.
+//
+// Returns:
+// - did_shrink: true when the map now uses the smaller backing store
+// - err:        allocator error from allocating the new or freeing the old store
+//
+// Nothing happens (false, nil) when the map has no capacity to give up, or
+// when its entries would not fit within the load factor of the halved capacity.
+@(require_results)
+map_shrink_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
+	if m.allocator.procedure == nil {
+		m.allocator = context.allocator
+	}
+
+	log2_capacity := map_log2_cap(m^)
+	if log2_capacity == 0 {
+		// Nothing to shrink. Checked explicitly rather than relying on
+		// `log2_capacity - 1` wrapping around in the computations below.
+		return false, nil
+	}
+
+	// Cannot shrink the capacity if the number of items in the map would exceed
+	// one minus the current log2 capacity's resize threshold. That is the shrunk
+	// map needs to be within the max load factor.
+	if uintptr(m.len) >= map_load_factor(log2_capacity - 1) {
+		return false, nil
+	}
+
+	// Forward `loc` so the allocation is attributed to our caller, matching
+	// map_reserve_dynamic.
+	shrunk := map_alloc_dynamic(info, log2_capacity - 1, m.allocator, loc) or_return
+
+	capacity := uintptr(1) << log2_capacity
+
+	ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
+
+	n := m.len
+	for i in 0..<capacity {
+		hash := hs[i]
+		if map_hash_is_empty(hash) {
+			continue
+		}
+		if map_hash_is_deleted(hash) {
+			continue
+		}
+
+		k := map_cell_index_dynamic(ks, info.ks, i)
+		v := map_cell_index_dynamic(vs, info.vs, i)
+		// The seed depends on the backing store, so the key must be rehashed.
+		hash = info.key_hasher(rawptr(k), map_seed(shrunk))
+		_ = map_insert_hash_dynamic(&shrunk, info, hash, k, v)
+		// Only need to do this comparison on each actually added pair, so do not
+		// fold it into the for loop comparator as a micro-optimization.
+		n -= 1
+		if n == 0 {
+			break
+		}
+	}
+
+	map_free_dynamic(m^, info, loc) or_return
+	m.data = shrunk.data
+	return true, nil
+}
+
+// Frees the backing allocation of a map using the map's own allocator.
+// An allocator answering .Mode_Not_Implemented is treated as success.
+@(require_results)
+map_free_dynamic :: proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_location) -> Allocator_Error {
+	base       := rawptr(map_data(m))
+	byte_count := int(map_total_allocation_size(uintptr(map_cap(m)), info))
+
+	free_err := mem_free_with_size(base, byte_count, m.allocator, loc)
+	if free_err == .None || free_err == .Mode_Not_Implemented {
+		return nil
+	}
+	return free_err
+}
+
+// Finds the slot holding the key stored at address `k`.
+//
+// Returns the slot index and true when found, otherwise (0, false). Probing
+// stops at an empty slot, or as soon as a resident element sits closer to its
+// ideal position than the number of probes made so far.
+@(require_results)
+map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (index: uintptr, ok: bool) {
+	if map_len(m) == 0 {
+		return 0, false
+	}
+
+	wanted := info.key_hasher(rawptr(k), map_seed(m))
+	slot   := map_desired_position(m, wanted)
+	mask   := (uintptr(1) << map_log2_cap(m)) - 1
+	ks, _, hs, _, _ := map_kvh_data_dynamic(m, info)
+
+	probes := uintptr(0)
+	for {
+		resident := hs[slot]
+		if map_hash_is_empty(resident) || probes > map_probe_distance(m, resident, slot) {
+			return 0, false
+		}
+		if resident == wanted {
+			candidate := rawptr(map_cell_index_dynamic(ks, info.ks, slot))
+			if info.key_equal(rawptr(k), candidate) {
+				return slot, true
+			}
+		}
+		slot = (slot + 1) & mask
+		probes += 1
+	}
+}
+// Reports whether the key stored at address `k` is present in the map.
+// Uses exactly the probe sequence of map_lookup_dynamic and discards the
+// slot index it produces.
+@(require_results)
+map_exists_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (ok: bool) {
+	_, ok = map_lookup_dynamic(m, info, k)
+	return
+}
+
+
+
+// Removes the entry whose key equals the key stored at address `k`.
+//
+// Returns the addresses of the removed key and value inside the map's backing
+// store together with true, or ok = false when the key is not present. The
+// slot is either emptied or left as a tombstone and `m.len` is decremented.
+@(require_results)
+map_erase_dynamic :: #force_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (old_k, old_v: uintptr, ok: bool) {
+	index := map_lookup_dynamic(m^, info, k) or_return
+	ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
+
+	// Mark the slot as deleted; it may be downgraded to empty below.
+	hs[index] |= TOMBSTONE_MASK
+
+	old_k = map_cell_index_dynamic(ks, info.ks, index)
+	old_v = map_cell_index_dynamic(vs, info.vs, index)
+	m.len -= 1
+	ok = true
+
+	wrap           := (uintptr(1)<<map_log2_cap(m^)) - 1
+	successor      := (index + 1) & wrap
+	successor_hash := hs[successor]
+
+	// If the next element is empty or sits at its ideal position, no probe
+	// sequence passes through this slot to reach it, so the slot can be
+	// emptied outright. Otherwise the tombstone written above stays.
+	if map_hash_is_empty(successor_hash) || map_probe_distance(m^, successor_hash, successor) == 0 {
+		hs[index] = 0
+	}
+
+	return
+}
+
+// Removes all entries by zeroing every stored hash and resetting the length.
+// Keys and values are left untouched; a map without a backing store is left
+// as it is.
+map_clear_dynamic :: #force_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info) {
+	if m.data != 0 {
+		_, _, hashes, _, _ := map_kvh_data_dynamic(m^, info)
+		intrinsics.mem_zero(rawptr(hashes), map_cap(m^) * size_of(Map_Hash))
+		m.len = 0
+	}
+}
+
+
+// Statically typed counterpart of map_kvh_data_dynamic: returns the start of
+// the key cells, the value cells and the stored hashes of a map.
+@(require_results)
+map_kvh_data_static :: #force_inline proc "contextless" (m: $T/map[$K]$V) -> (ks: [^]Map_Cell(K), vs: [^]Map_Cell(V), hs: [^]Map_Hash) {
+	slot_count := uintptr(cap(m))
+	raw        := transmute(Raw_Map)m
+
+	// Each region starts one-past-the-end of the previous one.
+	ks = ([^]Map_Cell(K))(map_data(raw))
+	vs = ([^]Map_Cell(V))(map_cell_index_static(ks, slot_count))
+	hs = ([^]Map_Hash)(map_cell_index_static(vs, slot_count))
+	return
+}
+
+
+// Looks up `key` in a statically typed map.
+//
+// Returns copies of the stored key and value together with true when found;
+// otherwise zero values and false.
+@(require_results)
+map_get :: proc "contextless" (m: $T/map[$K]$V, key: K) -> (stored_key: K, stored_value: V, ok: bool) {
+	raw := transmute(Raw_Map)m
+	if raw.len == 0 {
+		return
+	}
+
+	info := intrinsics.type_map_info(T)
+	// Local copy so the key can be passed by address.
+	lookup_key := key
+
+	wanted := info.key_hasher(&lookup_key, map_seed(raw))
+	slot   := map_desired_position(raw, wanted)
+	mask   := (uintptr(1) << map_log2_cap(raw)) - 1
+	ks, vs, hs := map_kvh_data_static(m)
+
+	probes := uintptr(0)
+	for {
+		resident := hs[slot]
+		if map_hash_is_empty(resident) || probes > map_probe_distance(raw, resident, slot) {
+			return
+		}
+		if resident == wanted {
+			candidate := map_cell_index_static(ks, slot)
+			if info.key_equal(&lookup_key, rawptr(candidate)) {
+				stored_key   = candidate^
+				stored_value = map_cell_index_static(vs, slot)^
+				ok = true
+				return
+			}
+		}
+		slot = (slot + 1) & mask
+		probes += 1
+	}
+}
+
+// IMPORTANT: USED WITHIN THE COMPILER
+//
+// Looks up `key`, whose hash `h` has already been computed, and returns the
+// address of its value inside the map, or nil when the key is not present.
+__dynamic_map_get :: proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, h: Map_Hash, key: rawptr) -> (ptr: rawptr) {
+	if m.len == 0 {
+		return nil
+	}
+
+	slot := map_desired_position(m^, h)
+	mask := (uintptr(1) << map_log2_cap(m^)) - 1
+	ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
+
+	probes := uintptr(0)
+	for {
+		resident := hs[slot]
+		if map_hash_is_empty(resident) || probes > map_probe_distance(m^, resident, slot) {
+			return nil
+		}
+		if resident == h {
+			candidate := rawptr(map_cell_index_dynamic(ks, info.ks, slot))
+			if info.key_equal(key, candidate) {
+				return rawptr(map_cell_index_dynamic(vs, info.vs, slot))
+			}
+		}
+		slot = (slot + 1) & mask
+		probes += 1
+	}
+}
+
+// IMPORTANT: USED WITHIN THE COMPILER
+//
+// Grows the map when its length has reached the resize threshold of its
+// current capacity. `has_grown` reports whether a grow was attempted; `err`
+// is the result of that attempt.
+__dynamic_map_check_grow :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (err: Allocator_Error, has_grown: bool) {
+	if m.len < map_resize_threshold(m^) {
+		return nil, false
+	}
+	return map_grow_dynamic(m, info, loc), true
+}
+
+// Convenience wrapper around __dynamic_map_set that hashes `key` with the
+// map's current seed first.
+__dynamic_map_set_without_hash :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, key, value: rawptr, loc := #caller_location) -> rawptr {
+	hash := info.key_hasher(key, map_seed(m^))
+	return __dynamic_map_set(m, info, hash, key, value, loc)
+}
+
+
+// IMPORTANT: USED WITHIN THE COMPILER
+//
+// Stores `value` under `key`, whose hash for the current backing store is
+// `hash`. An existing entry has its value overwritten in place; otherwise the
+// map is grown if necessary and the pair is inserted.
+//
+// Returns the address of the stored value, or nil when growing the map failed.
+__dynamic_map_set :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, hash: Map_Hash, key, value: rawptr, loc := #caller_location) -> rawptr {
+	existing := __dynamic_map_get(m, info, hash, key)
+	if existing != nil {
+		intrinsics.mem_copy_non_overlapping(existing, value, info.vs.size_of_type)
+		return existing
+	}
+
+	grow_err, grew := __dynamic_map_check_grow(m, info, loc)
+	if grow_err != nil {
+		return nil
+	}
+
+	// The seed is derived from the backing store, so a grow invalidates the
+	// hash that was passed in.
+	insert_hash := hash
+	if grew {
+		insert_hash = info.key_hasher(key, map_seed(m^))
+	}
+
+	slot := map_insert_hash_dynamic(m, info, insert_hash, uintptr(key), uintptr(value))
+	m.len += 1
+	return rawptr(slot)
+}
+
+// IMPORTANT: USED WITHIN THE COMPILER
+//
+// Thin adapter over map_reserve_dynamic taking the capacity as a `uint`.
+@(private)
+__dynamic_map_reserve :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uint, loc := #caller_location) -> Allocator_Error {
+	wanted := uintptr(new_capacity)
+	return map_reserve_dynamic(m, info, wanted, loc)
+}
+
+
+
+// NOTE: the default hashing algorithm derives from fnv64a, with some minor modifications to work for `map` type:
+//
+//     * Convert a `0` result to `1`
+//         * a stored hash of `0` means "empty entry"
+//     * Prevent the top bit from being set
+//         * a set top bit means "deleted entry"
+//
+// Both of these modifications are necessary for the implementation of the `map`
+
+// Constant added to the seed to form the initial hash state of the default hashers.
+INITIAL_HASH_SEED :: 0xcbf29ce484222325
+
+// All bits of a uintptr except the most significant one, which is reserved
+// for marking deleted entries.
+HASH_MASK :: 1 << (8*size_of(uintptr) - 1) -1
+
+// Default hasher over `N` raw bytes starting at `data`.
+//
+// The state starts at `seed + INITIAL_HASH_SEED`; each byte is xor-ed in and
+// the state multiplied by a fixed prime. The result never has the top bit set
+// and is never zero (zero is converted to one).
+default_hasher :: #force_inline proc "contextless" (data: rawptr, seed: uintptr, N: int) -> uintptr {
+	PRIME :: 0x100000001b3
+
+	state := u64(seed) + INITIAL_HASH_SEED
+	bytes := ([^]byte)(data)
+	for i in 0..<N {
+		state = (state ~ u64(bytes[i])) * PRIME
+	}
+	state &= HASH_MASK
+
+	hash := uintptr(state)
+	return hash | uintptr(hash == 0)
+}
+
+// Default hasher for keys laid out as a byte slice (pointer + length):
+// hashes the bytes the slice refers to.
+default_hasher_string :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr {
+	bytes := (^[]byte)(data)^
+	return default_hasher(raw_data(bytes), seed, len(bytes))
+}
+// Default hasher for keys that are a pointer to NUL-terminated bytes: hashes
+// every byte up to, but not including, the terminator. A nil pointer hashes
+// like an empty string.
+default_hasher_cstring :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr {
+	PRIME :: 0x100000001b3
+
+	state := u64(seed) + INITIAL_HASH_SEED
+	bytes := (^[^]byte)(data)^
+	if bytes != nil {
+		for i := 0; bytes[i] != 0; i += 1 {
+			state = (state ~ u64(bytes[i])) * PRIME
+		}
+	}
+	state &= HASH_MASK
+
+	hash := uintptr(state)
+	return hash | uintptr(hash == 0)
+}
diff --git a/base/runtime/entry_unix.odin b/base/runtime/entry_unix.odin
new file mode 100644
index 000000000..f494a509e
--- /dev/null
+++ b/base/runtime/entry_unix.odin
@@ -0,0 +1,59 @@
+//+private
+//+build linux, darwin, freebsd, openbsd
+//+no-instrumentation
+package runtime
+
+import "core:intrinsics"
+
+// Entry points for the Unix-like targets listed in this file's build tag.
+// Which procedures are compiled depends on the build mode and the no-CRT / test / no-entry-point flags.
+when ODIN_BUILD_MODE == .Dynamic {
+	// Dynamic builds: separate entry and exit hooks, each installing a default context first.
+	@(link_name="_odin_entry_point", linkage="strong", require/*, link_section=".init"*/)
+	_odin_entry_point :: proc "c" () {
+		context = default_context()
+		#force_no_inline _startup_runtime()
+		intrinsics.__entry_point()
+	}
+	@(link_name="_odin_exit_point", linkage="strong", require/*, link_section=".fini"*/)
+	_odin_exit_point :: proc "c" () {
+		context = default_context()
+		#force_no_inline _cleanup_runtime()
+	}
+	// In this mode `main` does no work and simply returns 0.
+	@(link_name="main", linkage="strong", require)
+	main :: proc "c" (argc: i32, argv: [^]cstring) -> i32 {
+		return 0
+	}
+} else when !ODIN_TEST && !ODIN_NO_ENTRY_POINT {
+	when ODIN_NO_CRT {
+		// NOTE(flysand): We need to start from assembly because we need
+		// to retrieve argc and argv from the stack
+		// Each branch pulls in the matching assembly stub and defines the syscall number
+		// that `_start_odin` passes to `intrinsics.syscall` in order to exit.
+		when ODIN_ARCH == .amd64 {
+			@require foreign import entry "entry_unix_no_crt_amd64.asm"
+			SYS_exit :: 60
+		} else when ODIN_ARCH == .i386 {
+			@require foreign import entry "entry_unix_no_crt_i386.asm"
+			SYS_exit :: 1
+		} else when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 {
+			@require foreign import entry "entry_unix_no_crt_darwin_arm64.asm"
+			SYS_exit :: 1
+		}
+		// Called from the assembly stub with argc/argv; never returns.
+		@(link_name="_start_odin", linkage="strong", require)
+		_start_odin :: proc "c" (argc: i32, argv: [^]cstring) -> ! {
+			// Record the arguments before the runtime starts up
+			args__ = argv[:argc]
+			context = default_context()
+			#force_no_inline _startup_runtime()
+			intrinsics.__entry_point()
+			#force_no_inline _cleanup_runtime()
+			// Exit through a raw syscall with status 0
+			intrinsics.syscall(SYS_exit, 0)
+			unreachable()
+		}
+	} else {
+		// With a C runtime: a conventional `main` that records the arguments, starts the runtime,
+		// runs the program's entry point, cleans up and returns 0.
+		@(link_name="main", linkage="strong", require)
+		main :: proc "c" (argc: i32, argv: [^]cstring) -> i32 {
+			args__ = argv[:argc]
+			context = default_context()
+			#force_no_inline _startup_runtime()
+			intrinsics.__entry_point()
+			#force_no_inline _cleanup_runtime()
+			return 0
+		}
+	}
+}
diff --git a/base/runtime/entry_unix_no_crt_amd64.asm b/base/runtime/entry_unix_no_crt_amd64.asm
new file mode 100644
index 000000000..f0bdce8d7
--- /dev/null
+++ b/base/runtime/entry_unix_no_crt_amd64.asm
@@ -0,0 +1,43 @@
+bits 64
+
+extern _start_odin
+global _start
+
+section .text
+
+;; Entry point for programs that specify -no-crt option
+;; This entry point should be compatible with dynamic loaders on linux
+;; The parameters the dynamic loader passes to the _start function:
+;;    RDX = pointer to atexit function
+;; The stack layout is as follows:
+;;    +-------------------+
+;;            NULL
+;;    +-------------------+
+;;           envp[m]
+;;    +-------------------+
+;;            ...
+;;    +-------------------+
+;;           envp[0]
+;;    +-------------------+
+;;            NULL
+;;    +-------------------+
+;;           argv[n]
+;;    +-------------------+
+;;            ...
+;;    +-------------------+
+;;           argv[0]
+;;    +-------------------+
+;;            argc
+;;    +-------------------+ <------ RSP
+;;
+_start:
+    ;; Mark stack frame as the top of the stack
+    xor rbp, rbp
+    ;; Load argc into 1st param reg (RDI); after the pop, RSP points at argv[0]
+    pop rdi
+    ;; Load argv into 2nd param reg (RSI). RDX (atexit pointer) is left untouched
+    mov rsi, rsp
+    ;; Align stack pointer down to 16-bytes (sysv calling convention)
+    and rsp, -16
+    ;; Call into odin entry point
+    call _start_odin
+    jmp $$
+\ No newline at end of file
diff --git a/base/runtime/entry_unix_no_crt_darwin_arm64.asm b/base/runtime/entry_unix_no_crt_darwin_arm64.asm
new file mode 100644
index 000000000..0f71fbdf8
--- /dev/null
+++ b/base/runtime/entry_unix_no_crt_darwin_arm64.asm
@@ -0,0 +1,20 @@
+	.section __TEXT,__text
+
+	; NOTE(laytan): this should ideally be the -minimum-os-version flag but there is no nice way of preprocessing assembly in Odin.
+	; 10 seems to be the lowest it goes and I don't see it mess with any targeted os version so this seems fine.
+	.build_version macos, 10, 0
+
+	.extern __start_odin
+
+	.global _main
+	.align 2
+_main:
+	; NOTE(review): the two `str` instructions below WRITE x0/x1 to [x5] and [x5, #8]; they do not
+	; load anything, although their comments say "get ... into". Confirm whether loads were intended
+	; or whether x0/x1 are expected to already hold argc/argv when `_main` is entered.
+	mov x5, sp       ; use x5 as the stack pointer
+
+	str x0, [x5]     ; get argc into x0 (kernel passes 32-bit int argc as 64-bits on stack to keep alignment)
+	str x1, [x5, #8] ; get argv into x1
+
+	and sp, x5, #~15 ; force 16-byte alignment of the stack
+	
+	bl __start_odin  ; call into Odin entry point
+	ret              ; should never get here
diff --git a/base/runtime/entry_unix_no_crt_i386.asm b/base/runtime/entry_unix_no_crt_i386.asm
new file mode 100644
index 000000000..a61d56a16
--- /dev/null
+++ b/base/runtime/entry_unix_no_crt_i386.asm
@@ -0,0 +1,18 @@
+bits 32
+
+extern _start_odin
+global _start
+
+section .text
+
+;; NOTE(flysand): For description see the corresponding *_amd64.asm file
+;; also I didn't test this on x86-32
+_start:
+    ;; Mark stack frame as the top of the stack (32-bit register; `rbp` does not exist in `bits 32` code)
+    xor ebp, ebp
+    ;; argc is on top of the stack; after the pop, ESP points at argv[0]
+    pop ecx
+    mov eax, esp
+    ;; Align stack pointer down to 16-bytes before pushing the arguments
+    and esp, -16
+    ;; Arguments are passed on the stack, pushed right to left: argv, then argc
+    push eax
+    push ecx
+    ;; Call into odin entry point
+    call _start_odin
+    jmp $$
+\ No newline at end of file
diff --git a/base/runtime/entry_wasm.odin b/base/runtime/entry_wasm.odin
new file mode 100644
index 000000000..e7f3f156f
--- /dev/null
+++ b/base/runtime/entry_wasm.odin
@@ -0,0 +1,20 @@
+//+private
+//+build wasm32, wasm64p32
+//+no-instrumentation
+package runtime
+
+import "core:intrinsics"
+
+// Exported entry/exit procedures for the wasm targets; omitted for test builds and when
+// no entry point is requested.
+when !ODIN_TEST && !ODIN_NO_ENTRY_POINT {
+	// Installs a default context, starts the runtime and runs the program's entry point.
+	@(link_name="_start", linkage="strong", require, export)
+	_start :: proc "c" () {
+		context = default_context()
+		#force_no_inline _startup_runtime()
+		intrinsics.__entry_point()
+	}
+	// Installs a default context and runs the runtime cleanup.
+	@(link_name="_end", linkage="strong", require, export)
+	_end :: proc "c" () {
+		context = default_context()
+		#force_no_inline _cleanup_runtime()
+	}
+}
+\ No newline at end of file
diff --git a/base/runtime/entry_windows.odin b/base/runtime/entry_windows.odin
new file mode 100644
index 000000000..b6fbe1dcc
--- /dev/null
+++ b/base/runtime/entry_windows.odin
@@ -0,0 +1,50 @@
+//+private
+//+build windows
+//+no-instrumentation
+package runtime
+
+import "core:intrinsics"
+
+// Entry points for Windows; which procedure is compiled depends on the build mode,
+// the target architecture and the no-CRT / test / no-entry-point flags.
+when ODIN_BUILD_MODE == .Dynamic {
+	// DLL entry: dispatches on the reason code and always reports success (`true`).
+	@(link_name="DllMain", linkage="strong", require)
+	DllMain :: proc "system" (hinstDLL: rawptr, fdwReason: u32, lpReserved: rawptr) -> b32 {
+		context = default_context()
+
+		// Populate Windows DLL-specific global
+		dll_forward_reason = DLL_Forward_Reason(fdwReason)
+
+		switch dll_forward_reason {
+		case .Process_Attach:
+			// Start the runtime and run the program's entry point when the process attaches
+			#force_no_inline _startup_runtime()
+			intrinsics.__entry_point()
+		case .Process_Detach:
+			#force_no_inline _cleanup_runtime()
+		case .Thread_Attach:
+			// Nothing to do for thread notifications
+			break
+		case .Thread_Detach:
+			break
+		}
+		return true
+	}
+} else when !ODIN_TEST && !ODIN_NO_ENTRY_POINT {
+	when ODIN_ARCH == .i386 || ODIN_NO_CRT {
+		// Takes no parameters, so `args__` is not assigned on this path.
+		@(link_name="mainCRTStartup", linkage="strong", require)
+		mainCRTStartup :: proc "system" () -> i32 {
+			context = default_context()
+			#force_no_inline _startup_runtime()
+			intrinsics.__entry_point()
+			#force_no_inline _cleanup_runtime()
+			return 0
+		}
+	} else {
+		// Conventional `main`: records the arguments, starts the runtime, runs the program's
+		// entry point, cleans up and returns 0.
+		@(link_name="main", linkage="strong", require)
+		main :: proc "c" (argc: i32, argv: [^]cstring) -> i32 {
+			args__ = argv[:argc]
+			context = default_context()
+			#force_no_inline _startup_runtime()
+			intrinsics.__entry_point()
+			#force_no_inline _cleanup_runtime()
+			return 0
+		}
+	}
+}
+\ No newline at end of file
diff --git a/base/runtime/error_checks.odin b/base/runtime/error_checks.odin
new file mode 100644
index 000000000..ea6333c29
--- /dev/null
+++ b/base/runtime/error_checks.odin
@@ -0,0 +1,292 @@
+package runtime
+
+// Terminates after a failed bounds check.
+// Windows builds call `windows_trap_array_bounds`; every other target calls `trap`.
+@(no_instrumentation)
+bounds_trap :: proc "contextless" () -> ! {
+	when ODIN_OS != .Windows {
+		trap()
+	} else {
+		windows_trap_array_bounds()
+	}
+}
+
+// Terminates after a failed type assertion.
+// Windows builds call `windows_trap_type_assertion`; every other target calls `trap`.
+@(no_instrumentation)
+type_assertion_trap :: proc "contextless" () -> ! {
+	when ODIN_OS != .Windows {
+		trap()
+	} else {
+		windows_trap_type_assertion()
+	}
+}
+
+
+// Checks that `index` lies in `0..<count`; prints a diagnostic and traps when it does not.
+// Both values are compared as `uint`, so a negative `index` fails the same single comparison.
+bounds_check_error :: proc "contextless" (file: string, line, column: i32, index, count: int) {
+	// Cold path: print the location and the offending values, then trap
+	@(cold, no_instrumentation)
+	report :: proc "contextless" (file: string, line, column: i32, index, count: int) -> ! {
+		loc := Source_Code_Location{file, line, column, ""}
+		print_caller_location(loc)
+		print_string(" Index ")
+		print_i64(i64(index))
+		print_string(" is out of range 0..<")
+		print_i64(i64(count))
+		print_byte('\n')
+		bounds_trap()
+	}
+
+	if uint(index) >= uint(count) {
+		report(file, line, column, index, count)
+	}
+}
+
+// Prints an "invalid slice indices" diagnostic for `lo:hi` against length `len`, then traps.
+@(no_instrumentation)
+slice_handle_error :: proc "contextless" (file: string, line, column: i32, lo, hi: int, len: int) -> ! {
+	loc := Source_Code_Location{file, line, column, ""}
+	print_caller_location(loc)
+
+	print_string(" Invalid slice indices ")
+	print_i64(i64(lo))
+	print_string(":")
+	print_i64(i64(hi))
+
+	print_string(" is out of range 0..<")
+	print_i64(i64(len))
+	print_byte('\n')
+
+	bounds_trap()
+}
+
+// Prints an "invalid slice indices" diagnostic for `lo:hi` (no length is printed), then traps.
+@(no_instrumentation)
+multi_pointer_slice_handle_error :: proc "contextless" (file: string, line, column: i32, lo, hi: int) -> ! {
+	loc := Source_Code_Location{file, line, column, ""}
+	print_caller_location(loc)
+
+	print_string(" Invalid slice indices ")
+	print_i64(i64(lo))
+	print_string(":")
+	print_i64(i64(hi))
+	print_byte('\n')
+
+	bounds_trap()
+}
+
+
+// Validates a multi-pointer slice expression: the only requirement checked is `lo <= hi`.
+// On failure the diagnostic is printed and execution traps.
+multi_pointer_slice_expr_error :: proc "contextless" (file: string, line, column: i32, lo, hi: int) {
+	if lo > hi {
+		multi_pointer_slice_handle_error(file, line, column, lo, hi)
+	}
+}
+
+// Validates a slice expression that has only an upper bound: requires `0 <= hi <= len`.
+// On failure the diagnostic is printed (with a lower bound of 0) and execution traps.
+slice_expr_error_hi :: proc "contextless" (file: string, line, column: i32, hi: int, len: int) {
+	if hi < 0 || len < hi {
+		slice_handle_error(file, line, column, 0, hi, len)
+	}
+}
+
+// Validates a slice expression with both bounds: requires `0 <= lo <= hi <= len` (and `lo <= len`).
+// On failure the diagnostic is printed and execution traps.
+slice_expr_error_lo_hi :: proc "contextless" (file: string, line, column: i32, lo, hi: int, len: int) {
+	if lo < 0 || len < lo || hi < lo || len < hi {
+		slice_handle_error(file, line, column, lo, hi, len)
+	}
+}
+
+// Validates dynamic array slice indices: requires `0 <= low <= high <= max`.
+// On failure a diagnostic is printed and execution traps.
+dynamic_array_expr_error :: proc "contextless" (file: string, line, column: i32, low, high, max: int) {
+	// Cold path: print the location and the offending values, then trap
+	@(cold, no_instrumentation)
+	report :: proc "contextless" (file: string, line, column: i32, low, high, max: int) -> ! {
+		loc := Source_Code_Location{file, line, column, ""}
+		print_caller_location(loc)
+		print_string(" Invalid dynamic array indices ")
+		print_i64(i64(low))
+		print_string(":")
+		print_i64(i64(high))
+		print_string(" is out of range 0..<")
+		print_i64(i64(max))
+		print_byte('\n')
+		bounds_trap()
+	}
+
+	if low < 0 || high < low || max < high {
+		report(file, line, column, low, high, max)
+	}
+}
+
+
+// Checks that `row_index` lies in `0..<row_count` and `column_index` in `0..<column_count`.
+// Both comparisons are done as `uint`, so negative indices fail them as well.
+// On failure a diagnostic of the form
+//     Matrix indices [r, c] is out of range [0..<rows, 0..<cols]
+// is printed and execution traps.
+matrix_bounds_check_error :: proc "contextless" (file: string, line, column: i32, row_index, column_index, row_count, column_count: int) {
+	if uint(row_index) < uint(row_count) &&
+	   uint(column_index) < uint(column_count) {
+		return
+	}
+	@(cold, no_instrumentation)
+	handle_error :: proc "contextless" (file: string, line, column: i32, row_index, column_index, row_count, column_count: int) -> ! {
+		print_caller_location(Source_Code_Location{file, line, column, ""})
+		print_string(" Matrix indices [")
+		print_i64(i64(row_index))
+		print_string(", ")
+		print_i64(i64(column_index))
+		// Close the bracket opened before the indices so the message is balanced
+		print_string("] is out of range [0..<")
+		print_i64(i64(row_count))
+		print_string(", 0..<")
+		print_i64(i64(column_count))
+		print_string("]")
+		print_byte('\n')
+		bounds_trap()
+	}
+	handle_error(file, line, column, row_index, column_index, row_count, column_count)
+}
+
+
+// Type assertion checks. Two variants of each procedure exist:
+//   * with ODIN_NO_RTTI, only the source location is available for the message
+//   * otherwise, the `from`/`to` typeids (and for `type_assertion_check2` the asserted value's data)
+//     are passed in so the message can name the types involved
+when ODIN_NO_RTTI {
+	// Does nothing when `ok`; otherwise prints the location with "Invalid type assertion" and traps.
+	type_assertion_check :: proc "contextless" (ok: bool, file: string, line, column: i32) {
+		if ok {
+			return
+		}
+		@(cold, no_instrumentation)
+		handle_error :: proc "contextless" (file: string, line, column: i32) -> ! {
+			print_caller_location(Source_Code_Location{file, line, column, ""})
+			print_string(" Invalid type assertion\n")
+			type_assertion_trap()
+		}
+		handle_error(file, line, column)
+	}
+
+	// Same behaviour as `type_assertion_check` in this configuration.
+	type_assertion_check2 :: proc "contextless" (ok: bool, file: string, line, column: i32) {
+		if ok {
+			return
+		}
+		@(cold, no_instrumentation)
+		handle_error :: proc "contextless" (file: string, line, column: i32) -> ! {
+			print_caller_location(Source_Code_Location{file, line, column, ""})
+			print_string(" Invalid type assertion\n")
+			type_assertion_trap()
+		}
+		handle_error(file, line, column)
+	}
+} else {
+	// Does nothing when `ok`; otherwise prints "Invalid type assertion from <from> to <to>" and traps.
+	type_assertion_check :: proc "contextless" (ok: bool, file: string, line, column: i32, from, to: typeid) {
+		if ok {
+			return
+		}
+		@(cold, no_instrumentation)
+		handle_error :: proc "contextless" (file: string, line, column: i32, from, to: typeid) -> ! {
+			print_caller_location(Source_Code_Location{file, line, column, ""})
+			print_string(" Invalid type assertion from ")
+			print_typeid(from)
+			print_string(" to ")
+			print_typeid(to)
+			print_byte('\n')
+			type_assertion_trap()
+		}
+		handle_error(file, line, column, from, to)
+	}
+
+	// Like `type_assertion_check`, but additionally inspects `from_data` to report the type
+	// actually stored in an `any` or union value when it differs from `from`.
+	type_assertion_check2 :: proc "contextless" (ok: bool, file: string, line, column: i32, from, to: typeid, from_data: rawptr) {
+		if ok {
+			return
+		}
+
+		// Returns the typeid currently held by the value at `data`:
+		//   * for an `any`, the id stored in the value
+		//   * for a union, the variant selected by the tag (nil when the computed index is negative)
+		//   * otherwise `id` itself
+		variant_type :: proc "contextless" (id: typeid, data: rawptr) -> typeid {
+			if id == nil || data == nil {
+				return id
+			}
+			ti := type_info_base(type_info_of(id))
+			#partial switch v in ti.variant {
+			case Type_Info_Any:
+				return (^any)(data).id
+			case Type_Info_Union:
+				// The tag is read at `tag_offset` with the width given by the tag type's size;
+				// the variant index is taken to be the tag value minus one
+				tag_ptr := uintptr(data) + v.tag_offset
+				idx := 0
+				switch v.tag_type.size {
+				case 1:  idx = int((^u8)(tag_ptr)^)   - 1
+				case 2:  idx = int((^u16)(tag_ptr)^)  - 1
+				case 4:  idx = int((^u32)(tag_ptr)^)  - 1
+				case 8:  idx = int((^u64)(tag_ptr)^)  - 1
+				case 16: idx = int((^u128)(tag_ptr)^) - 1
+				}
+				if idx < 0 {
+					return nil
+				} else if idx < len(v.variants) {
+					return v.variants[idx].id
+				}
+			}
+			// Fallback: not an `any`/union, or the index was out of range of the variants
+			return id
+		}
+
+		@(cold, no_instrumentation)
+		handle_error :: proc "contextless" (file: string, line, column: i32, from, to: typeid, from_data: rawptr) -> ! {
+
+			actual := variant_type(from, from_data)
+
+			print_caller_location(Source_Code_Location{file, line, column, ""})
+			print_string(" Invalid type assertion from ")
+			print_typeid(from)
+			print_string(" to ")
+			print_typeid(to)
+			// Only mention the actual type when it adds information
+			if actual != from {
+				print_string(", actual type: ")
+				print_typeid(actual)
+			}
+			print_byte('\n')
+			type_assertion_trap()
+		}
+		handle_error(file, line, column, from, to, from_data)
+	}
+}
+
+
+// Validates the length passed to `make` for a slice: a negative `len` prints a diagnostic and traps.
+make_slice_error_loc :: #force_inline proc "contextless" (loc := #caller_location, len: int) {
+	// Cold path: print the location and the offending length, then trap
+	@(cold, no_instrumentation)
+	report :: proc "contextless" (loc: Source_Code_Location, len: int) -> ! {
+		print_caller_location(loc)
+		print_string(" Invalid slice length for make: ")
+		print_i64(i64(len))
+		print_byte('\n')
+		bounds_trap()
+	}
+
+	if len < 0 {
+		report(loc, len)
+	}
+}
+
+// Validates the parameters passed to `make` for a dynamic array: requires `0 <= len <= cap`.
+// On failure a diagnostic is printed and execution traps.
+make_dynamic_array_error_loc :: #force_inline proc "contextless" (loc := #caller_location, len, cap: int) {
+	// Cold path: print the location and both values, then trap
+	@(cold, no_instrumentation)
+	report :: proc "contextless" (loc: Source_Code_Location, len, cap: int)  -> ! {
+		print_caller_location(loc)
+		print_string(" Invalid dynamic array parameters for make: ")
+		print_i64(i64(len))
+		print_byte(':')
+		print_i64(i64(cap))
+		print_byte('\n')
+		bounds_trap()
+	}
+
+	if len < 0 || cap < len {
+		report(loc, len, cap)
+	}
+}
+
+// Validates the capacity passed to `make` for a map: a negative `cap` prints a diagnostic and traps.
+make_map_expr_error_loc :: #force_inline proc "contextless" (loc := #caller_location, cap: int) {
+	// Cold path: print the location and the offending capacity, then trap
+	@(cold, no_instrumentation)
+	report :: proc "contextless" (loc: Source_Code_Location, cap: int)  -> ! {
+		print_caller_location(loc)
+		print_string(" Invalid map capacity for make: ")
+		print_i64(i64(cap))
+		print_byte('\n')
+		bounds_trap()
+	}
+
+	if cap < 0 {
+		report(loc, cap)
+	}
+}
+
+
+
+
+
+// The `*_loc` procedures below take a `Source_Code_Location` (defaulting to the caller's) and
+// forward its file path, line and column to the corresponding check procedure.
+
+// Forwards to `bounds_check_error`.
+bounds_check_error_loc :: #force_inline proc "contextless" (loc := #caller_location, index, count: int) {
+	bounds_check_error(loc.file_path, loc.line, loc.column, index, count)
+}
+
+// Forwards to `slice_expr_error_hi`.
+slice_expr_error_hi_loc :: #force_inline proc "contextless" (loc := #caller_location, hi: int, len: int) {
+	slice_expr_error_hi(loc.file_path, loc.line, loc.column, hi, len)
+}
+
+// Forwards to `slice_expr_error_lo_hi`.
+slice_expr_error_lo_hi_loc :: #force_inline proc "contextless" (loc := #caller_location, lo, hi: int, len: int) {
+	slice_expr_error_lo_hi(loc.file_path, loc.line, loc.column, lo, hi, len)
+}
+
+// Forwards to `dynamic_array_expr_error`.
+dynamic_array_expr_error_loc :: #force_inline proc "contextless" (loc := #caller_location, low, high, max: int) {
+	dynamic_array_expr_error(loc.file_path, loc.line, loc.column, low, high, max)
+}
diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin
new file mode 100644
index 000000000..a03c2a701
--- /dev/null
+++ b/base/runtime/internal.odin
@@ -0,0 +1,1036 @@
+package runtime
+
+import "core:intrinsics"
+
+// True when compiling for one of the wasm architectures
+@(private="file")
+IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
+
+// Linkage string selected at compile time: "strong" when separate modules are used, when building
+// dynamically, or when a C runtime is present -- but never on wasm; otherwise "internal"
+@(private)
+RUNTIME_LINKAGE :: "strong" when (
+	(ODIN_USE_SEPARATE_MODULES || 
+	ODIN_BUILD_MODE == .Dynamic ||
+	!ODIN_NO_CRT) &&
+	!IS_WASM) else "internal"
+// False only when ODIN_TILDE is set
+RUNTIME_REQUIRE :: !ODIN_TILDE
+
+// 16-bit float type: `f16` when __ODIN_LLVM_F16_SUPPORTED is set, otherwise `u16` stands in for it
+@(private)
+__float16 :: f16 when __ODIN_LLVM_F16_SUPPORTED else u16
+
+
+// Views `len` bytes starting at `data` as a `[]byte`; a negative `len` yields an empty slice.
+@(private)
+byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byte #no_bounds_check {
+	count := max(len, 0)
+	bytes := ([^]byte)(data)
+	return bytes[:count]
+}
+
+// Reports whether `x` is a positive power of two (zero and negative values are not).
+is_power_of_two_int :: #force_inline proc(x: int) -> bool {
+	// A power of two has exactly one bit set, so clearing the lowest set bit leaves zero
+	return x > 0 && (x & (x-1)) == 0
+}
+
+// Rounds `ptr` up to the next multiple of `align`; values that are already aligned are returned unchanged.
+// Asserts that `align` is a power of two.
+align_forward_int :: #force_inline proc(ptr, align: int) -> int {
+	assert(is_power_of_two_int(align))
+
+	// With a power-of-two alignment the remainder is just the low bits
+	remainder := ptr & (align-1)
+	if remainder == 0 {
+		return ptr
+	}
+	return ptr + (align - remainder)
+}
+
+// Reports whether `x` is a power of two (zero is not).
+is_power_of_two_uintptr :: #force_inline proc(x: uintptr) -> bool {
+	// `x` is unsigned, so "not positive" can only mean zero
+	return x != 0 && (x & (x-1)) == 0
+}
+
+// Rounds `ptr` up to the next multiple of `align`; values that are already aligned are returned unchanged.
+// Asserts that `align` is a power of two.
+align_forward_uintptr :: #force_inline proc(ptr, align: uintptr) -> uintptr {
+	assert(is_power_of_two_uintptr(align))
+
+	// With a power-of-two alignment the remainder is just the low bits
+	remainder := ptr & (align-1)
+	if remainder == 0 {
+		return ptr
+	}
+	return ptr + (align - remainder)
+}
+
+// Zeroes `len` bytes starting at `data` and returns `data`.
+// Nothing is written when `data` is nil or `len` is not positive.
+mem_zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
+	if data != nil && len > 0 {
+		intrinsics.mem_zero(data, len)
+	}
+	return data
+}
+
+// Copies `len` bytes from `src` to `dst` and returns `dst`.
+// Nothing is copied when `src` is nil, when both pointers are equal, or when `len` is not positive.
+mem_copy :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
+	if src == nil || dst == src || len <= 0 {
+		return dst
+	}
+	// NOTE(bill): This _must_ be implemented like C's memmove
+	intrinsics.mem_copy(dst, src, len)
+	return dst
+}
+
+// Copies `len` bytes from `src` to `dst` and returns `dst`.
+// Nothing is copied when `src` is nil, when both pointers are equal, or when `len` is not positive.
+mem_copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
+	if src == nil || dst == src || len <= 0 {
+		return dst
+	}
+	// NOTE(bill): This _must_ be implemented like C's memcpy
+	intrinsics.mem_copy_non_overlapping(dst, src, len)
+	return dst
+}
+
+// Default alignment used by the allocation helpers below: twice the alignment of a pointer
+DEFAULT_ALIGNMENT :: 2*align_of(rawptr)
+
+// Requests `size` bytes with the given `alignment` from `allocator` using the `.Alloc` mode.
+// Returns `nil, nil` without calling the allocator when `size` is zero or the allocator has no procedure.
+mem_alloc_bytes :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	if size == 0 || allocator.procedure == nil {
+		return nil, nil
+	}
+	return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
+}
+
+// Requests `size` bytes with the given `alignment` from `allocator` using the `.Alloc` mode.
+// Returns `nil, nil` without calling the allocator when `size` is zero or the allocator has no procedure.
+mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	if size == 0 {
+		return nil, nil
+	}
+	if allocator.procedure == nil {
+		return nil, nil
+	}
+	return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
+}
+
+// Requests `size` bytes with the given `alignment` from `allocator` using the `.Alloc_Non_Zeroed` mode.
+// Returns `nil, nil` without calling the allocator when `size` is zero or the allocator has no procedure.
+mem_alloc_non_zeroed :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	if size == 0 {
+		return nil, nil
+	}
+	if allocator.procedure == nil {
+		return nil, nil
+	}
+	return allocator.procedure(allocator.data, .Alloc_Non_Zeroed, size, alignment, nil, 0, loc)
+}
+
+// Issues a `.Free` request for `ptr` (with an old size of 0) and returns the allocator's error.
+// Returns nil without calling the allocator when `ptr` is nil or the allocator has no procedure.
+mem_free :: #force_inline proc(ptr: rawptr, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	if ptr != nil && allocator.procedure != nil {
+		_, free_err := allocator.procedure(allocator.data, .Free, 0, 0, ptr, 0, loc)
+		return free_err
+	}
+	return nil
+}
+
+// Issues a `.Free` request for `ptr`, passing `byte_count` as the old size, and returns the allocator's error.
+// Returns nil without calling the allocator when `ptr` is nil or the allocator has no procedure.
+mem_free_with_size :: #force_inline proc(ptr: rawptr, byte_count: int, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	if ptr != nil && allocator.procedure != nil {
+		_, free_err := allocator.procedure(allocator.data, .Free, 0, 0, ptr, byte_count, loc)
+		return free_err
+	}
+	return nil
+}
+
+// Issues a `.Free` request for the memory backing `bytes`, passing its length as the old size.
+// Returns nil without calling the allocator when `bytes` is nil or the allocator has no procedure.
+mem_free_bytes :: #force_inline proc(bytes: []byte, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	if bytes != nil && allocator.procedure != nil {
+		_, free_err := allocator.procedure(allocator.data, .Free, 0, 0, raw_data(bytes), len(bytes), loc)
+		return free_err
+	}
+	return nil
+}
+
+
+// Issues a `.Free_All` request to `allocator` and returns its error.
+// Returns nil without calling the allocator when it has no procedure.
+mem_free_all :: #force_inline proc(allocator := context.allocator, loc := #caller_location) -> (err: Allocator_Error) {
+	if allocator.procedure == nil {
+		return
+	}
+	_, err = allocator.procedure(allocator.data, .Free_All, 0, 0, nil, 0, loc)
+	return
+}
+
+// Shared implementation behind `mem_resize` and `non_zero_mem_resize`.
+//
+// `should_zero` selects between the zeroing and non-zeroing allocator modes. Cases, in order:
+//   * allocator without a procedure      -> returns `nil, nil`
+//   * `new_size == 0`                    -> frees `ptr` (if non-nil) and returns no data
+//   * `ptr == nil`                       -> plain allocation of `new_size` bytes
+//   * same size and `ptr` already aligned -> returns the existing memory untouched
+//   * otherwise                          -> asks the allocator to resize; if it reports
+//     `.Mode_Not_Implemented`, falls back to allocate + copy + free
+_mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	if allocator.procedure == nil {
+		return nil, nil
+	}
+	if new_size == 0 {
+		// Resizing to nothing is a free
+		if ptr != nil {
+			_, err = allocator.procedure(allocator.data, .Free, 0, 0, ptr, old_size, loc)
+			return
+		}
+		return
+	} else if ptr == nil {
+		// Nothing to resize: this is a fresh allocation
+		if should_zero {
+			return allocator.procedure(allocator.data, .Alloc, new_size, alignment, nil, 0, loc)
+		} else {
+			return allocator.procedure(allocator.data, .Alloc_Non_Zeroed, new_size, alignment, nil, 0, loc)
+		}
+	} else if old_size == new_size && uintptr(ptr) % uintptr(alignment) == 0 {
+		// Size unchanged and the pointer already satisfies the requested alignment
+		data = ([^]byte)(ptr)[:old_size]
+		return
+	}
+
+	if should_zero {
+		data, err = allocator.procedure(allocator.data, .Resize, new_size, alignment, ptr, old_size, loc)
+	} else {
+		data, err = allocator.procedure(allocator.data, .Resize_Non_Zeroed, new_size, alignment, ptr, old_size, loc)
+	}
+	if err == .Mode_Not_Implemented {
+		// Fallback for allocators without a resize mode: allocate new memory, copy, free the old block
+		if should_zero {
+			data, err = allocator.procedure(allocator.data, .Alloc, new_size, alignment, nil, 0, loc)
+		} else {
+			data, err = allocator.procedure(allocator.data, .Alloc_Non_Zeroed, new_size, alignment, nil, 0, loc)
+		}
+		if err != nil {
+			// The old block is left untouched when the new allocation fails
+			return
+		}
+		copy(data, ([^]byte)(ptr)[:old_size])
+		_, err = allocator.procedure(allocator.data, .Free, 0, 0, ptr, old_size, loc)
+	}
+	return
+}
+
+// Resizes the allocation at `ptr` from `old_size` to `new_size` bytes via `_mem_resize`,
+// using the zeroing allocator modes.
+mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	data, err = _mem_resize(ptr, old_size, new_size, alignment, allocator, true, loc)
+	return
+}
+// Resizes the allocation at `ptr` from `old_size` to `new_size` bytes via `_mem_resize`,
+// using the non-zeroing allocator modes.
+non_zero_mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+	data, err = _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc)
+	return
+}
+
+// Reports whether the `n` bytes at `x` and at `y` are identical.
+// Returns true immediately when `n` is zero or both pointers are the same;
+// otherwise the bytes are compared one at a time.
+memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool {
+	if n == 0 || x == y {
+		return true
+	}
+
+	lhs, rhs := ([^]byte)(x), ([^]byte)(y)
+	for i in 0..<uint(n) {
+		if lhs[i] != rhs[i] {
+			return false
+		}
+	}
+	return true
+
+/*
+	Disabled word-at-a-time variant, kept for reference:
+
+	when size_of(uint) == 8 {
+		if word_length := length >> 3; word_length != 0 {
+			for _ in 0..<word_length {
+				if intrinsics.unaligned_load((^u64)(a)) != intrinsics.unaligned_load((^u64)(b)) {
+					return false
+				}
+				a = a[size_of(u64):]
+				b = b[size_of(u64):]
+			}
+		}
+
+		if length & 4 != 0 {
+			if intrinsics.unaligned_load((^u32)(a)) != intrinsics.unaligned_load((^u32)(b)) {
+				return false
+			}
+			a = a[size_of(u32):]
+			b = b[size_of(u32):]
+		}
+
+		if length & 2 != 0 {
+			if intrinsics.unaligned_load((^u16)(a)) != intrinsics.unaligned_load((^u16)(b)) {
+				return false
+			}
+			a = a[size_of(u16):]
+			b = b[size_of(u16):]
+		}
+
+		if length & 1 != 0 && a[0] != b[0] {
+			return false
+		}
+		return true
+	} else {
+		if word_length := length >> 2; word_length != 0 {
+			for _ in 0..<word_length {
+				if intrinsics.unaligned_load((^u32)(a)) != intrinsics.unaligned_load((^u32)(b)) {
+					return false
+				}
+				a = a[size_of(u32):]
+				b = b[size_of(u32):]
+			}
+		}
+
+		length &= 3
+
+		if length != 0 {
+			for i in 0..<length {
+				if a[i] != b[i] {
+					return false
+				}
+			}
+		}
+
+		return true
+	}
+*/
+
+}
+// Three-way comparison of the first `n` bytes at `a` and `b`.
+//
+// Returns 0 when the regions are equal (or `a == b`), -1 when `a` orders first and +1 when `b`
+// orders first; bytes are compared as unsigned values. A nil pointer orders before a non-nil one.
+//
+// Whole machine words are compared first to skip over equal data quickly. Only words that lie
+// entirely inside the first `n` bytes are loaded, so no memory past either region is read.
+memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
+	switch {
+	case a == b:   return 0
+	case a == nil: return -1
+	case b == nil: return +1
+	}
+
+	x := uintptr(a)
+	y := uintptr(b)
+	n := uintptr(n)
+
+	SU :: size_of(uintptr)
+	// Number of complete words, and the byte offset where the sub-word tail starts
+	fast := n/SU
+	offset := fast*SU
+
+	for curr_block := uintptr(0); curr_block < fast; curr_block += 1 {
+		va := (^uintptr)(x + curr_block*SU)^
+		vb := (^uintptr)(y + curr_block*SU)^
+		if va ~ vb != 0 {
+			// The words differ: locate the first differing byte to decide the ordering
+			for pos := curr_block*SU; pos < n; pos += 1 {
+				a := (^byte)(x+pos)^
+				b := (^byte)(y+pos)^
+				if a ~ b != 0 {
+					return -1 if (int(a) - int(b)) < 0 else +1
+				}
+			}
+		}
+	}
+
+	// Remaining bytes that do not fill a whole word
+	for /**/; offset < n; offset += 1 {
+		a := (^byte)(x+offset)^
+		b := (^byte)(y+offset)^
+		if a ~ b != 0 {
+			return -1 if (int(a) - int(b)) < 0 else +1
+		}
+	}
+
+	return 0
+}
+
+// Compares the first `n` bytes at `a` against zero.
+//
+// Returns 0 when every byte is zero. Otherwise returns the sign of the first non-zero byte;
+// since bytes are unsigned this is always +1 (the -1 branch is kept for symmetry with `memory_compare`).
+//
+// Whole machine words are checked first to skip over zeroed data quickly. Only words that lie
+// entirely inside the first `n` bytes are loaded, so no memory past the region is read.
+memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
+	x := uintptr(a)
+	n := uintptr(n)
+
+	SU :: size_of(uintptr)
+	// Number of complete words, and the byte offset where the sub-word tail starts
+	fast := n/SU
+	offset := fast*SU
+
+	for curr_block := uintptr(0); curr_block < fast; curr_block += 1 {
+		va := (^uintptr)(x + curr_block*SU)^
+		if va ~ 0 != 0 {
+			// The word is non-zero: locate the first non-zero byte
+			for pos := curr_block*SU; pos < n; pos += 1 {
+				a := (^byte)(x+pos)^
+				if a ~ 0 != 0 {
+					return -1 if int(a) < 0 else +1
+				}
+			}
+		}
+	}
+
+	// Remaining bytes that do not fill a whole word
+	for /**/; offset < n; offset += 1 {
+		a := (^byte)(x+offset)^
+		if a ~ 0 != 0 {
+			return -1 if int(a) < 0 else +1
+		}
+	}
+
+	return 0
+}
+
+// Reports whether two strings have the same length and the same bytes.
+string_eq :: proc "contextless" (lhs, rhs: string) -> bool {
+	a := transmute(Raw_String)lhs
+	b := transmute(Raw_String)rhs
+	// Different lengths can never be equal; only then is a byte comparison needed
+	return a.len == b.len && #force_inline memory_equal(a.data, b.data, a.len)
+}
+
+// Three-way comparison of two strings.
+// The common prefix is compared with `memory_compare`; when it is equal, the shorter string orders first.
+string_cmp :: proc "contextless" (a, b: string) -> int {
+	lhs := transmute(Raw_String)a
+	rhs := transmute(Raw_String)b
+
+	shared := min(lhs.len, rhs.len)
+	order := memory_compare(lhs.data, rhs.data, shared)
+	if order != 0 || lhs.len == rhs.len {
+		return order
+	}
+	return -1 if lhs.len < rhs.len else +1
+}
+
+// Remaining string comparison operators: `!=` negates `string_eq`; the ordering operators
+// test the sign of `string_cmp`.
+string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) }
+string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 }
+string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 }
+string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 }
+string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 }
+
+// Returns the number of bytes before the terminating zero byte of `s`; a nil `s` has length 0.
+cstring_len :: proc "contextless" (s: cstring) -> int {
+	if s == nil {
+		return 0
+	}
+	bytes := ([^]byte)(s)
+	count := 0
+	for bytes[count] != 0 {
+		count += 1
+	}
+	return count
+}
+
+// Returns a `string` that refers to the same bytes as `s` (no copy is made), with the length
+// determined by `cstring_len`. A nil `s` yields the empty string.
+cstring_to_string :: proc "contextless" (s: cstring) -> string {
+	if s == nil {
+		return ""
+	}
+	raw := Raw_String{(^byte)(s), cstring_len(s)}
+	return transmute(string)raw
+}
+
+
+// Reports whether two cstrings are equal: identical pointers (including both nil) are equal,
+// exactly one nil pointer is not, and otherwise lengths and bytes must match.
+cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool {
+	x := ([^]byte)(lhs)
+	y := ([^]byte)(rhs)
+	switch {
+	case x == y:
+		return true
+	case (x == nil) != (y == nil):
+		// Exactly one side is nil
+		return false
+	}
+
+	xn := cstring_len(lhs)
+	yn := cstring_len(rhs)
+	return xn == yn && #force_inline memory_equal(x, y, xn)
+}
+
+// Three-way comparison of two cstrings.
+// Identical pointers compare equal and a nil pointer orders before a non-nil one. Otherwise the
+// common prefix is compared with `memory_compare`; when it is equal, the shorter string orders first.
+cstring_cmp :: proc "contextless" (lhs, rhs: cstring) -> int {
+	x := ([^]byte)(lhs)
+	y := ([^]byte)(rhs)
+	switch {
+	case x == y:
+		return 0
+	case (x == nil) != (y == nil):
+		// Exactly one side is nil
+		return -1 if x == nil else +1
+	}
+
+	xn := cstring_len(lhs)
+	yn := cstring_len(rhs)
+	order := memory_compare(x, y, min(xn, yn))
+	if order != 0 || xn == yn {
+		return order
+	}
+	return -1 if xn < yn else +1
+}
+
+// Remaining cstring comparison operators: `!=` negates `cstring_eq`; the ordering operators
+// test the sign of `cstring_cmp`.
+cstring_ne :: #force_inline proc "contextless" (a, b: cstring) -> bool { return !cstring_eq(a, b) }
+cstring_lt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) < 0 }
+cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) > 0 }
+cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 }
+cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 }
+
+
+// Equality for complex numbers: equal when both the real and imaginary parts are equal;
+// unequal when either part differs.
+complex32_eq :: #force_inline proc "contextless"  (a, b: complex32)  -> bool { return real(a) == real(b) && imag(a) == imag(b) }
+complex32_ne :: #force_inline proc "contextless"  (a, b: complex32)  -> bool { return real(a) != real(b) || imag(a) != imag(b) }
+
+complex64_eq :: #force_inline proc "contextless"  (a, b: complex64)  -> bool { return real(a) == real(b) && imag(a) == imag(b) }
+complex64_ne :: #force_inline proc "contextless"  (a, b: complex64)  -> bool { return real(a) != real(b) || imag(a) != imag(b) }
+
+complex128_eq :: #force_inline proc "contextless" (a, b: complex128) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
+complex128_ne :: #force_inline proc "contextless" (a, b: complex128) -> bool { return real(a) != real(b) || imag(a) != imag(b) }
+
+
+// Equality for quaternions: equal when all four components (real, imag, jmag, kmag) are equal;
+// unequal when any component differs.
+quaternion64_eq :: #force_inline proc "contextless"  (a, b: quaternion64)  -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
+quaternion64_ne :: #force_inline proc "contextless"  (a, b: quaternion64)  -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
+
+quaternion128_eq :: #force_inline proc "contextless"  (a, b: quaternion128)  -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
+quaternion128_ne :: #force_inline proc "contextless"  (a, b: quaternion128)  -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
+
+quaternion256_eq :: #force_inline proc "contextless" (a, b: quaternion256) -> bool { return real(a) == real(b) && imag(a) == imag(b) && jmag(a) == jmag(b) && kmag(a) == kmag(b) }
+quaternion256_ne :: #force_inline proc "contextless" (a, b: quaternion256) -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }
+
+
+// Decodes the first UTF-8 encoded rune of `s`.
+//
+// Returns the rune and the number of bytes it occupies. An empty string yields `(RUNE_ERROR, 0)`;
+// an invalid or truncated encoding yields `(RUNE_ERROR, 1)`.
+string_decode_rune :: #force_inline proc "contextless" (s: string) -> (rune, int) {
+	// NOTE(bill): Duplicated here to remove dependency on package unicode/utf8
+
+	// Per-lead-byte information. Entries >= 0xF0 are handled immediately as single-byte results.
+	// For the other entries, the low 3 bits give the sequence length and the high 4 bits index
+	// `accept_ranges` for the allowed range of the second byte.
+	@static accept_sizes := [256]u8{
+		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x00-0x0f
+		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x10-0x1f
+		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x20-0x2f
+		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x30-0x3f
+		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x40-0x4f
+		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x50-0x5f
+		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x60-0x6f
+		0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x70-0x7f
+
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0x80-0x8f
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0x90-0x9f
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xa0-0xaf
+		0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xb0-0xbf
+		0xf1, 0xf1, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xc0-0xcf
+		0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xd0-0xdf
+		0x13, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x23, 0x03, 0x03, // 0xe0-0xef
+		0x34, 0x04, 0x04, 0x04, 0x44, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xf0-0xff
+	}
+	Accept_Range :: struct {lo, hi: u8}
+
+	// Allowed inclusive ranges for the second byte of a sequence, selected by `accept_sizes[lead] >> 4`
+	@static accept_ranges := [5]Accept_Range{
+		{0x80, 0xbf},
+		{0xa0, 0xbf},
+		{0x80, 0x9f},
+		{0x90, 0xbf},
+		{0x80, 0x8f},
+	}
+
+	// Payload masks: continuation byte, and lead byte of a 2-, 3- and 4-byte sequence
+	MASKX :: 0b0011_1111
+	MASK2 :: 0b0001_1111
+	MASK3 :: 0b0000_1111
+	MASK4 :: 0b0000_0111
+
+	// Lowest and highest valid continuation byte
+	LOCB :: 0b1000_0000
+	HICB :: 0b1011_1111
+
+
+	RUNE_ERROR :: '\ufffd'
+
+	n := len(s)
+	if n < 1 {
+		return RUNE_ERROR, 0
+	}
+	s0 := s[0]
+	x := accept_sizes[s0]
+	if x >= 0xF0 {
+		// Single-byte result: the low bit of `x` selects between the byte itself (0xf0 entries)
+		// and RUNE_ERROR (0xf1 entries)
+		mask := rune(x) << 31 >> 31 // NOTE(bill): Create 0x0000 or 0xffff.
+		return rune(s[0])&~mask | RUNE_ERROR&mask, 1
+	}
+	sz := x & 7
+	accept := accept_ranges[x>>4]
+	if n < int(sz) {
+		// Not enough bytes left for the sequence announced by the lead byte
+		return RUNE_ERROR, 1
+	}
+	b1 := s[1]
+	if b1 < accept.lo || accept.hi < b1 {
+		return RUNE_ERROR, 1
+	}
+	if sz == 2 {
+		return rune(s0&MASK2)<<6 | rune(b1&MASKX), 2
+	}
+	b2 := s[2]
+	if b2 < LOCB || HICB < b2 {
+		return RUNE_ERROR, 1
+	}
+	if sz == 3 {
+		return rune(s0&MASK3)<<12 | rune(b1&MASKX)<<6 | rune(b2&MASKX), 3
+	}
+	b3 := s[3]
+	if b3 < LOCB || HICB < b3 {
+		return RUNE_ERROR, 1
+	}
+	return rune(s0&MASK4)<<18 | rune(b1&MASKX)<<12 | rune(b2&MASKX)<<6 | rune(b3&MASKX), 4
+}
+
+// Decodes the last UTF-8 encoded rune of `s`.
+// Returns the rune and its width in bytes; (RUNE_ERROR, 0) for an empty
+// string and (RUNE_ERROR, 1) when the trailing bytes are not a valid encoding.
+string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) {
+	RUNE_ERROR :: '\ufffd'
+	RUNE_SELF  :: 0x80
+	UTF_MAX    :: 4
+
+	end := len(s)
+	if end == 0 {
+		return RUNE_ERROR, 0
+	}
+
+	// Fast path: the final byte is a single-byte rune.
+	last := rune(s[end-1])
+	if last < RUNE_SELF {
+		return last, 1
+	}
+
+	// Walk backwards over continuation bytes (0b10xx_xxxx), looking at no
+	// more than UTF_MAX bytes in total.
+	limit := max(end - UTF_MAX, 0)
+	start := end - 2
+	for start >= limit && (s[start] & 0xc0) == RUNE_SELF {
+		start -= 1
+	}
+	start = max(start, 0)
+
+	// The candidate must decode to a rune that ends exactly at `end`.
+	r, size := string_decode_rune(s[start:end])
+	if start+size != end {
+		return RUNE_ERROR, 1
+	}
+	return r, size
+}
+
+// Magnitude of a complex32, computed as hi*sqrt(1 + (lo/hi)^2) where hi/lo
+// are the larger/smaller absolute component. The square root is taken in f32.
+abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 {
+	hi, lo := abs(real(x)), abs(imag(x))
+	if hi < lo {
+		hi, lo = lo, hi
+	}
+	// Both components are zero; also avoids dividing by zero below.
+	if hi == 0 {
+		return 0
+	}
+	ratio := lo / hi
+	return hi * f16(intrinsics.sqrt(f32(1 + ratio*ratio)))
+}
+// Magnitude of a complex64, computed as hi*sqrt(1 + (lo/hi)^2) where hi/lo
+// are the larger/smaller absolute component.
+abs_complex64 :: #force_inline proc "contextless" (x: complex64) -> f32 {
+	hi, lo := abs(real(x)), abs(imag(x))
+	if hi < lo {
+		hi, lo = lo, hi
+	}
+	// Both components are zero; also avoids dividing by zero below.
+	if hi == 0 {
+		return 0
+	}
+	ratio := lo / hi
+	return hi * intrinsics.sqrt(1 + ratio*ratio)
+}
+// Magnitude of a complex128, computed as hi*sqrt(1 + (lo/hi)^2) where hi/lo
+// are the larger/smaller absolute component.
+abs_complex128 :: #force_inline proc "contextless" (x: complex128) -> f64 {
+	hi, lo := abs(real(x)), abs(imag(x))
+	if hi < lo {
+		hi, lo = lo, hi
+	}
+	// Both components are zero; also avoids dividing by zero below.
+	if hi == 0 {
+		return 0
+	}
+	ratio := lo / hi
+	return hi * intrinsics.sqrt(1 + ratio*ratio)
+}
+// Euclidean norm of a quaternion64. The sum of squares is accumulated in
+// f16 and the square root is taken in f32.
+abs_quaternion64 :: #force_inline proc "contextless" (x: quaternion64) -> f16 {
+	a, b, c, d := real(x), imag(x), jmag(x), kmag(x)
+	norm2 := a*a + b*b + c*c + d*d
+	return f16(intrinsics.sqrt(f32(norm2)))
+}
+// Euclidean norm of a quaternion128: sqrt of the sum of squared components.
+abs_quaternion128 :: #force_inline proc "contextless" (x: quaternion128) -> f32 {
+	a, b, c, d := real(x), imag(x), jmag(x), kmag(x)
+	norm2 := a*a + b*b + c*c + d*d
+	return intrinsics.sqrt(norm2)
+}
+// Euclidean norm of a quaternion256: sqrt of the sum of squared components.
+abs_quaternion256 :: #force_inline proc "contextless" (x: quaternion256) -> f64 {
+	a, b, c, d := real(x), imag(x), jmag(x), kmag(x)
+	norm2 := a*a + b*b + c*c + d*d
+	return intrinsics.sqrt(norm2)
+}
+
+
+// Complex division n / m for complex32.
+// The divisor is scaled by whichever of its components has the larger
+// magnitude, so only one ratio and one denominator are needed per branch.
+quo_complex32 :: proc "contextless" (n, m: complex32) -> complex32 {
+	a, b := real(n), imag(n)
+	c, d := real(m), imag(m)
+
+	if abs(c) >= abs(d) {
+		ratio := d / c
+		denom := c + ratio*d
+		return complex((a + b*ratio) / denom, (b - a*ratio) / denom)
+	}
+
+	ratio := c / d
+	denom := d + ratio*c
+	return complex((a*ratio + b) / denom, (b*ratio - a) / denom)
+}
+
+
+// Complex division n / m for complex64.
+// The divisor is scaled by whichever of its components has the larger
+// magnitude, so only one ratio and one denominator are needed per branch.
+quo_complex64 :: proc "contextless" (n, m: complex64) -> complex64 {
+	a, b := real(n), imag(n)
+	c, d := real(m), imag(m)
+
+	if abs(c) >= abs(d) {
+		ratio := d / c
+		denom := c + ratio*d
+		return complex((a + b*ratio) / denom, (b - a*ratio) / denom)
+	}
+
+	ratio := c / d
+	denom := d + ratio*c
+	return complex((a*ratio + b) / denom, (b*ratio - a) / denom)
+}
+
+// Complex division n / m for complex128.
+// The divisor is scaled by whichever of its components has the larger
+// magnitude, so only one ratio and one denominator are needed per branch.
+quo_complex128 :: proc "contextless" (n, m: complex128) -> complex128 {
+	a, b := real(n), imag(n)
+	c, d := real(m), imag(m)
+
+	if abs(c) >= abs(d) {
+		ratio := d / c
+		denom := c + ratio*d
+		return complex((a + b*ratio) / denom, (b - a*ratio) / denom)
+	}
+
+	ratio := c / d
+	denom := d + ratio*c
+	return complex((a*ratio + b) / denom, (b*ratio - a) / denom)
+}
+
+// Hamilton product q * r for quaternion64 (w = real, x/y/z = i/j/k parts).
+mul_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
+	qw, qx, qy, qz := real(q), imag(q), jmag(q), kmag(q)
+	rw, rx, ry, rz := real(r), imag(r), jmag(r), kmag(r)
+
+	tw := rw*qw - rx*qx - ry*qy - rz*qz
+	tx := rw*qx + rx*qw - ry*qz + rz*qy
+	ty := rw*qy + rx*qz + ry*qw - rz*qx
+	tz := rw*qz - rx*qy + ry*qx + rz*qw
+
+	return quaternion(w=tw, x=tx, y=ty, z=tz)
+}
+
+// Hamilton product q * r for quaternion128 (w = real, x/y/z = i/j/k parts).
+mul_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
+	qw, qx, qy, qz := real(q), imag(q), jmag(q), kmag(q)
+	rw, rx, ry, rz := real(r), imag(r), jmag(r), kmag(r)
+
+	tw := rw*qw - rx*qx - ry*qy - rz*qz
+	tx := rw*qx + rx*qw - ry*qz + rz*qy
+	ty := rw*qy + rx*qz + ry*qw - rz*qx
+	tz := rw*qz - rx*qy + ry*qx + rz*qw
+
+	return quaternion(w=tw, x=tx, y=ty, z=tz)
+}
+
+// Hamilton product q * r for quaternion256 (w = real, x/y/z = i/j/k parts).
+mul_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 {
+	qw, qx, qy, qz := real(q), imag(q), jmag(q), kmag(q)
+	rw, rx, ry, rz := real(r), imag(r), jmag(r), kmag(r)
+
+	tw := rw*qw - rx*qx - ry*qy - rz*qz
+	tx := rw*qx + rx*qw - ry*qz + rz*qy
+	ty := rw*qy + rx*qz + ry*qw - rz*qx
+	tz := rw*qz - rx*qy + ry*qx + rz*qw
+
+	return quaternion(w=tw, x=tx, y=ty, z=tz)
+}
+
+// Quaternion division q / r for quaternion64, defined as q * r^-1 with
+// r^-1 = conj(r) / |r|^2, so that q / q is 1 and the result multiplied by
+// `r` gives back `q`.
+quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
+	q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
+	r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
+
+	invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
+
+	// Hamilton product of q with conj(r) = (r0, -r1, -r2, -r3), scaled by 1/|r|^2.
+	// The x and z rows previously carried the wrong sign on their `r2` term,
+	// which made q / q differ from 1.
+	t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
+	t1 := (r0*q1 - r1*q0 + r2*q3 - r3*q2) * invmag2
+	t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
+	t3 := (r0*q3 + r1*q2 - r2*q1 - r3*q0) * invmag2
+
+	return quaternion(w=t0, x=t1, y=t2, z=t3)
+}
+
+// Quaternion division q / r for quaternion128, defined as q * r^-1 with
+// r^-1 = conj(r) / |r|^2, so that q / q is 1 and the result multiplied by
+// `r` gives back `q`.
+quo_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
+	q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
+	r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
+
+	invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
+
+	// Hamilton product of q with conj(r) = (r0, -r1, -r2, -r3), scaled by 1/|r|^2.
+	// The x and z rows previously carried the wrong sign on their `r2` term,
+	// which made q / q differ from 1.
+	t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
+	t1 := (r0*q1 - r1*q0 + r2*q3 - r3*q2) * invmag2
+	t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
+	t3 := (r0*q3 + r1*q2 - r2*q1 - r3*q0) * invmag2
+
+	return quaternion(w=t0, x=t1, y=t2, z=t3)
+}
+
+// Quaternion division q / r for quaternion256, defined as q * r^-1 with
+// r^-1 = conj(r) / |r|^2, so that q / q is 1 and the result multiplied by
+// `r` gives back `q`.
+quo_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 {
+	q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
+	r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
+
+	invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
+
+	// Hamilton product of q with conj(r) = (r0, -r1, -r2, -r3), scaled by 1/|r|^2.
+	// The x and z rows previously carried the wrong sign on their `r2` term,
+	// which made q / q differ from 1.
+	t0 := (r0*q0 + r1*q1 + r2*q2 + r3*q3) * invmag2
+	t1 := (r0*q1 - r1*q0 + r2*q3 - r3*q2) * invmag2
+	t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
+	t3 := (r0*q3 + r1*q2 - r2*q1 - r3*q0) * invmag2
+
+	return quaternion(w=t0, x=t1, y=t2, z=t3)
+}
+
+// Converts an f32 to a 16-bit float by manipulating the bit pattern directly.
+// Exported under the link name `__truncsfhf2`.
+@(link_name="__truncsfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+truncsfhf2 :: proc "c" (value: f32) -> __float16 {
+	v: struct #raw_union { i: u32, f: f32 }
+	i, s, e, m: i32
+
+	v.f = value
+	i = i32(v.i)
+
+	// s: sign bit moved to bit 15 (its position in the 16-bit result)
+	// e: exponent re-biased from 127 (f32) to 15 (f16)
+	// m: 23-bit f32 mantissa
+	s =  (i >> 16) & 0x00008000
+	e = ((i >> 23) & 0x000000ff) - (127 - 15)
+	m =   i        & 0x007fffff
+
+
+	if e <= 0 {
+		// Too small for a normal half: produce a signed zero or a subnormal.
+		if e < -10 {
+			return transmute(__float16)u16(s)
+		}
+		// Add the implicit leading 1 and shift the mantissa into subnormal position.
+		m = (m | 0x00800000) >> u32(1 - e)
+
+		// Round up when the highest discarded bit (bit 12) is set.
+		if m & 0x00001000 != 0 {
+			m += 0x00002000
+		}
+
+		return transmute(__float16)u16(s | (m >> 13))
+	} else if e == 0xff - (127 - 15) {
+		// The f32 exponent field was all ones: infinity or NaN.
+		if m == 0 {
+			return transmute(__float16)u16(s | 0x7c00) /* NOTE(bill): infinity */
+		} else {
+			/* NOTE(bill): NAN */
+			m >>= 13
+			// `i32(m == 0)` keeps the mantissa non-zero so the result stays a NaN.
+			return transmute(__float16)u16(s | 0x7c00 | m | i32(m == 0))
+		}
+	} else {
+		// Normal number: round up on bit 12; a carry out of the mantissa bumps the exponent.
+		if m & 0x00001000 != 0 {
+			m += 0x00002000
+			if (m & 0x00800000) != 0 {
+				m = 0
+				e += 1
+			}
+		}
+
+		if e > 30 {
+			// Exponent does not fit in a half: the result is a signed infinity.
+			// NOTE(review): the volatile loop repeatedly squares an i64; per the
+			// original note it exists only to force an overflow and does not
+			// affect the returned value.
+			f := i64(1e12)
+			for j := 0; j < 10; j += 1 {
+				/* NOTE(bill): Cause overflow */
+				g := intrinsics.volatile_load(&f)
+				g *= g
+				intrinsics.volatile_store(&f, g)
+			}
+
+			return transmute(__float16)u16(s | 0x7c00)
+		}
+
+		return transmute(__float16)u16(s | (e << 10) | (m >> 13))
+	}
+}
+
+
+// Converts an f64 to a 16-bit float by first narrowing to f32 and then
+// delegating to `truncsfhf2`. Exported under the link name `__truncdfhf2`.
+@(link_name="__truncdfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+truncdfhf2 :: proc "c" (value: f64) -> __float16 {
+	return truncsfhf2(f32(value))
+}
+
+// Widens a 16-bit float to f32. Exported under the link name `__gnu_h2f_ieee`.
+@(link_name="__gnu_h2f_ieee", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+gnu_h2f_ieee :: proc "c" (value_: __float16) -> f32 {
+	fp32 :: struct #raw_union { u: u32, f: f32 }
+
+	value := transmute(u16)value_
+	v: fp32
+	magic, inf_or_nan: fp32
+	// magic is the f32 with exponent field 254-15, i.e. 2^112; multiplying by
+	// it re-biases a half exponent to an f32 exponent.
+	magic.u = u32((254 - 15) << 23)
+	// inf_or_nan is the f32 with exponent field 127+16, i.e. 2^16.
+	inf_or_nan.u = u32((127 + 16) << 23)
+
+	// Place the 15 exponent/mantissa bits at the f32 positions, then rescale.
+	v.u = u32(value & 0x7fff) << 13
+	v.f *= magic.f
+	// Anything that scaled to 2^16 or above had an all-ones half exponent
+	// (infinity or NaN): force the f32 exponent to all ones as well.
+	if v.f >= inf_or_nan.f {
+		v.u |= 255 << 23
+	}
+	// Restore the sign bit.
+	v.u |= u32(value & 0x8000) << 16
+	return v.f
+}
+
+
+// f32 -> 16-bit float; forwards to `truncsfhf2`.
+// Exported under the link name `__gnu_f2h_ieee`.
+@(link_name="__gnu_f2h_ieee", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+gnu_f2h_ieee :: proc "c" (value: f32) -> __float16 {
+	return truncsfhf2(value)
+}
+
+// 16-bit float -> f32; forwards to `gnu_h2f_ieee`.
+// Exported under the link name `__extendhfsf2`.
+@(link_name="__extendhfsf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+extendhfsf2 :: proc "c" (value: __float16) -> f32 {
+	return gnu_h2f_ieee(value)
+}
+
+
+
+// Converts a signed 128-bit integer to f64 by assembling the bit pattern by hand.
+// Exported under the link name `__floattidf`.
+// NOTE: when IS_WASM is set this is a stub that always returns 0.
+@(link_name="__floattidf", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+floattidf :: proc "c" (a: i128) -> f64 {
+when IS_WASM {
+	return 0
+} else {
+	DBL_MANT_DIG :: 53
+	if a == 0 {
+		return 0.0
+	}
+	a := a
+	N :: size_of(i128) * 8
+	// s is all ones when `a` is negative and zero otherwise;
+	// (a ~ s) - s then yields the absolute value.
+	s := a >> (N-1)
+	a = (a ~ s) - s
+	sd: = N - intrinsics.count_leading_zeros(a)  // number of significant digits
+	e := i32(sd - 1)        // exponent
+	if sd > DBL_MANT_DIG {
+		// More significant bits than the mantissa can hold: reduce to
+		// DBL_MANT_DIG+2 bits (mantissa, one rounding bit, one sticky bit).
+		switch sd {
+		case DBL_MANT_DIG + 1:
+			a <<= 1
+		case DBL_MANT_DIG + 2:
+			// okay
+		case:
+			// Shift right and OR in a 1 if any of the discarded bits was set.
+			a = i128(u128(a) >> u128(sd - (DBL_MANT_DIG+2))) |
+			    i128(u128(a) & (~u128(0) >> u128(N + DBL_MANT_DIG+2 - sd)) != 0)
+		}
+
+		// Fold the lowest mantissa bit into the sticky bit, add one at the
+		// rounding position and drop the two extra bits.
+		a |= i128((a & 4) != 0)
+		a += 1
+		a >>= 2
+
+		// Rounding carried into bit DBL_MANT_DIG: renormalise.
+		if a & (i128(1) << DBL_MANT_DIG) != 0 {
+			a >>= 1
+			e += 1
+		}
+	} else {
+		// Fits exactly: shift the leading bit up to the implicit-one position.
+		a <<= u128(DBL_MANT_DIG - sd) & 127
+	}
+	// Assemble the f64 as two 32-bit words; fb[0] holds the low half and
+	// fb[1] the high half (sign, exponent, top mantissa bits).
+	// NOTE(review): this word order presumably assumes a little-endian target - confirm.
+	fb: [2]u32
+	fb[1] = (u32(s) & 0x80000000) |          // sign
+	        (u32(e + 1023) << 20) |          // exponent
+	        u32((u64(a) >> 32) & 0x000FFFFF) // mantissa-high
+	fb[0] = u32(a)                           // mantissa-low
+	return transmute(f64)fb
+}
+}
+
+
+// Converts an unsigned 128-bit integer to f64 by assembling the bit pattern by hand.
+// Exported under the link name `__floattidf_unsigned`.
+// NOTE: when IS_WASM is set this is a stub that always returns 0.
+@(link_name="__floattidf_unsigned", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+floattidf_unsigned :: proc "c" (a: u128) -> f64 {
+when IS_WASM {
+	return 0
+} else {
+	DBL_MANT_DIG :: 53
+	if a == 0 {
+		return 0.0
+	}
+	a := a
+	N :: size_of(u128) * 8
+	sd: = N - intrinsics.count_leading_zeros(a)  // number of significant digits
+	e := i32(sd - 1)        // exponent
+	if sd > DBL_MANT_DIG {
+		// More significant bits than the mantissa can hold: reduce to
+		// DBL_MANT_DIG+2 bits (mantissa, one rounding bit, one sticky bit).
+		switch sd {
+		case DBL_MANT_DIG + 1:
+			a <<= 1
+		case DBL_MANT_DIG + 2:
+			// okay
+		case:
+			// Shift right and OR in a 1 if any of the discarded bits was set.
+			a = u128(u128(a) >> u128(sd - (DBL_MANT_DIG+2))) |
+				u128(u128(a) & (~u128(0) >> u128(N + DBL_MANT_DIG+2 - sd)) != 0)
+		}
+
+		// Fold the lowest mantissa bit into the sticky bit, add one at the
+		// rounding position and drop the two extra bits.
+		a |= u128((a & 4) != 0)
+		a += 1
+		a >>= 2
+
+		// Rounding carried into bit DBL_MANT_DIG: renormalise.
+		if a & (1 << DBL_MANT_DIG) != 0 {
+			a >>= 1
+			e += 1
+		}
+	} else {
+		// Fits exactly: shift the leading bit up to the implicit-one position.
+		a <<= u128(DBL_MANT_DIG - sd)
+	}
+	// Assemble the f64 as two 32-bit words; fb[0] holds the low half and
+	// fb[1] the high half (exponent and top mantissa bits; sign is always 0).
+	// NOTE(review): this word order presumably assumes a little-endian target - confirm.
+	fb: [2]u32
+	fb[1] = (0) |                            // sign
+	        u32((e + 1023) << 20) |          // exponent
+	        u32((u64(a) >> 32) & 0x000FFFFF) // mantissa-high
+	fb[0] = u32(a)                           // mantissa-low
+	return transmute(f64)fb
+}
+}
+
+
+
+// Converts an f64 to an unsigned 128-bit integer, truncating toward zero.
+// Exported under the link name `__fixunsdfti`.
+//
+// Negative inputs (including -0.0) and magnitudes below 1 return 0.
+// Inputs whose exponent is 128 or more (this includes +infinity and NaNs
+// with a clear sign bit) saturate to max(u128).
+//
+// NOTE: when IS_WASM is set the previous 64-bit conversion is kept, matching
+// the other 128-bit float helpers in this file, which are stubbed out there.
+@(link_name="__fixunsdfti", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+fixunsdfti :: #force_no_inline proc "c" (a: f64) -> u128 {
+when IS_WASM {
+	// TODO(bill): implement `fixunsdfti` correctly
+	x := u64(a)
+	return u128(x)
+} else {
+	SIGNIFICAND_BITS :: 52
+	EXPONENT_BIAS    :: 1023
+	IMPLICIT_BIT     :: u64(1) << SIGNIFICAND_BITS
+
+	bits := transmute(u64)a
+
+	// Sign bit set: nothing below zero is representable.
+	if bits >> 63 != 0 {
+		return 0
+	}
+
+	// Unbiased exponent; zero and subnormals come out at -1023.
+	exponent := int((bits >> SIGNIFICAND_BITS) & 0x7ff) - EXPONENT_BIAS
+	if exponent < 0 {
+		return 0
+	}
+	if exponent >= size_of(u128) * 8 {
+		return max(u128)
+	}
+
+	// Mantissa with the implicit leading one restored, then moved so that
+	// bit 52 lands on bit `exponent`.
+	significand := u128((bits & (IMPLICIT_BIT - 1)) | IMPLICIT_BIT)
+	if exponent < SIGNIFICAND_BITS {
+		return significand >> uint(SIGNIFICAND_BITS - exponent)
+	}
+	return significand << uint(exponent - SIGNIFICAND_BITS)
+}
+}
+
+// Converts an f64 to an integer by casting to i64 and widening to i128.
+// Exported under the link name `__fixunsdfdi`.
+// NOTE(review): the `__fixunsdfdi` name conventionally denotes an
+// f64 -> unsigned 64-bit conversion, whereas this goes through a signed i64
+// and returns i128 - confirm the intended signature against the compiler's caller.
+@(link_name="__fixunsdfdi", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+fixunsdfdi :: #force_no_inline proc "c" (a: f64) -> i128 {
+	// TODO(bill): implement `fixunsdfdi` correctly
+	x := i64(a)
+	return i128(x)
+}
+
+
+
+
+// Unsigned 128-bit remainder a % b. Exported under the link name `__umodti3`.
+@(link_name="__umodti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+umodti3 :: proc "c" (a, b: u128) -> u128 {
+	// Left uninitialised on purpose; udivmod128 is expected to write the remainder into it.
+	r: u128 = ---
+	_ = udivmod128(a, b, &r)
+	return r
+}
+
+
+// Unsigned 128-bit division; forwards `a`, `b` and `rem` unchanged to
+// `udivmod128` and returns its result. Exported under the link name `__udivmodti4`.
+@(link_name="__udivmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+udivmodti4 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
+	return udivmod128(a, b, rem)
+}
+
+// Unsigned 128-bit quotient a / b. Exported under the link name `__udivti3`.
+@(link_name="__udivti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+udivti3 :: proc "c" (a, b: u128) -> u128 {
+	// A nil `rem` is passed because the remainder is not needed here.
+	return udivmodti4(a, b, nil)
+}
+
+
+// Signed 128-bit remainder a % b; the result takes the sign of `a`.
+// Exported under the link name `__modti3`.
+@(link_name="__modti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+modti3 :: proc "c" (a, b: i128) -> i128 {
+	// Sign masks: all ones for a negative operand, zero otherwise.
+	sign_a := a >> (128 - 1)
+	sign_b := b >> (128 - 1)
+
+	// (v ~ mask) - mask is the absolute value of v.
+	mag_a := (a ~ sign_a) - sign_a
+	mag_b := (b ~ sign_b) - sign_b
+
+	rem: u128 = ---
+	_ = udivmod128(transmute(u128)mag_a, transmute(u128)mag_b, &rem)
+
+	// Re-apply the dividend's sign to the unsigned remainder.
+	return (transmute(i128)rem ~ sign_a) - sign_a
+}
+
+
+// Signed 128-bit division: returns a / b truncated toward zero and, when
+// `rem` is not nil, stores the remainder (which takes the sign of `a`) in it.
+// Exported under the link name `__divmodti4`.
+//
+// The division is carried out on the magnitudes and the signs are applied
+// afterwards; dividing the raw two's-complement bit patterns as unsigned
+// values gives wrong results whenever an operand is negative.
+@(link_name="__divmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+divmodti4 :: proc "c" (a, b: i128, rem: ^i128) -> i128 {
+	// Sign masks: all ones for a negative operand, zero otherwise.
+	s_a := a >> (128 - 1)
+	s_b := b >> (128 - 1)
+	// (v ~ mask) - mask is the absolute value of v.
+	an := (a ~ s_a) - s_a
+	bn := (b ~ s_b) - s_b
+
+	r: u128 = ---
+	q := udivmod128(transmute(u128)an, transmute(u128)bn, &r)
+
+	if rem != nil {
+		rem^ = (transmute(i128)r ~ s_a) - s_a
+	}
+
+	// The quotient is negative exactly when the operand signs differ.
+	s_q := s_a ~ s_b
+	return (transmute(i128)q ~ s_q) - s_q
+}
+
+// Signed 128-bit quotient a / b, truncated toward zero.
+// Exported under the link name `__divti3`.
+//
+// The division is carried out on the magnitudes and the sign is applied
+// afterwards; dividing the raw two's-complement bit patterns as unsigned
+// values gives wrong results whenever an operand is negative.
+@(link_name="__divti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+divti3 :: proc "c" (a, b: i128) -> i128 {
+	// Sign masks: all ones for a negative operand, zero otherwise.
+	s_a := a >> (128 - 1)
+	s_b := b >> (128 - 1)
+	// (v ~ mask) - mask is the absolute value of v.
+	an := (a ~ s_a) - s_a
+	bn := (b ~ s_b) - s_b
+
+	q := udivmodti4(transmute(u128)an, transmute(u128)bn, nil)
+
+	// The quotient is negative exactly when the operand signs differ.
+	s_q := s_a ~ s_b
+	return (transmute(i128)q ~ s_q) - s_q
+}
+
+
+// Converts the f64 whose IEEE-754 bit pattern is `a` to a signed 128-bit
+// integer, truncating toward zero. Exported under the link name `__fixdfti`.
+//
+// Magnitudes below 1 return 0; magnitudes of 2^127 or more (including
+// infinities and NaNs) saturate to max(i128) or min(i128) according to the sign bit.
+//
+// NOTE(review): the parameter is the raw u64 bit pattern rather than an f64,
+// and the procedure uses the default calling convention - confirm this is
+// what the compiler's call site expects.
+@(link_name="__fixdfti", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
+fixdfti :: proc(a: u64) -> i128 {
+	significandBits :: 52
+	typeWidth       :: (size_of(u64)*8)
+	exponentBits    :: (typeWidth - significandBits - 1)
+	maxExponent     :: ((1 << exponentBits) - 1)
+	exponentBias    :: (maxExponent >> 1)
+
+	implicitBit     :: (u64(1) << significandBits)
+	significandMask :: (implicitBit - 1)
+	signBit         :: (u64(1) << (significandBits + exponentBits))
+	absMask         :: (signBit - 1)
+	exponentMask    :: (absMask ~ significandMask)
+
+	// Break a into sign, exponent, significand
+	aRep := a
+	aAbs := aRep & absMask
+	sign := i128(-1 if aRep & signBit != 0 else 1)
+	// The unbiased exponent must be signed: as an unsigned value a negative
+	// exponent wraps around, the `< 0` test below can never fire, and every
+	// |value| < 1 would saturate instead of returning 0.
+	exponent := int(aAbs >> significandBits) - exponentBias
+	significand := u64((aAbs & significandMask) | implicitBit)
+
+	// If exponent is negative, the result is zero.
+	if exponent < 0 {
+		return 0
+	}
+
+	// If the value is too large for the integer type, saturate.
+	// An exponent of 127 already means |value| >= 2^127, which only fits as
+	// min(i128); saturating here also covers that exact value correctly.
+	if exponent >= size_of(i128) * 8 - 1 {
+		return max(i128) if sign == 1 else min(i128)
+	}
+
+	// If 0 <= exponent < significandBits, right shift to get the result.
+	// Otherwise, shift left.
+	if exponent < significandBits {
+		return sign * i128(significand >> uint(significandBits - exponent))
+	} else {
+		return sign * (i128(significand) << uint(exponent - significandBits))
+	}
+
+}
diff --git a/base/runtime/os_specific.odin b/base/runtime/os_specific.odin
new file mode 100644
index 000000000..022d315d4
--- /dev/null
+++ b/base/runtime/os_specific.odin
@@ -0,0 +1,7 @@
+package runtime
+
+_OS_Errno :: distinct int
+
+// Writes `data` through the platform-specific `_os_write` and returns its
+// results unchanged: the byte count and an error number.
+os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	return _os_write(data)
+}
diff --git a/base/runtime/os_specific_any.odin b/base/runtime/os_specific_any.odin
new file mode 100644
index 000000000..6a96655c4
--- /dev/null
+++ b/base/runtime/os_specific_any.odin
@@ -0,0 +1,16 @@
+//+build !darwin
+//+build !freestanding
+//+build !js
+//+build !wasi
+//+build !windows
+package runtime
+
+import "core:os"
+
+// TODO(bill): reimplement `os.write` so that it does not rely on package os
+// NOTE: Use os_specific_linux.odin, os_specific_darwin.odin, etc
+// Fallback `_os_write` for targets excluded by none of the build tags above:
+// writes `data` to `os.stderr` via package os.
+_os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	// A default context is installed before calling into package os from
+	// this contextless procedure.
+	context = default_context()
+	n, err := os.write(os.stderr, data)
+	return int(n), _OS_Errno(err)
+}
diff --git a/base/runtime/os_specific_darwin.odin b/base/runtime/os_specific_darwin.odin
new file mode 100644
index 000000000..5de9a7d57
--- /dev/null
+++ b/base/runtime/os_specific_darwin.odin
@@ -0,0 +1,12 @@
+//+build darwin
+package runtime
+
+import "core:intrinsics"
+
+// Darwin `_os_write`: issues syscall 0x2000004 directly with file descriptor 1.
+// NOTE(review): 0x2000004 is presumably the BSD `write` syscall number - confirm.
+// NOTE(review): descriptor 1 is used here while the Windows and generic
+// variants write to stderr - confirm which stream is intended.
+_os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	ret := intrinsics.syscall(0x2000004, 1, uintptr(raw_data(data)), uintptr(len(data)))
+	// NOTE(review): if `intrinsics.syscall` returns an unsigned `uintptr`,
+	// this comparison can never be true and failures go unreported - verify
+	// the return type and Darwin's error-reporting convention.
+	if ret < 0 {
+		return 0, _OS_Errno(-ret)
+	}
+	return int(ret), 0
+}
diff --git a/base/runtime/os_specific_freestanding.odin b/base/runtime/os_specific_freestanding.odin
new file mode 100644
index 000000000..a6d04cefb
--- /dev/null
+++ b/base/runtime/os_specific_freestanding.odin
@@ -0,0 +1,7 @@
+//+build freestanding
+package runtime
+
+// TODO(bill): reimplement `os.write`
+// Freestanding `_os_write`: nothing is written; always reports 0 bytes and
+// an error value of -1.
+_os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	return 0, -1
+}
diff --git a/base/runtime/os_specific_js.odin b/base/runtime/os_specific_js.odin
new file mode 100644
index 000000000..246141d87
--- /dev/null
+++ b/base/runtime/os_specific_js.odin
@@ -0,0 +1,12 @@
+//+build js
+package runtime
+
+foreign import "odin_env"
+
+// JS `_os_write`: hands `data` to the `write` procedure imported from `odin_env`.
+_os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	foreign odin_env {
+		write :: proc "contextless" (fd: u32, p: []byte) ---
+	}
+	write(1, data)
+	// The imported `write` returns nothing, so the whole buffer is reported
+	// as written with no error.
+	return len(data), 0
+}
diff --git a/base/runtime/os_specific_wasi.odin b/base/runtime/os_specific_wasi.odin
new file mode 100644
index 000000000..3f69504ee
--- /dev/null
+++ b/base/runtime/os_specific_wasi.odin
@@ -0,0 +1,10 @@
+//+build wasi
+package runtime
+
+import "core:sys/wasm/wasi"
+
+// WASI `_os_write`: writes `data` to file descriptor 1 with `wasi.fd_write`.
+_os_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
+	// Reinterpret the slice as the single io-vector passed to fd_write.
+	data := (wasi.ciovec_t)(data)
+	n, err := wasi.fd_write(1, {data})
+	return int(n), _OS_Errno(err)
+}
diff --git a/base/runtime/os_specific_windows.odin b/base/runtime/os_specific_windows.odin
new file mode 100644
index 000000000..4a5907466
--- /dev/null
+++ b/base/runtime/os_specific_windows.odin
@@ -0,0 +1,135 @@
+//+build windows
+package runtime
+
+foreign import kernel32 "system:Kernel32.lib"
+
+// File-private Kernel32 imports used by `_os_write` and the heap helpers in this file.
+@(private="file")
+@(default_calling_convention="system")
+foreign kernel32 {
+	// NOTE(bill): The types do not use the standard names (e.g. DWORD and LPVOID), to minimize dependencies
+
+	// os_write
+	GetStdHandle         :: proc(which: u32) -> rawptr ---
+	SetHandleInformation :: proc(hObject: rawptr, dwMask: u32, dwFlags: u32) -> b32 ---
+	WriteFile            :: proc(hFile: rawptr, lpBuffer: rawptr, nNumberOfBytesToWrite: u32, lpNumberOfBytesWritten: ^u32, lpOverlapped: rawptr) -> b32 ---
+	GetLastError         :: proc() -> u32 ---
+
+	// default_allocator
+	GetProcessHeap :: proc() -> rawptr ---
+	HeapAlloc      :: proc(hHeap: rawptr, dwFlags: u32, dwBytes: uint) -> rawptr ---
+	HeapReAlloc    :: proc(hHeap: rawptr, dwFlags: u32, lpMem: rawptr, dwBytes: uint) -> rawptr ---
+	HeapFree       :: proc(hHeap: rawptr, dwFlags: u32, lpMem: rawptr) -> b32 ---
+}
+
+// Windows `_os_write`: writes `data` to the standard error handle with
+// `WriteFile`, in chunks of at most MAX_RW bytes.
+// Returns the number of bytes written and, on failure, the value of
+// `GetLastError` as the error number.
+_os_write :: proc "contextless" (data: []byte) -> (n: int, err: _OS_Errno) #no_bounds_check {
+	if len(data) == 0 {
+		return 0, 0
+	}
+
+	STD_ERROR_HANDLE :: ~u32(0) -12 + 1
+	HANDLE_FLAG_INHERIT :: 0x00000001
+	MAX_RW :: 1<<30
+
+	h := GetStdHandle(STD_ERROR_HANDLE)
+	when size_of(uintptr) == 8 {
+		// Clear the inherit flag on the handle (64-bit builds only).
+		SetHandleInformation(h, HANDLE_FLAG_INHERIT, 0)
+	}
+
+	single_write_length: u32
+	total_write: i64
+	length := i64(len(data))
+
+	for total_write < length {
+		remaining := length - total_write
+		// Clamp while still in 64 bits. Narrowing `remaining` to i32 before
+		// the clamp truncates it: a remainder that is a multiple of 2^32
+		// becomes a zero-byte write, which is then reported as a failure.
+		to_write := u32(min(remaining, MAX_RW))
+
+		e := WriteFile(h, &data[total_write], to_write, &single_write_length, nil)
+		if single_write_length <= 0 || !e {
+			err = _OS_Errno(GetLastError())
+			n = int(total_write)
+			return
+		}
+		total_write += i64(single_write_length)
+	}
+	n = int(total_write)
+	return
+}
+
+// Allocates `size` bytes from the process heap, zero-filled when
+// `zero_memory` is true. Returns whatever `HeapAlloc` returns.
+heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
+	HEAP_ZERO_MEMORY :: 0x00000008
+
+	flags: u32 = 0
+	if zero_memory {
+		flags = HEAP_ZERO_MEMORY
+	}
+	return HeapAlloc(GetProcessHeap(), flags, uint(size))
+}
+// Resizes a process-heap allocation.
+// A `new_size` of 0 frees `ptr` and returns nil; a nil `ptr` allocates a
+// fresh block; otherwise the block is reallocated with HEAP_ZERO_MEMORY.
+heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
+	HEAP_ZERO_MEMORY :: 0x00000008
+
+	switch {
+	case new_size == 0:
+		heap_free(ptr)
+		return nil
+	case ptr == nil:
+		return heap_alloc(new_size)
+	}
+
+	return HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, uint(new_size))
+}
+// Returns `ptr` to the process heap; a nil pointer is ignored.
+heap_free :: proc "contextless" (ptr: rawptr) {
+	if ptr != nil {
+		HeapFree(GetProcessHeap(), 0, ptr)
+	}
+}
+
+
+//
+// NOTE(tetra, 2020-01-14): The heap doesn't respect alignment.
+// Instead, we overallocate by `alignment + size_of(rawptr) - 1`, and insert
+// padding. We also store the original pointer returned by heap_alloc right before
+// the pointer we return to the user.
+//
+
+
+
+// Allocates, or resizes when `old_ptr` is not nil, an aligned block on the
+// process heap.
+//
+// Inputs:
+// - size:        requested size in bytes; 0 frees `old_ptr` and returns (nil, nil)
+// - alignment:   requested alignment; raised to at least align_of(rawptr)
+// - old_ptr:     pointer previously returned by this procedure, or nil
+// - zero_memory: passed to `heap_alloc` on the fresh-allocation path only
+//
+// Returns the aligned `size`-byte slice, or (nil, .Out_Of_Memory) on failure.
+_windows_default_alloc_or_resize :: proc "contextless" (size, alignment: int, old_ptr: rawptr = nil, zero_memory := true) -> ([]byte, Allocator_Error) {
+	if size == 0 {
+		_windows_default_free(old_ptr)
+		return nil, nil
+	}
+
+	a := max(alignment, align_of(rawptr))
+	// Worst-case padding needed to reach an `a`-aligned address.
+	space := size + a - 1
+
+	allocated_mem: rawptr
+	if old_ptr != nil {
+		// The pointer originally returned by the heap is stored just before the user pointer.
+		original_old_ptr := ([^]rawptr)(old_ptr)[-1]
+		// NOTE(review): `heap_resize` always reallocates with HEAP_ZERO_MEMORY, so
+		// `zero_memory` has no effect on this path - confirm that is intended.
+		// NOTE(review): if the heap moves the block and the new base has a
+		// different offset to the next `a`-aligned address, the existing payload
+		// would no longer start at the returned pointer - verify for alignments
+		// larger than the heap's own.
+		allocated_mem = heap_resize(original_old_ptr, space+size_of(rawptr))
+	} else {
+		allocated_mem = heap_alloc(space+size_of(rawptr), zero_memory)
+	}
+	// Skip the slot reserved for the back-pointer, then round up to `a`.
+	aligned_mem := ([^]u8)(allocated_mem)[size_of(rawptr):]
+
+	ptr := uintptr(aligned_mem)
+	aligned_ptr := (ptr - 1 + uintptr(a)) & -uintptr(a)
+	diff := int(aligned_ptr - ptr)
+	if (size + diff) > space || allocated_mem == nil {
+		return nil, .Out_Of_Memory
+	}
+
+	aligned_mem = ([^]byte)(aligned_ptr)
+	// Record the heap's pointer right before the block handed to the caller.
+	([^]rawptr)(aligned_mem)[-1] = allocated_mem
+
+	return aligned_mem[:size], nil
+}
+
+// Fresh aligned allocation: `_windows_default_alloc_or_resize` with no old pointer.
+_windows_default_alloc :: proc "contextless" (size, alignment: int, zero_memory := true) -> ([]byte, Allocator_Error) {
+	return _windows_default_alloc_or_resize(size, alignment, nil, zero_memory)
+}
+
+
+// Frees a block returned by `_windows_default_alloc_or_resize`; nil is ignored.
+_windows_default_free :: proc "contextless" (ptr: rawptr) {
+	if ptr == nil {
+		return
+	}
+	// The pointer the heap handed out is stored immediately before `ptr`.
+	original := ([^]rawptr)(ptr)[-1]
+	heap_free(original)
+}
+
+// Resizes the block at `p` to `new_size` bytes with `new_alignment`.
+// `old_size` is accepted but not used.
+_windows_default_resize :: proc "contextless" (p: rawptr, old_size: int, new_size: int, new_alignment: int) -> ([]byte, Allocator_Error) {
+	return _windows_default_alloc_or_resize(new_size, new_alignment, p)
+}
diff --git a/base/runtime/print.odin b/base/runtime/print.odin
new file mode 100644
index 000000000..87c8757d5
--- /dev/null
+++ b/base/runtime/print.odin
@@ -0,0 +1,489 @@
+package runtime
+
+_INTEGER_DIGITS :: "0123456789abcdefghijklmnopqrstuvwxyz"
+
+@(private="file")
+_INTEGER_DIGITS_VAR := _INTEGER_DIGITS
+
+when !ODIN_NO_RTTI {
+	// Prints a single `any` value using the basic print_* procedures.
+	// Only source locations, typeids, type infos, strings, runes, integers
+	// up to 64 bits, booleans and pointers are handled; anything else
+	// prints "<invalid-value>".
+	print_any_single :: proc "contextless" (arg: any) {
+		x := arg
+		if x.data == nil {
+			print_string("nil")
+			return
+		}
+
+		// Checked before the id is replaced below.
+		if loc, ok := x.(Source_Code_Location); ok {
+			print_caller_location(loc)
+			return
+		}
+		// Switch on the result of `typeid_base` rather than the original id.
+		x.id = typeid_base(x.id)
+		switch v in x {
+		case typeid:     print_typeid(v)
+		case ^Type_Info: print_type(v)
+
+		case string:  print_string(v)
+		case cstring: print_string(string(v))
+		case []byte:  print_string(string(v))
+
+		case rune:  print_rune(v)
+
+		case u8:    print_u64(u64(v))
+		case u16:   print_u64(u64(v))
+		case u16le: print_u64(u64(v))
+		case u16be: print_u64(u64(v))
+		case u32:   print_u64(u64(v))
+		case u32le: print_u64(u64(v))
+		case u32be: print_u64(u64(v))
+		case u64:   print_u64(u64(v))
+		case u64le: print_u64(u64(v))
+		case u64be: print_u64(u64(v))
+
+		case i8:    print_i64(i64(v))
+		case i16:   print_i64(i64(v))
+		case i16le: print_i64(i64(v))
+		case i16be: print_i64(i64(v))
+		case i32:   print_i64(i64(v))
+		case i32le: print_i64(i64(v))
+		case i32be: print_i64(i64(v))
+		case i64:   print_i64(i64(v))
+		case i64le: print_i64(i64(v))
+		case i64be: print_i64(i64(v))
+
+		case int:     print_int(v)
+		case uint:    print_uint(v)
+		case uintptr: print_uintptr(v)
+		case rawptr:  print_uintptr(uintptr(v))
+
+		case bool: print_string("true" if v else "false")
+		case b8:   print_string("true" if v else "false")
+		case b16:  print_string("true" if v else "false")
+		case b32:  print_string("true" if v else "false")
+		case b64:  print_string("true" if v else "false")
+
+		case:
+			// Typed pointers and multi-pointers are printed as their address.
+			ti := type_info_of(x.id)
+			#partial switch v in ti.variant {
+			case Type_Info_Pointer, Type_Info_Multi_Pointer:
+				print_uintptr((^uintptr)(x.data)^)
+				return
+			}
+
+			print_string("<invalid-value>")
+		}
+	}
+	// Prints every argument with `print_any_single`, separated by single
+	// spaces and followed by a newline.
+	println_any :: proc "contextless" (args: ..any) {
+		// `assert` needs a context, so a default one is installed first.
+		context = default_context()
+		for i in 0..<len(args) {
+			arg := args[i]
+			assert(arg.id != nil)
+			if i > 0 {
+				print_string(" ")
+			}
+			print_any_single(arg)
+		}
+		print_string("\n")
+	}
+}
+
+
+// Encodes the rune `c` as UTF-8.
+// Returns a 4-byte buffer and the number of leading bytes that are used.
+// Runes outside the Unicode range (above 0x10FFFF, or negative) and
+// surrogates (0xD800..0xDFFF) are encoded as U+FFFD, the replacement character.
+encode_rune :: proc "contextless" (c: rune) -> ([4]u8, int) {
+	r := c
+
+	buf: [4]u8
+	i := u32(r)
+	mask :: u8(0x3f)
+	if i <= 1<<7-1 {
+		buf[0] = u8(r)
+		return buf, 1
+	}
+	if i <= 1<<11-1 {
+		buf[0] = 0xc0 | u8(r>>6)
+		buf[1] = 0x80 | u8(r) & mask
+		return buf, 2
+	}
+
+	// Invalid or Surrogate range
+	if i > 0x0010ffff ||
+	   (0xd800 <= i && i <= 0xdfff) {
+		r = 0xfffd
+		// Keep `i` in step with `r`: the width checks below look at `i`, and
+		// a stale out-of-range value would send U+FFFD down the 4-byte path,
+		// producing the overlong (invalid) sequence F0 8F BF BD.
+		i = 0xfffd
+	}
+
+	if i <= 1<<16-1 {
+		buf[0] = 0xe0 | u8(r>>12)
+		buf[1] = 0x80 | u8(r>>6) & mask
+		buf[2] = 0x80 | u8(r)    & mask
+		return buf, 3
+	}
+
+	buf[0] = 0xf0 | u8(r>>18)
+	buf[1] = 0x80 | u8(r>>12) & mask
+	buf[2] = 0x80 | u8(r>>6)  & mask
+	buf[3] = 0x80 | u8(r)     & mask
+	return buf, 4
+}
+
+// Writes `str` with `os_write` and returns the number of bytes written.
+// The error result of the write is discarded.
+print_string :: proc "contextless" (str: string) -> (n: int) {
+	n, _ = os_write(transmute([]byte)str)
+	return
+}
+
+// Writes each string in turn and returns the total number of bytes written.
+// Stops at the first write that reports an error; the bytes that write did
+// manage to output are still counted.
+print_strings :: proc "contextless" (args: ..string) -> (n: int) {
+	for str in args {
+		written, errno := os_write(transmute([]byte)str)
+		n += written
+		if errno != 0 {
+			return
+		}
+	}
+	return
+}
+
+// Writes the single byte `b` with `os_write` and returns the number of bytes
+// written. The error result of the write is discarded.
+print_byte :: proc "contextless" (b: byte) -> (n: int) {
+	n, _ = os_write([]byte{b})
+	return
+}
+
+// Prints `r` as a single-quoted rune literal. The named control characters
+// use their backslash escapes, other values below 32 are printed as \xNN,
+// values of zero or less as \x00, and everything else is printed as-is.
+print_encoded_rune :: proc "contextless" (r: rune) {
+	print_byte('\'')
+
+	switch r {
+	case '\a': print_string("\\a")
+	case '\b': print_string("\\b")
+	case '\e': print_string("\\e")
+	case '\f': print_string("\\f")
+	case '\n': print_string("\\n")
+	case '\r': print_string("\\r")
+	case '\t': print_string("\\t")
+	case '\v': print_string("\\v")
+	case:
+		switch {
+		case r <= 0:
+			print_string("\\x00")
+		case r < 32:
+			// Two hexadecimal digits: high nibble, then low nibble.
+			hi := u8(r) >> 4
+			lo := u8(r) & 0xf
+			print_string("\\x")
+			print_byte(_INTEGER_DIGITS_VAR[hi])
+			print_byte(_INTEGER_DIGITS_VAR[lo])
+		case:
+			print_rune(r)
+		}
+	}
+	print_byte('\'')
+}
+
+// Writes `r` encoded as UTF-8 and returns the number of bytes written.
+print_rune :: proc "contextless" (r: rune) -> int #no_bounds_check {
+	RUNE_SELF :: 0x80
+
+	if r >= RUNE_SELF {
+		// Multi-byte path: encode first, then write only the bytes in use.
+		encoded, size := encode_rune(r)
+		written, _ := os_write(encoded[:size])
+		return written
+	}
+
+	// Values below RUNE_SELF are written as a single byte.
+	return print_byte(byte(r))
+}
+
+
+// Writes `x` in decimal.
+print_u64 :: proc "contextless" (x: u64) #no_bounds_check {
+	BASE :: u64(10)
+
+	// Digits are produced least-significant first, so the buffer is filled
+	// from its end towards its start.
+	buf: [129]byte
+	pos := len(buf)
+	rest := x
+	for {
+		pos -= 1
+		buf[pos] = _INTEGER_DIGITS_VAR[rest % BASE]
+		rest /= BASE
+		if rest == 0 {
+			break
+		}
+	}
+
+	os_write(buf[pos:])
+}
+
+
+// Writes `x` in decimal, with a leading '-' for negative values.
+print_i64 :: proc "contextless" (x: i64) #no_bounds_check {
+	b :: u64(10)
+
+	neg := x < 0
+
+	// Take the magnitude as a u64. For min(i64) `abs` cannot produce a
+	// positive i64, which would leave a negative value whose remainder is
+	// then used as a table index; two's-complement negation in u64 yields
+	// the correct magnitude for every input.
+	u := u64(x)
+	if neg {
+		u = ~u + 1
+	}
+
+	// Digits are produced least-significant first, filling from the end.
+	a: [129]byte
+	i := len(a)
+	for u >= b {
+		i -= 1; a[i] = _INTEGER_DIGITS_VAR[u % b]
+		u /= b
+	}
+	i -= 1; a[i] = _INTEGER_DIGITS_VAR[u % b]
+	if neg {
+		i -= 1; a[i] = '-'
+	}
+
+	os_write(a[i:])
+}
+
+// Pointer-sized integer printers: each widens its argument to 64 bits and
+// forwards to `print_u64` (unsigned) or `print_i64` (signed).
+print_uint    :: proc "contextless" (x: uint)    { print_u64(u64(x)) }
+print_uintptr :: proc "contextless" (x: uintptr) { print_u64(u64(x)) }
+print_int     :: proc "contextless" (x: int)     { print_i64(i64(x)) }
+
+// Prints a source location in the format selected at compile time by
+// ODIN_ERROR_POS_STYLE:
+//   .Default: path(line:column)
+//   .Unix:    path:line:column:
+print_caller_location :: proc "contextless" (loc: Source_Code_Location) {
+	print_string(loc.file_path)
+	when ODIN_ERROR_POS_STYLE == .Default {
+		print_byte('(')
+		print_u64(u64(loc.line))
+		print_byte(':')
+		print_u64(u64(loc.column))
+		print_byte(')')
+	} else when ODIN_ERROR_POS_STYLE == .Unix {
+		print_byte(':')
+		print_u64(u64(loc.line))
+		print_byte(':')
+		print_u64(u64(loc.column))
+		print_byte(':')
+	} else {
+		// Fail the build if a new style is added without being handled here.
+		#panic("unhandled ODIN_ERROR_POS_STYLE")
+	}
+}
+// Prints the type named by `id`. A nil id prints "nil"; when built with
+// ODIN_NO_RTTI every other id prints "<unknown type>".
+print_typeid :: proc "contextless" (id: typeid) {
+	// The nil case is the same with or without RTTI.
+	if id == nil {
+		print_string("nil")
+		return
+	}
+
+	when ODIN_NO_RTTI {
+		print_string("<unknown type>")
+	} else {
+		print_type(type_info_of(id))
+	}
+}
+// Prints a textual form of the type described by `ti`, recursing into
+// element, field, parameter and variant types. A nil `ti` prints "nil".
+print_type :: proc "contextless" (ti: ^Type_Info) {
+	if ti == nil {
+		print_string("nil")
+		return
+	}
+
+	switch info in ti.variant {
+	case Type_Info_Named:
+		print_string(info.name)
+	case Type_Info_Integer:
+		switch ti.id {
+		case int:     print_string("int")
+		case uint:    print_string("uint")
+		case uintptr: print_string("uintptr")
+		case:
+			// Sized integers: 'i' or 'u' followed by the width in bits.
+			print_byte('i' if info.signed else 'u')
+			print_u64(u64(8*ti.size))
+		}
+	case Type_Info_Rune:
+		print_string("rune")
+	case Type_Info_Float:
+		print_byte('f')
+		print_u64(u64(8*ti.size))
+	case Type_Info_Complex:
+		print_string("complex")
+		print_u64(u64(8*ti.size))
+	case Type_Info_Quaternion:
+		print_string("quaternion")
+		print_u64(u64(8*ti.size))
+	case Type_Info_String:
+		print_string("string")
+	case Type_Info_Boolean:
+		switch ti.id {
+		case bool: print_string("bool")
+		case:
+			// Sized booleans: 'b' followed by the width in bits.
+			print_byte('b')
+			print_u64(u64(8*ti.size))
+		}
+	case Type_Info_Any:
+		print_string("any")
+	case Type_Info_Type_Id:
+		print_string("typeid")
+
+	case Type_Info_Pointer:
+		// A pointer without an element type is printed as rawptr.
+		if info.elem == nil {
+			print_string("rawptr")
+		} else {
+			print_string("^")
+			print_type(info.elem)
+		}
+	case Type_Info_Multi_Pointer:
+		print_string("[^]")
+		print_type(info.elem)
+	case Type_Info_Soa_Pointer:
+		print_string("#soa ^")
+		print_type(info.elem)
+	case Type_Info_Procedure:
+		print_string("proc")
+		if info.params == nil {
+			print_string("()")
+		} else {
+			// Only the parameter types are printed here, not their names.
+			t := info.params.variant.(Type_Info_Parameters)
+			print_byte('(')
+			for t, i in t.types {
+				if i > 0 { print_string(", ") }
+				print_type(t)
+			}
+			print_string(")")
+		}
+		if info.results != nil {
+			print_string(" -> ")
+			print_type(info.results)
+		}
+	case Type_Info_Parameters:
+		// Parentheses are omitted when there is exactly one entry.
+		count := len(info.names)
+		if count != 1 { print_byte('(') }
+		for name, i in info.names {
+			if i > 0 { print_string(", ") }
+
+			t := info.types[i]
+
+			if len(name) > 0 {
+				print_string(name)
+				print_string(": ")
+			}
+			print_type(t)
+		}
+		if count != 1 { print_string(")") }
+
+	case Type_Info_Array:
+		print_byte('[')
+		print_u64(u64(info.count))
+		print_byte(']')
+		print_type(info.elem)
+
+	case Type_Info_Enumerated_Array:
+		if info.is_sparse {
+			print_string("#sparse")
+		}
+		print_byte('[')
+		print_type(info.index)
+		print_byte(']')
+		print_type(info.elem)
+
+
+	case Type_Info_Dynamic_Array:
+		print_string("[dynamic]")
+		print_type(info.elem)
+	case Type_Info_Slice:
+		print_string("[]")
+		print_type(info.elem)
+
+	case Type_Info_Map:
+		print_string("map[")
+		print_type(info.key)
+		print_byte(']')
+		print_type(info.value)
+
+	case Type_Info_Struct:
+		// #soa structs are printed in their #soa[...] form and nothing else.
+		switch info.soa_kind {
+		case .None: // Ignore
+		case .Fixed:
+			print_string("#soa[")
+			print_u64(u64(info.soa_len))
+			print_byte(']')
+			print_type(info.soa_base_type)
+			return
+		case .Slice:
+			print_string("#soa[]")
+			print_type(info.soa_base_type)
+			return
+		case .Dynamic:
+			print_string("#soa[dynamic]")
+			print_type(info.soa_base_type)
+			return
+		}
+
+		print_string("struct ")
+		if info.is_packed    { print_string("#packed ") }
+		if info.is_raw_union { print_string("#raw_union ") }
+		if info.custom_align {
+			print_string("#align(")
+			print_u64(u64(ti.align))
+			print_string(") ")
+		}
+		print_byte('{')
+		for name, i in info.names {
+			if i > 0 { print_string(", ") }
+			print_string(name)
+			print_string(": ")
+			print_type(info.types[i])
+		}
+		print_byte('}')
+
+	case Type_Info_Union:
+		print_string("union ")
+		if info.custom_align {
+			print_string("#align(")
+			print_u64(u64(ti.align))
+			print_string(") ")
+		}
+		if info.no_nil {
+			print_string("#no_nil ")
+		}
+		print_byte('{')
+		for variant, i in info.variants {
+			if i > 0 { print_string(", ") }
+			print_type(variant)
+		}
+		print_string("}")
+
+	case Type_Info_Enum:
+		// Only the value names are printed, not the values themselves.
+		print_string("enum ")
+		print_type(info.base)
+		print_string(" {")
+		for name, i in info.names {
+			if i > 0 { print_string(", ") }
+			print_string(name)
+		}
+		print_string("}")
+
+	case Type_Info_Bit_Set:
+		print_string("bit_set[")
+
+		// The element is printed as an enum type, a rune range or an integer range.
+		#partial switch elem in type_info_base(info.elem).variant {
+		case Type_Info_Enum:
+			print_type(info.elem)
+		case Type_Info_Rune:
+			print_encoded_rune(rune(info.lower))
+			print_string("..")
+			print_encoded_rune(rune(info.upper))
+		case:
+			print_i64(info.lower)
+			print_string("..")
+			print_i64(info.upper)
+		}
+		if info.underlying != nil {
+			print_string("; ")
+			print_type(info.underlying)
+		}
+		print_byte(']')
+
+
+	case Type_Info_Simd_Vector:
+		print_string("#simd[")
+		print_u64(u64(info.count))
+		print_byte(']')
+		print_type(info.elem)
+
+	case Type_Info_Relative_Pointer:
+		print_string("#relative(")
+		print_type(info.base_integer)
+		print_string(") ")
+		print_type(info.pointer)
+
+	case Type_Info_Relative_Multi_Pointer:
+		print_string("#relative(")
+		print_type(info.base_integer)
+		print_string(") ")
+		print_type(info.pointer)
+		
+	case Type_Info_Matrix:
+		print_string("matrix[")
+		print_u64(u64(info.row_count))
+		print_string(", ")
+		print_u64(u64(info.column_count))
+		print_string("]")
+		print_type(info.elem)
+	}
+}
diff --git a/base/runtime/procs.odin b/base/runtime/procs.odin
new file mode 100644
index 000000000..454574c35
--- /dev/null
+++ b/base/runtime/procs.odin
@@ -0,0 +1,95 @@
+package runtime
+
+when ODIN_NO_CRT && ODIN_OS == .Windows {
+	foreign import lib "system:NtDll.lib"
+	
+	// Memory routines imported from the `lib` foreign import; the wrappers in
+	// this branch forward to them (this branch is only compiled when
+	// ODIN_NO_CRT is set on Windows).
+	// NOTE: RtlFillMemory is declared as (dst, length, fill), whereas memset
+	// takes (ptr, val, len), so the last two arguments are swapped at the call.
+	@(private="file")
+	@(default_calling_convention="system")
+	foreign lib {
+		RtlMoveMemory :: proc(dst, s: rawptr, length: int) ---
+		RtlFillMemory :: proc(dst: rawptr, length: int, fill: i32) ---
+	}
+	
+	// `memset` for Windows builds without a CRT, emitted under the link name
+	// "memset". Forwards to RtlFillMemory (which takes length before fill)
+	// and returns `ptr`.
+	@(link_name="memset", linkage="strong", require)
+	memset :: proc "c" (ptr: rawptr, val: i32, len: int) -> rawptr {
+		RtlFillMemory(ptr, len, val)
+		return ptr
+	}
+	// `memmove` for Windows builds without a CRT, emitted under the link name
+	// "memmove". Forwards to RtlMoveMemory and returns `dst`.
+	@(link_name="memmove", linkage="strong", require)
+	memmove :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
+		RtlMoveMemory(dst, src, len)
+		return dst
+	}
+	// `memcpy` for Windows builds without a CRT, emitted under the link name
+	// "memcpy". Implemented with RtlMoveMemory, exactly like `memmove` in this
+	// branch, and returns `dst`.
+	@(link_name="memcpy", linkage="strong", require)
+	memcpy :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
+		RtlMoveMemory(dst, src, len)
+		return dst
+	}
+} else when ODIN_NO_CRT || (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32) {
+	// Byte-wise `memset`: writes `byte(val)` into the first `len` bytes at
+	// `ptr` and returns `ptr`. A nil pointer or a zero length leaves memory
+	// untouched; a negative length writes nothing.
+	@(link_name="memset", linkage="strong", require)
+	memset :: proc "c" (ptr: rawptr, val: i32, len: int) -> rawptr {
+		if ptr == nil || len == 0 {
+			return ptr
+		}
+		fill  := byte(val)
+		bytes := ([^]byte)(ptr)
+		for idx in 0..<len {
+			bytes[idx] = fill
+		}
+		return ptr
+	}
+
+	// Byte-wise `bzero`: clears the first `len` bytes at `ptr` and returns
+	// `ptr`. A nil pointer or a zero length leaves memory untouched.
+	@(link_name="bzero", linkage="strong", require)
+	bzero :: proc "c" (ptr: rawptr, len: int) -> rawptr {
+		if ptr == nil || len == 0 {
+			return ptr
+		}
+		bytes := ([^]byte)(ptr)
+		for idx in 0..<len {
+			bytes[idx] = 0
+		}
+		return ptr
+	}
+
+	// Byte-wise `memmove`: copies `len` bytes from `src` to `dst`, choosing
+	// the copy direction so that overlapping ranges are handled, and returns
+	// `dst`. Nothing is copied when the pointers are equal or `len` is 0.
+	@(link_name="memmove", linkage="strong", require)
+	memmove :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
+		to, from := ([^]byte)(dst), ([^]byte)(src)
+		if to == from || len == 0 {
+			return dst
+		}
+
+		span := uintptr(len)
+		switch {
+		case to > from && uintptr(to)-uintptr(from) < span:
+			// Destination starts inside the source range: copy from the
+			// last byte down so unread source bytes are not overwritten.
+			for i := len-1; i >= 0; i -= 1 {
+				to[i] = from[i]
+			}
+		case from > to && uintptr(from)-uintptr(to) < span:
+			// Source starts inside the destination range: copy upwards.
+			for i in 0..<len {
+				to[i] = from[i]
+			}
+		case:
+			// The ranges do not overlap.
+			return memcpy(dst, src, len)
+		}
+		return dst
+	}
+	// Byte-wise `memcpy`: copies `len` bytes from `src` to `dst` in ascending
+	// address order and returns the destination pointer. Identical pointers
+	// are a no-op.
+	@(link_name="memcpy", linkage="strong", require)
+	memcpy :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
+		out, inp := ([^]byte)(dst), ([^]byte)(src)
+		if out == inp {
+			return dst
+		}
+		for idx in 0..<len {
+			out[idx] = inp[idx]
+		}
+		return dst
+	}
+} else {
+	// Fallback `memset` for the remaining configurations; unlike the other
+	// branches it carries no link_name/linkage attributes. Writes
+	// `byte(val)` into the first `len` bytes at `ptr` and returns `ptr`;
+	// a nil pointer or a zero length leaves memory untouched.
+	memset :: proc "c" (ptr: rawptr, val: i32, len: int) -> rawptr {
+		if ptr == nil || len == 0 {
+			return ptr
+		}
+		dst   := ([^]byte)(ptr)
+		value := byte(val)
+		for offset in 0..<len {
+			dst[offset] = value
+		}
+		return ptr
+	}
+}
+\ No newline at end of file
diff --git a/base/runtime/procs_darwin.odin b/base/runtime/procs_darwin.odin
new file mode 100644
index 000000000..9c53b5b16
--- /dev/null
+++ b/base/runtime/procs_darwin.odin
@@ -0,0 +1,21 @@
+//+private
+package runtime
+
+foreign import "system:Foundation.framework"
+
+import "core:intrinsics"
+
+// Pointer aliases for the Objective-C object, class and selector types
+// provided by `intrinsics`; used in the foreign declarations below.
+objc_id :: ^intrinsics.objc_object
+objc_Class :: ^intrinsics.objc_class
+objc_SEL :: ^intrinsics.objc_selector
+
+// Objective-C runtime entry points resolved through the Foundation foreign
+// import. All are declared with the "c" calling convention.
+foreign Foundation {
+	// Class / selector lookup and class creation, keyed by C-string names.
+	objc_lookUpClass :: proc "c" (name: cstring) -> objc_Class ---
+	sel_registerName :: proc "c" (name: cstring) -> objc_SEL ---
+	objc_allocateClassPair :: proc "c" (superclass: objc_Class, name: cstring, extraBytes: uint) -> objc_Class ---
+
+	// Message-send variants. As declared here they differ only in return
+	// type (none, f64, complex128, none); extra arguments are passed as C varargs.
+	// NOTE(review): `_stret` is presumably the struct-return variant — confirm
+	// against the Objective-C runtime reference.
+	objc_msgSend        :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) ---
+	objc_msgSend_fpret  :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) -> f64 ---
+	objc_msgSend_fp2ret :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) -> complex128 ---
+	objc_msgSend_stret  :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) ---
+}
diff --git a/base/runtime/procs_js.odin b/base/runtime/procs_js.odin
new file mode 100644
index 000000000..d3e12410c
--- /dev/null
+++ b/base/runtime/procs_js.odin
@@ -0,0 +1,15 @@
+//+build js
+package runtime
+
+// Package-level copy of `context`, filled in by the `@(init)` procedure below
+// and exposed by address through `default_context_ptr`.
+init_default_context_for_js: Context
+// File-private `@(init)` procedure: stores the `context` it runs with into
+// `init_default_context_for_js`.
+@(init, private="file")
+init_default_context :: proc() {
+	init_default_context_for_js = context
+}
+
+// Exported under the link name "default_context_ptr"; returns the address of
+// the package-level `init_default_context_for_js`.
+// NOTE(review): presumably called by the JS host to obtain a context for
+// calling exported Odin procedures — confirm against the JS runtime glue.
+@(export)
+@(link_name="default_context_ptr")
+default_context_ptr :: proc "contextless" () -> ^Context {
+	return &init_default_context_for_js
+}
+
diff --git a/base/runtime/procs_wasm.odin b/base/runtime/procs_wasm.odin
new file mode 100644
index 000000000..26dcfef77
--- /dev/null
+++ b/base/runtime/procs_wasm.odin
@@ -0,0 +1,40 @@
+//+build wasm32, wasm64p32
+package runtime
+
+// 128-bit integer that can be accessed either as a whole (`all`) or as two
+// 64-bit words (`lo`, `hi`) sharing the same storage. `lo` is the first word
+// in memory, so it is the low half only on little-endian targets (this file
+// is built for wasm32 / wasm64p32 only, per its build tag).
+@(private="file")
+ti_int :: struct #raw_union {
+	using s: struct { lo, hi: u64 },
+	all: i128,
+}
+
+// 128-bit left shift helper, emitted under the link name "__ashlti3"
+// (the name compiler-rt uses for this builtin).
+//
+// Inputs:
+//   a  - value to shift
+//   b_ - shift amount in bits; the algorithm assumes 0 <= b_ < 128, as in
+//        compiler-rt's implementation
+// Returns `a << b_`, computed on the two 64-bit halves of `ti_int`.
+@(link_name="__ashlti3", linkage="strong")
+__ashlti3 :: proc "contextless" (a: i128, b_: u32) -> i128 {
+	// Width in bits of one half of `ti_int`. The halves are u64, so this must
+	// be 64: it is both the bit tested to detect shifts >= 64 and the base
+	// for the carry shift below.
+	bits_in_dword :: size_of(u64)*8
+	b := b_
+
+	input, result: ti_int
+	input.all = a
+	if b & bits_in_dword != 0 {
+		// 64 <= b < 128: the low word moves entirely into the high word.
+		result.lo = 0
+		result.hi = input.lo << (b-bits_in_dword)
+	} else {
+		// 0 <= b < 64
+		if b == 0 {
+			return a
+		}
+		result.lo = input.lo<<b
+		// Bits shifted out of the low word are carried into the high word.
+		result.hi = (input.hi<<b) | (input.lo>>(bits_in_dword-b))
+	}
+	return result.all
+}
+
+
+// 128-bit multiplication helper, emitted under the link name "__multi3"
+// (the name compiler-rt uses for this builtin).
+//
+// Returns the low 128 bits of `a * b`. The product is assembled from 64-bit
+// operations only, so this procedure never needs a 128-bit multiply itself:
+//   a*b mod 2^128 = full(a.lo * b.lo) + ((a.hi*b.lo + a.lo*b.hi) << 64)
+@(link_name="__multi3", linkage="strong")
+__multi3 :: proc "contextless" (a, b: i128) -> i128 {
+	half_bits :: 32
+	low_mask  :: u64(0xFFFF_FFFF)
+
+	x, y, r: ti_int
+
+	x.all = a
+	y.all = b
+
+	// Full 64x64 -> 128-bit product of the low words, built from 32-bit limbs
+	// (same scheme as compiler-rt's __mulddi3). A plain `x.lo * y.lo` would
+	// discard the upper 64 bits of this product.
+	x0, x1 := x.lo & low_mask, x.lo >> half_bits
+	y0, y1 := y.lo & low_mask, y.lo >> half_bits
+
+	r.lo = x0 * y0
+	t := r.lo >> half_bits
+	r.lo &= low_mask
+	t += x1 * y0
+	r.lo += (t & low_mask) << half_bits
+	r.hi = t >> half_bits
+	t = r.lo >> half_bits
+	r.lo &= low_mask
+	t += y1 * x0
+	r.lo += (t & low_mask) << half_bits
+	r.hi += t >> half_bits
+	r.hi += x1 * y1
+
+	// Cross terms only affect the high word; anything above bit 127 wraps away.
+	r.hi += x.hi*y.lo + x.lo*y.hi
+	return r.all
+}
+\ No newline at end of file
diff --git a/base/runtime/procs_windows_amd64.asm b/base/runtime/procs_windows_amd64.asm
new file mode 100644
index 000000000..f588b3453
--- /dev/null
+++ b/base/runtime/procs_windows_amd64.asm
@@ -0,0 +1,79 @@
+bits 64
+
+; Symbols made visible to the linker by this object file.
+global __chkstk
+global _tls_index
+global _fltused
+
+section .data
+	; Both are 32-bit (dd) values. NOTE(review): these are presumably the
+	; symbols a Windows toolchain expects the C runtime to provide — confirm.
+	_tls_index: dd 0
+	_fltused:   dd 0x9875
+
+section .text
+; NOTE(flysand): A call to __chkstk is emitted by the
+; compiler when we're allocating arrays larger than
+; a page size. The reason is that the OS doesn't map the
+; whole stack into memory all at once, but does so page-by-page.
+; When the next page is touched, the CPU generates a page fault,
+; which *the OS* is handling by allocating the next page in the
+; stack until we reach the limit of stack size.
+;
+; This page is called the guard page, touching it will extend
+; the size of the stack and overwrite the stack limit in the TEB.
+;
+; If we allocate a large enough array and start writing from the
+; bottom of it, it's possible that we may start touching
+; non-contiguous pages which are unmapped. OS only maps the stack
+; page into the memory if the page above it was also mapped.
+;
+; Therefore the compilers insert this routine, the sole purpose
+; of which is to step through the stack starting from the RSP
+; down to the new RSP after allocation, and touch every page
+; of the new allocation so that the stack is fully mapped for
+; the new allocation
+;
+; I got this code by disassembling the output of MSVC a long
+; time ago. I don't remember if I've cleaned it up, but it definitely
+; stinks.
+;
+; Additional notes:
+;   RAX (passed as parameter) holds the allocation's size
+;   GS:[0x10] references the current stack limit
+;     (i.e. bottom of the stack (i.e. lowest address accessible))
+;
+; Also this stuff is windows-only kind of thing, because linux people
+; didn't think stack that grows is cool enough for them, but the kernel
+; totally supports this kind of stack.
+; __chkstk: stack probe.
+;   In:  RAX = size of the allocation about to be made
+;   Out: RSP, r10 and r11 are the same as on entry (r10/r11 are saved and
+;        restored around the body); flags are not preserved.
+__chkstk:
+	;; Allocate 16 bytes to store values of r10 and r11
+	sub   rsp, 0x10
+	mov   [rsp], r10
+	mov   [rsp+0x8], r11
+	;; Set r10 to point to the stack as of the moment of the function call:
+	;; 0x10 bytes of scratch space allocated above + 8 bytes of return
+	;; address = 0x18
+	lea   r10, [rsp+0x18]
+	;; r10 -= rax gives the bottom of the stack allocation. If the
+	;; subtraction borrows (wraps below zero), reset r10 to 0; we'll crash
+	;; with a segfault anyway
+	xor   r11, r11
+	sub   r10, rax
+	cmovb r10, r11
+	;; Load r11 with the bottom of the stack (lowest allocated address)
+	mov   r11, gs:[0x10] ; NOTE(flysand): gs:[0x10] is stack limit
+	;; If the bottom of the allocation is at or above the bottom of the
+	;; stack, we don't need to probe
+	cmp   r10, r11
+	jnb   .end
+	;; Align the bottom of the allocation down to page size: masking the low
+	;; 16 bits of r10 with 0xf000 clears its low 12 bits
+	and   r10w, 0xf000
+.loop:
+	;; Move the pointer down by one page (0x1000 bytes), and touch that page
+	;; by storing a zero byte into it
+	lea   r11, [r11-0x1000]
+	mov   byte [r11], 0x0
+	;; Did we reach the bottom of the allocation?
+	cmp   r10, r11
+	jnz   .loop
+.end:
+	;; Restore previous r10 and r11 and return
+	mov   r10, [rsp]
+	mov   r11, [rsp+0x8]
+	add   rsp, 0x10
+	ret
+\ No newline at end of file
diff --git a/base/runtime/procs_windows_amd64.odin b/base/runtime/procs_windows_amd64.odin
new file mode 100644
index 000000000..ea495f5fa
--- /dev/null
+++ b/base/runtime/procs_windows_amd64.odin
@@ -0,0 +1,26 @@
+//+private
+//+no-instrumentation
+package runtime
+
+foreign import kernel32 "system:Kernel32.lib"
+
+// `RaiseException` imported from the kernel32 foreign import, using the
+// "system" calling convention. Declared as diverging (`-> !`), i.e. callers
+// treat it as never returning.
+@(private)
+foreign kernel32 {
+	RaiseException :: proc "system" (dwExceptionCode, dwExceptionFlags, nNumberOfArguments: u32, lpArguments: ^uint) -> ! ---
+}
+
+// Raises the exception code EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0xC000008C) via
+// `RaiseException`, with no flags and no arguments. Never returns.
+windows_trap_array_bounds :: proc "contextless" () -> ! {
+	EXCEPTION_ARRAY_BOUNDS_EXCEEDED :: 0xC000008C
+
+
+	RaiseException(EXCEPTION_ARRAY_BOUNDS_EXCEEDED, 0, 0, nil)
+}
+
+// Trap for failed type assertions. It simply reuses the array-bounds trap, so
+// the raised exception code is the same as for `windows_trap_array_bounds`.
+windows_trap_type_assertion :: proc "contextless" () -> ! {
+	windows_trap_array_bounds()
+}
+
+when ODIN_NO_CRT {
+	@(require)
+	foreign import crt_lib "procs_windows_amd64.asm"
+}
diff --git a/base/runtime/procs_windows_i386.odin b/base/runtime/procs_windows_i386.odin
new file mode 100644
index 000000000..10422cf07
--- /dev/null
+++ b/base/runtime/procs_windows_i386.odin
@@ -0,0 +1,29 @@
+//+private
+//+no-instrumentation
+package runtime
+
+@require foreign import "system:int64.lib"
+
+foreign import kernel32 "system:Kernel32.lib"
+
+// Raises the exception code EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0xC000008C) via
+// kernel32's `RaiseException`, with no flags and no arguments. Never returns.
+windows_trap_array_bounds :: proc "contextless" () -> ! {
+	// Imported locally; the parameter types are spelled with their
+	// underlying Odin types (DWORD = u32, ULONG_PTR = uint).
+	foreign kernel32 {
+		RaiseException :: proc "system" (dwExceptionCode, dwExceptionFlags, nNumberOfArguments: u32, lpArguments: ^uint) -> ! ---
+	}
+
+	EXCEPTION_ARRAY_BOUNDS_EXCEEDED :: 0xC000008C
+	RaiseException(EXCEPTION_ARRAY_BOUNDS_EXCEEDED, 0, 0, nil)
+}
+
+// Trap for failed type assertions. It simply reuses the array-bounds trap, so
+// the raised exception code is the same as for `windows_trap_array_bounds`.
+windows_trap_type_assertion :: proc "contextless" () -> ! {
+	windows_trap_array_bounds()
+}
+
+// Package-private variables exported under fixed link names.
+// NOTE(review): these are presumably symbols the Windows toolchain expects to
+// find at link time — confirm. `_fltused` carries the same value (0x9875) as
+// the definition in procs_windows_amd64.asm.
+@(private, export, link_name="_fltused") _fltused: i32 = 0x9875
+
+@(private, export, link_name="_tls_index") _tls_index: u32
+@(private, export, link_name="_tls_array") _tls_array: u32
diff --git a/base/runtime/udivmod128.odin b/base/runtime/udivmod128.odin
new file mode 100644
index 000000000..87ef73c2c
--- /dev/null
+++ b/base/runtime/udivmod128.odin
@@ -0,0 +1,156 @@
+package runtime
+
+import "core:intrinsics"
+
+// Unsigned 128-bit division: returns the quotient `a / b` and, when `rem` is
+// not nil, stores the remainder `a % b` through it.
+//
+// Both operands are handled as pairs of 64-bit words. Cheap special cases
+// (operands that fit in 64 bits, power-of-two divisors, a divisor larger than
+// the dividend) return early; everything else falls through to a bit-by-bit
+// shift-and-subtract loop that runs `sr` times.
+//
+// `b == 0` is not handled specially: it ends up in a 64-bit division (or
+// modulo) by zero in the special-case paths.
+udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
+	_ctz :: intrinsics.count_trailing_zeros
+	_clz :: intrinsics.count_leading_zeros
+
+	// n = dividend, d = divisor, q = quotient, r = remainder, each as two
+	// 64-bit words; `sr` is the number of loop iterations still required.
+	n := transmute([2]u64)a
+	d := transmute([2]u64)b
+	q, r: [2]u64
+	sr: u32 = 0
+
+	// Word indices depend on the target's byte order.
+	low  :: 1 when ODIN_ENDIAN == .Big else 0
+	high :: 1 - low
+	U64_BITS :: 8*size_of(u64)
+	U128_BITS :: 8*size_of(u128)
+
+	// Special Cases
+	// In the case labels below each operand is written as "high low" words:
+	// 0 = zero, K = non-zero, X = unknown.
+
+	if n[high] == 0 {
+		if d[high] == 0 {
+			// 0 X / 0 X: plain 64-bit division
+			if rem != nil {
+				res := n[low] % d[low]
+				rem^ = u128(res)
+			}
+			return u128(n[low] / d[low])
+		}
+
+		// 0 X / K X: divisor is larger than the dividend
+		if rem != nil {
+			rem^ = u128(n[low])
+		}
+		return 0
+	}
+
+	// From here on n[high] != 0
+	if d[low] == 0 {
+		if d[high] == 0 {
+			// K X / 0 0: division by zero
+			if rem != nil {
+				rem^ = u128(n[high] % d[low])
+			}
+			return u128(n[high] / d[low])
+		}
+		if n[low] == 0 {
+			// K 0 / K 0: both low words are zero, divide the high words
+			if rem != nil {
+				r[high] = n[high] % d[high]
+				r[low] = 0
+				rem^ = transmute(u128)r
+			}
+			return u128(n[high] / d[high])
+		}
+
+		// K K / K 0 with d[high] a power of two: shift and mask
+		if d[high] & (d[high]-1) == 0 {
+			if rem != nil {
+				r[low] = n[low]
+				r[high] = n[high] & (d[high] - 1)
+				rem^ = transmute(u128)r
+			}
+			return u128(n[high] >> _ctz(d[high]))
+		}
+
+		// K K / K 0, general divisor. The subtraction is done signed and
+		// reinterpreted as unsigned, so a negative difference (divisor has
+		// fewer leading zeros than the dividend) becomes a huge value and
+		// is caught by the range check below.
+		sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])))
+		if sr > U64_BITS - 2 {
+			if rem != nil {
+				rem^ = a
+			}
+			return 0
+		}
+
+		sr += 1
+
+		// q = n << (128 - sr), r = n >> sr   (1 <= sr <= 63)
+		q[low]  = 0
+		q[high] = n[low] << u64(U64_BITS - sr)
+		r[high] = n[high] >> sr
+		r[low]  = (n[high] << (U64_BITS - sr)) | (n[low] >> sr)
+	} else {
+		// d[low] != 0
+		if d[high] == 0 {
+			// K X / 0 K with d[low] a power of two: shift and mask
+			if d[low] & (d[low] - 1) == 0 {
+				if rem != nil {
+					rem^ = u128(n[low] & (d[low] - 1))
+				}
+				if d[low] == 1 {
+					return a
+				}
+				sr = u32(_ctz(d[low]))
+				q[high] = n[high] >> sr
+				q[low] = (n[high] << (U64_BITS-sr)) | (n[low] >> sr)
+				return transmute(u128)q
+			}
+
+			// K X / 0 K, general divisor
+			sr = 1 + U64_BITS + u32(_clz(d[low])) - u32(_clz(n[high]))
+
+			// q = n << (128 - sr), r = n >> sr, split by where `sr` falls
+			// relative to the 64-bit word boundary.
+			switch {
+			case sr == U64_BITS:
+				q[low]  = 0
+				q[high] = n[low]
+				r[high] = 0
+				r[low]  = n[high]
+			case sr < U64_BITS:
+				q[low]  = 0
+				q[high] = n[low] << (U64_BITS - sr)
+				r[high] = n[high] >> sr
+				r[low]  = (n[high] << (U64_BITS - sr)) | (n[low] >> sr)
+			case:
+				q[low]  = n[low] << (U128_BITS - sr)
+				q[high] = (n[high] << (U128_BITS - sr)) | (n[low] >> (sr - U64_BITS))
+				r[high] = 0
+				r[low]  = n[high] >> (sr - U64_BITS)
+			}
+		} else {
+			// K X / K K; same signed-difference trick as above
+			sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])))
+
+			if sr > U64_BITS - 1 {
+				if rem != nil {
+					rem^ = a
+				}
+				return 0
+			}
+
+			sr += 1
+
+			// q = n << (128 - sr), r = n >> sr   (1 <= sr <= 64)
+			q[low] = 0
+			if sr == U64_BITS {
+				q[high] = n[low]
+				r[high] = 0
+				r[low]  = n[high]
+			} else {
+				r[high] = n[high] >> sr
+				r[low]  = (n[high] << (U64_BITS - sr)) | (n[low] >> sr)
+				q[high] = n[low] << (U64_BITS - sr)
+			}
+		}
+	}
+
+	// General case: q and r were initialised above as n << (128 - sr) and
+	// n >> sr. Each iteration shifts the 256-bit pair r:q left by one bit
+	// (feeding the previous quotient bit in at the bottom) and conditionally
+	// subtracts the divisor from r.
+	carry: u32 = 0
+	r_all: u128
+
+	for ; sr > 0; sr -= 1 {
+		r[high] = (r[high] << 1) | (r[low]  >> (U64_BITS - 1))
+		r[low]  = (r[low]  << 1) | (q[high] >> (U64_BITS - 1))
+		q[high] = (q[high] << 1) | (q[low]  >> (U64_BITS - 1))
+		q[low]  = (q[low]  << 1) | u64(carry)
+
+		r_all = transmute(u128)r
+		// Branch-free compare: `s` is the sign of (b - r_all - 1) spread over
+		// all 128 bits, intended to be all ones when r_all >= b and zero
+		// otherwise. It serves as the next quotient bit and as the mask that
+		// selects whether `b` is subtracted.
+		s := i128(b - r_all - 1) >> (U128_BITS - 1)
+		carry = u32(s & 1)
+		r_all -= b & transmute(u128)s
+		r = transmute([2]u64)r_all
+	}
+
+	// Shift in the final quotient bit.
+	q_all := ((transmute(u128)q) << 1) | u128(carry)
+	if rem != nil {
+		rem^ = r_all
+	}
+
+	return q_all
+}
author	gingerBill <bill@gingerbill.org>	2024-01-28 21:05:53 +0000
committer	gingerBill <bill@gingerbill.org>	2024-01-28 21:05:53 +0000
commit	09fa1c29cd014b4560b3c79c72db68af20ef8187 (patch)
tree	45095630fb03a50df20e0249f98879cf27d94397 /base/runtime
parent	ddcaa0de5395bfb1a2b004e6a6cb5e2ba1e2eed1 (diff)