|  |  |  |
|---|---|---|
| author | Karl Zylinski <karl@zylinski.se> | 2024-09-17 19:36:17 +0200 |
| committer | Karl Zylinski <karl@zylinski.se> | 2024-09-17 19:36:17 +0200 |
| commit | 093ade050445b3e348177e30fb1fc9d726f7b289 (patch) | |
| tree | 5122cfefa5fefbab9d27d5d8adacd8739eeeb5de | |
| parent | 3d7b92426081cd9f3197b13f7384a52dbac5379a (diff) | |
| parent | 6ef779cd5c8260b2e6979e676d28489fd53dd599 (diff) | |
Merge branch 'master' into file-tags-without-comments
97 files changed, 7980 insertions, 1337 deletions
diff --git a/base/runtime/core_builtin.odin b/base/runtime/core_builtin.odin index 8157afe09..67d249d11 100644 --- a/base/runtime/core_builtin.odin +++ b/base/runtime/core_builtin.odin @@ -913,7 +913,7 @@ card :: proc "contextless" (s: $S/bit_set[$E; $U]) -> int { @builtin @(disabled=ODIN_DISABLE_ASSERT) -assert :: proc(condition: bool, message := "", loc := #caller_location) { +assert :: proc(condition: bool, message := #caller_expression(condition), loc := #caller_location) { if !condition { // NOTE(bill): This is wrapped in a procedure call // to improve performance to make the CPU not @@ -952,7 +952,7 @@ unimplemented :: proc(message := "", loc := #caller_location) -> ! { @builtin @(disabled=ODIN_DISABLE_ASSERT) -assert_contextless :: proc "contextless" (condition: bool, message := "", loc := #caller_location) { +assert_contextless :: proc "contextless" (condition: bool, message := #caller_expression(condition), loc := #caller_location) { if !condition { // NOTE(bill): This is wrapped in a procedure call // to improve performance to make the CPU not diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin index 6905317fc..59811a525 100644 --- a/base/runtime/internal.odin +++ b/base/runtime/internal.odin @@ -118,16 +118,15 @@ mem_copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> r DEFAULT_ALIGNMENT :: 2*align_of(rawptr) mem_alloc_bytes :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { - if size == 0 { - return nil, nil - } - if allocator.procedure == nil { + assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc) + if size == 0 || allocator.procedure == nil{ return nil, nil } return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc) } mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { + assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc) if size == 0 || allocator.procedure == nil { return nil, nil } @@ -135,6 +134,7 @@ mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, a } mem_alloc_non_zeroed :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { + assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc) if size == 0 || allocator.procedure == nil { return nil, nil } @@ -174,6 +174,7 @@ mem_free_all :: #force_inline proc(allocator := context.allocator, loc := #calle } _mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) { + assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc) if allocator.procedure == nil { return nil, nil } @@ -215,9 +216,11 @@ _mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignmen } mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) { + assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc) return _mem_resize(ptr, old_size, new_size, alignment, allocator, true, loc) } non_zero_mem_resize :: proc(ptr: rawptr, 
old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (data: []byte, err: Allocator_Error) { + assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc) return _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc) } diff --git a/core/bytes/bytes.odin b/core/bytes/bytes.odin index 45eb44307..c0d25bcce 100644 --- a/core/bytes/bytes.odin +++ b/core/bytes/bytes.odin @@ -334,7 +334,7 @@ Inputs: Returns: - index: The index of the byte `c`, or -1 if it was not found. */ -index_byte :: proc(s: []byte, c: byte) -> (index: int) #no_bounds_check { +index_byte :: proc "contextless" (s: []byte, c: byte) -> (index: int) #no_bounds_check { i, l := 0, len(s) // Guard against small strings. On modern systems, it is ALWAYS @@ -469,7 +469,7 @@ Inputs: Returns: - index: The index of the byte `c`, or -1 if it was not found. */ -last_index_byte :: proc(s: []byte, c: byte) -> int #no_bounds_check { +last_index_byte :: proc "contextless" (s: []byte, c: byte) -> int #no_bounds_check { i := len(s) // Guard against small strings. On modern systems, it is ALWAYS diff --git a/core/mem/alloc.odin b/core/mem/alloc.odin index e51d971e1..fac58daaf 100644 --- a/core/mem/alloc.odin +++ b/core/mem/alloc.odin @@ -2,109 +2,711 @@ package mem import "base:runtime" -// NOTE(bill, 2019-12-31): These are defined in `package runtime` as they are used in the `context`. This is to prevent an import definition cycle. -Allocator_Mode :: runtime.Allocator_Mode +//NOTE(bill, 2019-12-31): These are defined in `package runtime` as they are used in the `context`. This is to prevent an import definition cycle. + /* -Allocator_Mode :: enum byte { - Alloc, - Free, - Free_All, - Resize, - Query_Features, - Alloc_Non_Zeroed, - Resize_Non_Zeroed, -} +A request to allocator procedure. + +This type represents a type of allocation request made to an allocator +procedure. There is one allocator procedure per allocator, and this value is +used to discriminate between different functions of the allocator. + +The type is defined as follows: + + Allocator_Mode :: enum byte { + Alloc, + Alloc_Non_Zeroed, + Free, + Free_All, + Resize, + Resize_Non_Zeroed, + Query_Features, + } + +Depending on which value is used, the allocator procedure will perform different +functions: + +- `Alloc`: Allocates a memory region with a given `size` and `alignment`. +- `Alloc_Non_Zeroed`: Same as `Alloc` without explicit zero-initialization of + the memory region. +- `Free`: Free a memory region located at address `ptr` with a given `size`. +- `Free_All`: Free all memory allocated using this allocator. +- `Resize`: Resize a memory region located at address `old_ptr` with size + `old_size` to be `size` bytes in length and have the specified `alignment`, + in case a re-alllocation occurs. +- `Resize_Non_Zeroed`: Same as `Resize`, without explicit zero-initialization. */ +Allocator_Mode :: runtime.Allocator_Mode -Allocator_Mode_Set :: runtime.Allocator_Mode_Set /* -Allocator_Mode_Set :: distinct bit_set[Allocator_Mode]; +A set of allocator features. + +This type represents values that contain a set of features an allocator has. +Currently the type is defined as follows: + + Allocator_Mode_Set :: distinct bit_set[Allocator_Mode]; */ +Allocator_Mode_Set :: runtime.Allocator_Mode_Set -Allocator_Query_Info :: runtime.Allocator_Query_Info /* -Allocator_Query_Info :: struct { - pointer: rawptr, - size: Maybe(int), - alignment: Maybe(int), -} +Allocator information. 
+ +This type represents information about a given allocator at a specific point +in time. Currently the type is defined as follows: + + Allocator_Query_Info :: struct { + pointer: rawptr, + size: Maybe(int), + alignment: Maybe(int), + } + +- `pointer`: Pointer to a backing buffer. +- `size`: Size of the backing buffer. +- `alignment`: The allocator's alignment. + +If not applicable, any of these fields may be `nil`. */ +Allocator_Query_Info :: runtime.Allocator_Query_Info + +/* +An allocation request error. +This type represents error values the allocators may return upon requests. + + Allocator_Error :: enum byte { + None = 0, + Out_Of_Memory = 1, + Invalid_Pointer = 2, + Invalid_Argument = 3, + Mode_Not_Implemented = 4, + } + +The meaning of the errors is as follows: + +- `None`: No error. +- `Out_Of_Memory`: Either: + 1. The allocator has ran out of the backing buffer, or the requested + allocation size is too large to fit into a backing buffer. + 2. The operating system error during memory allocation. + 3. The backing allocator was used to allocate a new backing buffer and the + backing allocator returned Out_Of_Memory. +- `Invalid_Pointer`: The pointer referring to a memory region does not belong + to any of the allocators backing buffers or does not point to a valid start + of an allocation made in that allocator. +- `Invalid_Argument`: Can occur if one of the arguments makes it impossible to + satisfy a request (i.e. having alignment larger than the backing buffer + of the allocation). +- `Mode_Not_Implemented`: The allocator does not support the specified + operation. For example, an arena does not support freeing individual + allocations. +*/ Allocator_Error :: runtime.Allocator_Error + /* -Allocator_Error :: enum byte { - None = 0, - Out_Of_Memory = 1, - Invalid_Pointer = 2, - Invalid_Argument = 3, - Mode_Not_Implemented = 4, -} +The allocator procedure. + +This type represents allocation procedures. An allocation procedure is a single +procedure, implementing all allocator functions such as allocating the memory, +freeing the memory, etc. + +Currently the type is defined as follows: + + Allocator_Proc :: #type proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size: int, + alignment: int, + old_memory: rawptr, + old_size: int, + location: Source_Code_Location = #caller_location, + ) -> ([]byte, Allocator_Error); + +The function of this procedure and the meaning of parameters depends on the +value of the `mode` parameter. For any operation the following constraints +apply: + +- The `alignment` must be a power of two. +- The `size` must be a positive integer. + +## 1. `.Alloc`, `.Alloc_Non_Zeroed` + +Allocates a memory region of size `size`, aligned on a boundary specified by +`alignment`. + +**Inputs**: +- `allocator_data`: Pointer to the allocator data. +- `mode`: `.Alloc` or `.Alloc_Non_Zeroed`. +- `size`: The desired size of the memory region. +- `alignment`: The desired alignmnet of the allocation. +- `old_memory`: Unused, should be `nil`. +- `old_size`: Unused, should be 0. + +**Returns**: +1. The memory region, if allocated successfully, or `nil` otherwise. +2. An error, if allocation failed. + +**Note**: The nil allocator may return `nil`, even if no error is returned. +Always check both the error and the allocated buffer. + +**Note**: The `.Alloc` mode is required to be implemented for an allocator +and can not return a `.Mode_Not_Implemented` error. + +## 2. `Free` + +Frees a memory region located at the address specified by `old_memory`. 
If the +allocator does not track sizes of allocations, the size should be specified in +the `old_size` parameter. + +**Inputs**: +- `allocator_data`: Pointer to the allocator data. +- `mode`: `.Free`. +- `size`: Unused, should be 0. +- `alignment`: Unused, should be 0. +- `old_memory`: Pointer to the memory region to free. +- `old_size`: The size of the memory region to free. This parameter is optional + if the allocator keeps track of the sizes of allocations. + +**Returns**: +1. `nil`. +2. Error, if freeing failed. + +## 3. `Free_All` + +Frees all allocations associated with the allocator, making it available for +further allocations using the same backing buffers. + +**Inputs**: +- `allocator_data`: Pointer to the allocator data. +- `mode`: `.Free_All`. +- `size`: Unused, should be 0. +- `alignment`: Unused, should be 0. +- `old_memory`: Unused, should be `nil`. +- `old_size`: Unused, should be `0`. + +**Returns**: +1. `nil`. +2. Error, if freeing failed. + +## 4. `Resize`, `Resize_Non_Zeroed` + +Resizes the memory region of size `old_size`, located at the address +specified by `old_memory`, to have the new size `size`. The slice of the new +memory region is returned from the procedure. The allocator may attempt to +keep the new memory region at the same address as the previous allocation, +but no such guarantee is made. Do not assume the new memory region will +be at the same address as the old memory region. + +If the `old_memory` pointer is `nil`, this function acts just like `.Alloc` or +`.Alloc_Non_Zeroed`, using `size` and `alignment` to allocate a new memory +region. + +If `size` is `0`, the procedure acts just like `.Free`, freeing the +memory region `old_size` bytes in length, located at the address specified by +`old_memory`. + +If the `old_memory` pointer is not aligned to the boundary specified by +`alignment`, the procedure relocates the buffer such that the reallocated +buffer is aligned to the boundary specified by `alignment`. + +**Inputs**: +- `allocator_data`: Pointer to the allocator data. +- `mode`: `.Resize` or `.Resize_Non_Zeroed`. +- `size`: The desired new size of the memory region. +- `alignment`: The alignment of the new memory region, in case it is reallocated. +- `old_memory`: The pointer to the memory region to resize. +- `old_size`: The size of the memory region to resize. If the allocator + keeps track of the sizes of allocations, this parameter is optional. + +**Returns**: +1. The slice of the memory region after the resize operation, if successful, + `nil` otherwise. +2. An error, if the resize failed. + +**Note**: Some allocators may return `nil`, even if no error is returned. +Always check both the error and the allocated buffer. + +**Note**: if `old_size` is `0` and `old_memory` is `nil`, this operation is a +no-op, and should not return errors. */ Allocator_Proc :: runtime.Allocator_Proc + /* -Allocator_Proc :: #type proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, location: Source_Code_Location = #caller_location) -> ([]byte, Allocator_Error); -*/ +Allocator. + +This type represents a generic interface for all allocators. Currently this type +is defined as follows: + Allocator :: struct { + procedure: Allocator_Proc, + data: rawptr, + } + +- `procedure`: Pointer to the allocation procedure. +- `data`: Pointer to the allocator data. +*/ Allocator :: runtime.Allocator + /* -Allocator :: struct { - procedure: Allocator_Proc, - data: rawptr, -} -*/ +Default alignment.
+This value is the default alignment for all platforms that is used, if the +alignment is not specified explicitly. +*/ DEFAULT_ALIGNMENT :: 2*align_of(rawptr) +/* +Default page size. + +This value is the default page size for the current platform. +*/ DEFAULT_PAGE_SIZE :: 64 * 1024 when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 else 16 * 1024 when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 else 4 * 1024 +/* +Allocate memory. + +This function allocates `size` bytes of memory, aligned to a boundary specified +by `alignment` using the allocator specified by `allocator`. + +If the `size` parameter is `0`, the operation is a no-op. + +**Inputs**: +- `size`: The desired size of the allocated memory region. +- `alignment`: The desired alignment of the allocated memory region. +- `allocator`: The allocator to allocate from. + +**Returns**: +1. Pointer to the allocated memory, or `nil` if allocation failed. +2. Error, if the allocation failed. + +**Errors**: +- `None`: If no error occurred. +- `Out_Of_Memory`: Occurs when the allocator runs out of space in any of its + backing buffers, the backing allocator has ran out of space, or an operating + system failure occurred. +- `Invalid_Argument`: If the supplied `size` is negative, alignment is not a + power of two. +*/ @(require_results) -alloc :: proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (rawptr, Allocator_Error) { +alloc :: proc( + size: int, + alignment: int = DEFAULT_ALIGNMENT, + allocator := context.allocator, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { data, err := runtime.mem_alloc(size, alignment, allocator, loc) return raw_data(data), err } +/* +Allocate memory. + +This function allocates `size` bytes of memory, aligned to a boundary specified +by `alignment` using the allocator specified by `allocator`. + +**Inputs**: +- `size`: The desired size of the allocated memory region. +- `alignment`: The desired alignment of the allocated memory region. +- `allocator`: The allocator to allocate from. + +**Returns**: +1. Slice of the allocated memory region, or `nil` if allocation failed. +2. Error, if the allocation failed. + +**Errors**: +- `None`: If no error occurred. +- `Out_Of_Memory`: Occurs when the allocator runs out of space in any of its + backing buffers, the backing allocator has ran out of space, or an operating + system failure occurred. +- `Invalid_Argument`: If the supplied `size` is negative, alignment is not a + power of two. +*/ @(require_results) -alloc_bytes :: proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { +alloc_bytes :: proc( + size: int, + alignment: int = DEFAULT_ALIGNMENT, + allocator := context.allocator, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { return runtime.mem_alloc(size, alignment, allocator, loc) } +/* +Allocate non-zeroed memory. + +This function allocates `size` bytes of memory, aligned to a boundary specified +by `alignment` using the allocator specified by `allocator`. This procedure +does not explicitly zero-initialize allocated memory region. + +**Inputs**: +- `size`: The desired size of the allocated memory region. +- `alignment`: The desired alignment of the allocated memory region. +- `allocator`: The allocator to allocate from. + +**Returns**: +1. Slice of the allocated memory region, or `nil` if allocation failed. +2. Error, if the allocation failed. + +**Errors**: +- `None`: If no error occurred. 
+- `Out_Of_Memory`: Occurs when the allocator runs out of space in any of its + backing buffers, the backing allocator has ran out of space, or an operating + system failure occurred. +- `Invalid_Argument`: If the supplied `size` is negative, alignment is not a + power of two. +*/ @(require_results) -alloc_bytes_non_zeroed :: proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { +alloc_bytes_non_zeroed :: proc( + size: int, + alignment: int = DEFAULT_ALIGNMENT, + allocator := context.allocator, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { return runtime.mem_alloc_non_zeroed(size, alignment, allocator, loc) } -free :: proc(ptr: rawptr, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { +/* +Free memory. + +This procedure frees memory region located at the address, specified by `ptr`, +allocated from the allocator specified by `allocator`. + +**Inputs**: +- `ptr`: Pointer to the memory region to free. +- `allocator`: The allocator to free to. + +**Returns**: +- The error, if freeing failed. + +**Errors**: +- `None`: When no error has occurred. +- `Invalid_Pointer`: The specified pointer is not owned by the specified allocator, + or does not point to a valid allocation. +- `Mode_Not_Implemented`: If the specified allocator does not support the `.Free` +mode. +*/ +free :: proc( + ptr: rawptr, + allocator := context.allocator, + loc := #caller_location, +) -> Allocator_Error { return runtime.mem_free(ptr, allocator, loc) } -free_with_size :: proc(ptr: rawptr, byte_count: int, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { - return runtime.mem_free_with_size(ptr, byte_count, allocator, loc) +/* +Free a memory region. + +This procedure frees `size` bytes of memory region located at the address, +specified by `ptr`, allocated from the allocator specified by `allocator`. + +If the `size` parameter is `0`, this call is equivalent to `free()`. + +**Inputs**: +- `ptr`: Pointer to the memory region to free. +- `size`: The size of the memory region to free. +- `allocator`: The allocator to free to. + +**Returns**: +- The error, if freeing failed. + +**Errors**: +- `None`: When no error has occurred. +- `Invalid_Pointer`: The specified pointer is not owned by the specified allocator, + or does not point to a valid allocation. +- `Mode_Not_Implemented`: If the specified allocator does not support the `.Free` +mode. +*/ +free_with_size :: proc( + ptr: rawptr, + size: int, + allocator := context.allocator, + loc := #caller_location, +) -> Allocator_Error { + return runtime.mem_free_with_size(ptr, size, allocator, loc) } -free_bytes :: proc(bytes: []byte, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { +/* +Free a memory region. + +This procedure frees memory region, specified by `bytes`, allocated from the +allocator specified by `allocator`. + +If the length of the specified slice is zero, the `.Invalid_Argument` error +is returned. + +**Inputs**: +- `bytes`: The memory region to free. +- `allocator`: The allocator to free to. + +**Returns**: +- The error, if freeing failed. + +**Errors**: +- `None`: When no error has occurred. +- `Invalid_Pointer`: The specified pointer is not owned by the specified allocator, + or does not point to a valid allocation. +- `Mode_Not_Implemented`: If the specified allocator does not support the `.Free` +mode. 
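A minimal allocate/free round trip through these procedures might look as follows (an illustrative sketch, assuming the package is imported as `core:mem` and the default context allocator is in use):

    package example

    import "core:mem"

    free_bytes_demo :: proc() {
        // Allocate a 64-byte, zero-initialized region from the context allocator.
        buf, err := mem.alloc_bytes(64)
        if err != nil {
            return
        }
        // Return the region to the same allocator that produced it.
        _ = mem.free_bytes(buf)
    }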
+*/ +free_bytes :: proc( + bytes: []byte, + allocator := context.allocator, + loc := #caller_location, +) -> Allocator_Error { return runtime.mem_free_bytes(bytes, allocator, loc) } +/* +Free all allocations. + +This procedure frees all allocations made on the allocator specified by +`allocator` to that allocator, making it available for further allocations. + +**Inputs**: +- `allocator`: The allocator to free to. + +**Errors**: +- `None`: When no error has occurred. +- `Mode_Not_Implemented`: If the specified allocator does not support the `.Free` +mode. +*/ free_all :: proc(allocator := context.allocator, loc := #caller_location) -> Allocator_Error { return runtime.mem_free_all(allocator, loc) } +/* +Resize a memory region. + +This procedure resizes a memory region, `old_size` bytes in size, located at +the address specified by `ptr`, such that it has a new size, specified by +`new_size`, and is aligned on a boundary specified by `alignment`. + +If the `ptr` parameter is `nil`, `resize()` acts just like `alloc()`, allocating +`new_size` bytes, aligned on a boundary specified by `alignment`. + +If the `new_size` parameter is `0`, `resize()` acts just like `free()`, freeing +the memory region `old_size` bytes in length, located at the address specified +by `ptr`. + +If the `old_memory` pointer is not aligned to the boundary specified by +`alignment`, the procedure relocates the buffer such that the reallocated +buffer is aligned to the boundary specified by `alignment`. + +**Inputs**: +- `ptr`: Pointer to the memory region to resize. +- `old_size`: Size of the memory region to resize. +- `new_size`: The desired size of the resized memory region. +- `alignment`: The desired alignment of the resized memory region. +- `allocator`: The owner of the memory region to resize. + +**Returns**: +1. The pointer to the resized memory region, if successful, `nil` otherwise. +2. Error, if resize failed. + +**Errors**: +- `None`: No error. +- `Out_Of_Memory`: When the allocator's backing buffer or its backing + allocator does not have enough space to fit in an allocation with the new + size, or an operating system failure occurs. +- `Invalid_Pointer`: The pointer referring to a memory region does not belong + to any of the allocator's backing buffers or does not point to a valid start + of an allocation made in that allocator. +- `Invalid_Argument`: When `size` is negative, alignment is not a power of two, + or the `old_size` argument is incorrect. +- `Mode_Not_Implemented`: The allocator does not support the `.Resize` mode. + +**Note**: if `old_size` is `0` and `old_memory` is `nil`, this operation is a +no-op, and should not return errors. +*/ @(require_results) -resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> (rawptr, Allocator_Error) { +resize :: proc( + ptr: rawptr, + old_size: int, + new_size: int, + alignment: int = DEFAULT_ALIGNMENT, + allocator := context.allocator, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { data, err := runtime.mem_resize(ptr, old_size, new_size, alignment, allocator, loc) return raw_data(data), err } +/* +Resize a memory region without zero-initialization. + +This procedure resizes a memory region, `old_size` bytes in size, located at +the address specified by `ptr`, such that it has a new size, specified by +`new_size`, and is aligned on a boundary specified by `alignment`.
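By way of example, the resize contract described above can be exercised like so (a hedged sketch; `grow_or_alloc` is a hypothetical helper, not part of this package):

    package example

    import "core:mem"

    // A nil `ptr` makes resize behave like alloc; a zero new size would
    // make it behave like free, so a floor of 16 bytes is used here.
    grow_or_alloc :: proc(ptr: rawptr, old_size: int) -> (rawptr, mem.Allocator_Error) {
        return mem.resize(ptr, old_size, max(old_size * 2, 16))
    }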
+ +If the `ptr` parameter is `nil`, `resize()` acts just like `alloc()`, allocating +`new_size` bytes, aligned on a boundary specified by `alignment`. + +If the `new_size` parameter is `0`, `resize()` acts just like `free()`, freeing +the memory region `old_size` bytes in length, located at the address specified +by `ptr`. + +If the `old_memory` pointer is not aligned to the boundary specified by +`alignment`, the procedure relocates the buffer such that the reallocated +buffer is aligned to the boundary specified by `alignment`. + +Unlike `resize()`, this procedure does not explicitly zero-initialize any new +memory. + +**Inputs**: +- `ptr`: Pointer to the memory region to resize. +- `old_size`: Size of the memory region to resize. +- `new_size`: The desired size of the resized memory region. +- `alignment`: The desired alignment of the resized memory region. +- `allocator`: The owner of the memory region to resize. + +**Returns**: +1. The pointer to the resized memory region, if successfull, `nil` otherwise. +2. Error, if resize failed. + +**Errors**: +- `None`: No error. +- `Out_Of_Memory`: When the allocator's backing buffer or it's backing + allocator does not have enough space to fit in an allocation with the new + size, or an operating system failure occurs. +- `Invalid_Pointer`: The pointer referring to a memory region does not belong + to any of the allocators backing buffers or does not point to a valid start + of an allocation made in that allocator. +- `Invalid_Argument`: When `size` is negative, alignment is not a power of two, + or the `old_size` argument is incorrect. +- `Mode_Not_Implemented`: The allocator does not support the `.Realloc` mode. + +**Note**: if `old_size` is `0` and `old_memory` is `nil`, this operation is a +no-op, and should not return errors. +*/ @(require_results) -resize_bytes :: proc(old_data: []byte, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { +resize_non_zeroed :: proc( + ptr: rawptr, + old_size: int, + new_size: int, + alignment: int = DEFAULT_ALIGNMENT, + allocator := context.allocator, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + data, err := runtime.non_zero_mem_resize(ptr, old_size, new_size, alignment, allocator, loc) + return raw_data(data), err +} + +/* +Resize a memory region. + +This procedure resizes a memory region, specified by `old_data`, such that it +has a new size, specified by `new_size` and and is aligned on a boundary +specified by `alignment`. + +If the `old_data` parameter is `nil`, `resize_bytes()` acts just like +`alloc_bytes()`, allocating `new_size` bytes, aligned on a boundary specified +by `alignment`. + +If the `new_size` parameter is `0`, `resize_bytes()` acts just like +`free_bytes()`, freeing the memory region specified by `old_data`. + +If the `old_memory` pointer is not aligned to the boundary specified by +`alignment`, the procedure relocates the buffer such that the reallocated +buffer is aligned to the boundary specified by `alignment`. + +**Inputs**: +- `old_data`: Pointer to the memory region to resize. +- `new_size`: The desired size of the resized memory region. +- `alignment`: The desired alignment of the resized memory region. +- `allocator`: The owner of the memory region to resize. + +**Returns**: +1. The resized memory region, if successfull, `nil` otherwise. +2. Error, if resize failed. + +**Errors**: +- `None`: No error. 
+- `Out_Of_Memory`: When the allocator's backing buffer or it's backing + allocator does not have enough space to fit in an allocation with the new + size, or an operating system failure occurs. +- `Invalid_Pointer`: The pointer referring to a memory region does not belong + to any of the allocators backing buffers or does not point to a valid start + of an allocation made in that allocator. +- `Invalid_Argument`: When `size` is negative, alignment is not a power of two, + or the `old_size` argument is incorrect. +- `Mode_Not_Implemented`: The allocator does not support the `.Realloc` mode. + +**Note**: if `old_size` is `0` and `old_memory` is `nil`, this operation is a +no-op, and should not return errors. +*/ +@(require_results) +resize_bytes :: proc( + old_data: []byte, + new_size: int, + alignment: int = DEFAULT_ALIGNMENT, + allocator := context.allocator, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { return runtime.mem_resize(raw_data(old_data), len(old_data), new_size, alignment, allocator, loc) } +/* +Resize a memory region. + +This procedure resizes a memory region, specified by `old_data`, such that it +has a new size, specified by `new_size` and and is aligned on a boundary +specified by `alignment`. + +If the `old_data` parameter is `nil`, `resize_bytes()` acts just like +`alloc_bytes()`, allocating `new_size` bytes, aligned on a boundary specified +by `alignment`. + +If the `new_size` parameter is `0`, `resize_bytes()` acts just like +`free_bytes()`, freeing the memory region specified by `old_data`. + +If the `old_memory` pointer is not aligned to the boundary specified by +`alignment`, the procedure relocates the buffer such that the reallocated +buffer is aligned to the boundary specified by `alignment`. + +Unlike `resize_bytes()`, this procedure does not explicitly zero-initialize +any new memory. + +**Inputs**: +- `old_data`: Pointer to the memory region to resize. +- `new_size`: The desired size of the resized memory region. +- `alignment`: The desired alignment of the resized memory region. +- `allocator`: The owner of the memory region to resize. + +**Returns**: +1. The resized memory region, if successfull, `nil` otherwise. +2. Error, if resize failed. + +**Errors**: +- `None`: No error. +- `Out_Of_Memory`: When the allocator's backing buffer or it's backing + allocator does not have enough space to fit in an allocation with the new + size, or an operating system failure occurs. +- `Invalid_Pointer`: The pointer referring to a memory region does not belong + to any of the allocators backing buffers or does not point to a valid start + of an allocation made in that allocator. +- `Invalid_Argument`: When `size` is negative, alignment is not a power of two, + or the `old_size` argument is incorrect. +- `Mode_Not_Implemented`: The allocator does not support the `.Realloc` mode. + +**Note**: if `old_size` is `0` and `old_memory` is `nil`, this operation is a +no-op, and should not return errors. +*/ +@(require_results) +resize_bytes_non_zeroed :: proc( + old_data: []byte, + new_size: int, + alignment: int = DEFAULT_ALIGNMENT, + allocator := context.allocator, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + return runtime.non_zero_mem_resize(raw_data(old_data), len(old_data), new_size, alignment, allocator, loc) +} + +/* +Query allocator features. 
+*/ @(require_results) query_features :: proc(allocator: Allocator, loc := #caller_location) -> (set: Allocator_Mode_Set) { if allocator.procedure != nil { @@ -114,8 +716,15 @@ query_features :: proc(allocator: Allocator, loc := #caller_location) -> (set: A return nil } +/* +Query allocator information. +*/ @(require_results) -query_info :: proc(pointer: rawptr, allocator: Allocator, loc := #caller_location) -> (props: Allocator_Query_Info) { +query_info :: proc( + pointer: rawptr, + allocator: Allocator, + loc := #caller_location, +) -> (props: Allocator_Query_Info) { props.pointer = pointer if allocator.procedure != nil { allocator.procedure(allocator.data, .Query_Info, 0, 0, &props, 0, loc) @@ -123,25 +732,62 @@ query_info :: proc(pointer: rawptr, allocator: Allocator, loc := #caller_locatio return } - - -delete_string :: proc(str: string, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { +/* +Free a string. +*/ +delete_string :: proc( + str: string, + allocator := context.allocator, + loc := #caller_location, +) -> Allocator_Error { return runtime.delete_string(str, allocator, loc) } -delete_cstring :: proc(str: cstring, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + +/* +Free a cstring. +*/ +delete_cstring :: proc( + str: cstring, + allocator := context.allocator, + loc := #caller_location, +) -> Allocator_Error { return runtime.delete_cstring(str, allocator, loc) } -delete_dynamic_array :: proc(array: $T/[dynamic]$E, loc := #caller_location) -> Allocator_Error { + +/* +Free a dynamic array. +*/ +delete_dynamic_array :: proc( + array: $T/[dynamic]$E, + loc := #caller_location, +) -> Allocator_Error { return runtime.delete_dynamic_array(array, loc) } -delete_slice :: proc(array: $T/[]$E, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + +/* +Free a slice. +*/ +delete_slice :: proc( + array: $T/[]$E, + allocator := context.allocator, + loc := #caller_location, +) -> Allocator_Error { return runtime.delete_slice(array, allocator, loc) } -delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error { + +/* +Free a map. +*/ +delete_map :: proc( + m: $T/map[$K]$V, + loc := #caller_location, +) -> Allocator_Error { return runtime.delete_map(m, loc) } - +/* +Free. +*/ delete :: proc{ delete_string, delete_cstring, @@ -150,49 +796,177 @@ delete :: proc{ delete_map, } +/* +Allocate a new object. +This procedure allocates a new object of type `T` using an allocator specified +by `allocator`, and returns a pointer to the allocated object, if allocated +successfully, or `nil` otherwise. +*/ @(require_results) -new :: proc($T: typeid, allocator := context.allocator, loc := #caller_location) -> (^T, Allocator_Error) { +new :: proc( + $T: typeid, + allocator := context.allocator, + loc := #caller_location, +) -> (^T, Allocator_Error) { return new_aligned(T, align_of(T), allocator, loc) } + +/* +Allocate a new object with alignment. + +This procedure allocates a new object of type `T` using an allocator specified +by `allocator`, and returns a pointer, aligned on a boundary specified by +`alignment` to the allocated object, if allocated successfully, or `nil` +otherwise. 
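For illustration, `new` and `new_clone` (documented just below) combine like this (a sketch; the `Point` type is hypothetical, and everything goes through the default context allocator):

    package example

    import "core:mem"

    Point :: struct { x, y: f32 }

    new_demo :: proc() {
        p, _ := mem.new(Point)             // zero-initialized ^Point
        c, _ := mem.new_clone(Point{1, 2}) // heap copy of a value
        free(p)
        free(c)
    }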
+*/ @(require_results) -new_aligned :: proc($T: typeid, alignment: int, allocator := context.allocator, loc := #caller_location) -> (t: ^T, err: Allocator_Error) { +new_aligned :: proc( + $T: typeid, + alignment: int, + allocator := context.allocator, + loc := #caller_location, +) -> (t: ^T, err: Allocator_Error) { return runtime.new_aligned(T, alignment, allocator, loc) } + +/* +Allocate a new object and initialize it with a value. + +This procedure allocates a new object of type `T` using an allocator specified +by `allocator`, and returns a pointer, aligned on a boundary specified by +`alignment` to the allocated object, if allocated successfully, or `nil` +otherwise. The allocated object is initialized with `data`. +*/ @(require_results) -new_clone :: proc(data: $T, allocator := context.allocator, loc := #caller_location) -> (t: ^T, err: Allocator_Error) { +new_clone :: proc( + data: $T, + allocator := context.allocator, + loc := #caller_location, +) -> (t: ^T, err: Allocator_Error) { return runtime.new_clone(data, allocator, loc) } +/* +Allocate a new slice with alignment. + +This procedure allocates a new slice of type `T` with length `len`, aligned +on a boundary specified by `alignment` from an allocator specified by +`allocator`, and returns the allocated slice. +*/ @(require_results) -make_aligned :: proc($T: typeid/[]$E, #any_int len: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (slice: T, err: Allocator_Error) { +make_aligned :: proc( + $T: typeid/[]$E, + #any_int len: int, + alignment: int, + allocator := context.allocator, + loc := #caller_location, +) -> (slice: T, err: Allocator_Error) { return runtime.make_aligned(T, len, alignment, allocator, loc) } + +/* +Allocate a new slice. + +This procedure allocates a new slice of type `T` with length `len`, from an +allocator specified by `allocator`, and returns the allocated slice. +*/ @(require_results) -make_slice :: proc($T: typeid/[]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) { +make_slice :: proc( + $T: typeid/[]$E, + #any_int len: int, + allocator := context.allocator, + loc := #caller_location, +) -> (T, Allocator_Error) { return runtime.make_slice(T, len, allocator, loc) } + +/* +Allocate a dynamic array. + +This procedure creates a dynamic array of type `T`, with `allocator` as its +backing allocator, and initial length and capacity of `0`. +*/ @(require_results) -make_dynamic_array :: proc($T: typeid/[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) { +make_dynamic_array :: proc( + $T: typeid/[dynamic]$E, + allocator := context.allocator, + loc := #caller_location, +) -> (T, Allocator_Error) { return runtime.make_dynamic_array(T, allocator, loc) } + +/* +Allocate a dynamic array with initial length. + +This procedure creates a dynamic array of type `T`, with `allocator` as its +backing allocator, and initial capacity of `0`, and initial length specified by +`len`. +*/ @(require_results) -make_dynamic_array_len :: proc($T: typeid/[dynamic]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) { +make_dynamic_array_len :: proc( + $T: typeid/[dynamic]$E, + #any_int len: int, + allocator := context.allocator, + loc := #caller_location, +) -> (T, Allocator_Error) { return runtime.make_dynamic_array_len_cap(T, len, len, allocator, loc) } + +/* +Allocate a dynamic array with initial length and capacity. 
+ +This procedure creates a dynamic array of type `T`, with `allocator` as its +backing allocator, and initial capacity specified by `cap`, and initial length +specified by `len`. +*/ @(require_results) -make_dynamic_array_len_cap :: proc($T: typeid/[dynamic]$E, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) { +make_dynamic_array_len_cap :: proc( + $T: typeid/[dynamic]$E, + #any_int len: int, + #any_int cap: int, + allocator := context.allocator, + loc := #caller_location, +) -> (array: T, err: Allocator_Error) { return runtime.make_dynamic_array_len_cap(T, len, cap, allocator, loc) } + +/* +Allocate a map. + +This procedure creates a map of type `T` with initial capacity specified by +`cap`, that is using an allocator specified by `allocator` as its backing +allocator. +*/ @(require_results) -make_map :: proc($T: typeid/map[$K]$E, #any_int cap: int = 1<<runtime.MAP_MIN_LOG2_CAPACITY, allocator := context.allocator, loc := #caller_location) -> (m: T, err: Allocator_Error) { +make_map :: proc( + $T: typeid/map[$K]$E, + #any_int cap: int = 1<<runtime.MAP_MIN_LOG2_CAPACITY, + allocator := context.allocator, + loc := #caller_location, +) -> (m: T, err: Allocator_Error) { return runtime.make_map(T, cap, allocator, loc) } + +/* +Allocate a multi pointer. + +This procedure allocates a multipointer of type `T` pointing to `len` elements, +from an allocator specified by `allocator`. +*/ @(require_results) -make_multi_pointer :: proc($T: typeid/[^]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (mp: T, err: Allocator_Error) { +make_multi_pointer :: proc( + $T: typeid/[^]$E, + #any_int len: int, + allocator := context.allocator, + loc := #caller_location +) -> (mp: T, err: Allocator_Error) { return runtime.make_multi_pointer(T, len, allocator, loc) } +/* +Allocate. +*/ make :: proc{ make_slice, make_dynamic_array, @@ -202,26 +976,112 @@ make :: proc{ make_multi_pointer, } +/* +Default resize procedure. + +When allocator does not support resize operation, but supports `.Alloc` and +`.Free`, this procedure is used to implement allocator's default behavior on +resize. +The behavior of the function is as follows: + +- If `new_size` is `0`, the function acts like `free()`, freeing the memory + region of `old_size` bytes located at `old_memory`. +- If `old_memory` is `nil`, the function acts like `alloc()`, allocating + `new_size` bytes of memory aligned on a boundary specified by `alignment`. +- Otherwise, a new memory region of size `new_size` is allocated, then the + data from the old memory region is copied and the old memory region is + freed. +*/ @(require_results) -default_resize_align :: proc(old_memory: rawptr, old_size, new_size, alignment: int, allocator := context.allocator, loc := #caller_location) -> (res: rawptr, err: Allocator_Error) { +default_resize_align :: proc( + old_memory: rawptr, + old_size: int, + new_size: int, + alignment: int, + allocator := context.allocator, + loc := #caller_location, +) -> (res: rawptr, err: Allocator_Error) { data: []byte - data, err = default_resize_bytes_align(([^]byte)(old_memory)[:old_size], new_size, alignment, allocator, loc) + data, err = default_resize_bytes_align( + ([^]byte) (old_memory)[:old_size], + new_size, + alignment, + allocator, + loc, + ) res = raw_data(data) return } +/* +Default resize procedure. 
+ +When allocator does not support resize operation, but supports +`.Alloc_Non_Zeroed` and `.Free`, this procedure is used to implement allocator's +default behavior on resize. + +Unlike `default_resize_align` no new memory is being explicitly +zero-initialized. + +The behavior of the function is as follows: + +- If `new_size` is `0`, the function acts like `free()`, freeing the memory + region of `old_size` bytes located at `old_memory`. +- If `old_memory` is `nil`, the function acts like `alloc()`, allocating + `new_size` bytes of memory aligned on a boundary specified by `alignment`. +- Otherwise, a new memory region of size `new_size` is allocated, then the + data from the old memory region is copied and the old memory region is + freed. +*/ @(require_results) -default_resize_bytes_align_non_zeroed :: proc(old_data: []byte, new_size, alignment: int, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { +default_resize_bytes_align_non_zeroed :: proc( + old_data: []byte, + new_size: int, + alignment: int, + allocator := context.allocator, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { return _default_resize_bytes_align(old_data, new_size, alignment, false, allocator, loc) } + +/* +Default resize procedure. + +When allocator does not support resize operation, but supports `.Alloc` and +`.Free`, this procedure is used to implement allocator's default behavior on +resize. + +The behavior of the function is as follows: + +- If `new_size` is `0`, the function acts like `free()`, freeing the memory + region specified by `old_data`. +- If `old_data` is `nil`, the function acts like `alloc()`, allocating + `new_size` bytes of memory aligned on a boundary specified by `alignment`. +- Otherwise, a new memory region of size `new_size` is allocated, then the + data from the old memory region is copied and the old memory region is + freed. 
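Stripped of the alignment handling and edge cases, the alloc-copy-free fallback described above amounts to roughly the following (a sketch; `fallback_resize` is a hypothetical helper, not the actual implementation):

    package example

    import "core:mem"

    fallback_resize :: proc(old: []byte, new_size: int) -> ([]byte, mem.Allocator_Error) {
        // Allocate the new region, copy the old contents, free the old region.
        new_buf := mem.alloc_bytes(new_size) or_return
        copy(new_buf, old)
        mem.free_bytes(old) or_return
        return new_buf, nil
    }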
+*/ @(require_results) -default_resize_bytes_align :: proc(old_data: []byte, new_size, alignment: int, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { +default_resize_bytes_align :: proc( + old_data: []byte, + new_size: int, + alignment: int, + allocator := context.allocator, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { return _default_resize_bytes_align(old_data, new_size, alignment, true, allocator, loc) } @(require_results) -_default_resize_bytes_align :: #force_inline proc(old_data: []byte, new_size, alignment: int, should_zero: bool, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) { +_default_resize_bytes_align :: #force_inline proc( + old_data: []byte, + new_size: int, + alignment: int, + should_zero: bool, + allocator := context.allocator, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { old_memory := raw_data(old_data) old_size := len(old_data) if old_memory == nil { @@ -231,16 +1091,13 @@ _default_resize_bytes_align :: #force_inline proc(old_data: []byte, new_size, al return alloc_bytes_non_zeroed(new_size, alignment, allocator, loc) } } - if new_size == 0 { err := free_bytes(old_data, allocator, loc) return nil, err } - - if new_size == old_size { + if new_size == old_size && is_aligned(old_memory, alignment) { return old_data, .None } - new_memory : []byte err : Allocator_Error if should_zero { @@ -251,7 +1108,6 @@ _default_resize_bytes_align :: #force_inline proc(old_data: []byte, new_size, al if new_memory == nil || err != nil { return nil, err } - runtime.copy(new_memory, old_data) free_bytes(old_data, allocator, loc) return new_memory, err diff --git a/core/mem/allocators.odin b/core/mem/allocators.odin index a5b93ad05..d729b902c 100644 --- a/core/mem/allocators.odin +++ b/core/mem/allocators.odin @@ -3,12 +3,14 @@ package mem import "base:intrinsics" import "base:runtime" -nil_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) { - return nil, nil -} +/* +Nil allocator. +The `nil` allocator returns `nil` on every allocation attempt. This type of +allocator can be used in scenarios where memory doesn't need to be allocated, +but an attempt to allocate memory is not an error. +*/ +@(require_results) nil_allocator :: proc() -> Allocator { return Allocator{ procedure = nil_allocator_proc, @@ -16,8 +18,81 @@ nil_allocator :: proc() -> Allocator { } } -// Custom allocators +nil_allocator_proc :: proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size, alignment: int, + old_memory: rawptr, + old_size: int, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + return nil, nil +} + + +/* +Panic allocator. + +The panic allocator is a type of allocator that panics on any allocation +attempt. This type of allocator can be used in scenarios where memory should +not be allocated, and an attempt to allocate memory is an error. 
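For example, it can be installed to assert that a stretch of code performs no hidden allocations (a sketch; the surrounding procedure is hypothetical):

    package example

    import "core:mem"

    no_hidden_allocations :: proc() {
        // Any allocation through the context allocator below now panics.
        context.allocator = mem.panic_allocator()
        // ... allocation-free work ...
    }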
+*/ +@(require_results) +panic_allocator :: proc() -> Allocator { + return Allocator{ + procedure = panic_allocator_proc, + data = nil, + } +} + +panic_allocator_proc :: proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size, alignment: int, + old_memory: rawptr, + old_size: int, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + switch mode { + case .Alloc: + if size > 0 { + panic("mem: panic allocator, .Alloc called", loc=loc) + } + case .Alloc_Non_Zeroed: + if size > 0 { + panic("mem: panic allocator, .Alloc_Non_Zeroed called", loc=loc) + } + case .Resize: + if size > 0 { + panic("mem: panic allocator, .Resize called", loc=loc) + } + case .Resize_Non_Zeroed: + if size > 0 { + panic("mem: panic allocator, .Resize_Non_Zeroed called", loc=loc) + } + case .Free: + if old_memory != nil { + panic("mem: panic allocator, .Free called", loc=loc) + } + case .Free_All: + panic("mem: panic allocator, .Free_All called", loc=loc) + case .Query_Features: + set := (^Allocator_Mode_Set)(old_memory) + if set != nil { + set^ = {.Query_Features} + } + return nil, nil + + case .Query_Info: + panic("mem: panic allocator, .Query_Info called", loc=loc) + } + return nil, nil +} + +/* +Arena allocator data. +*/ Arena :: struct { data: []byte, offset: int, @@ -25,12 +100,39 @@ Arena :: struct { temp_count: int, } -Arena_Temp_Memory :: struct { - arena: ^Arena, - prev_offset: int, +/* +Arena allocator. + +The arena allocator (also known as a linear allocator, bump allocator, +region allocator) is an allocator that uses a single backing buffer for +allocations. + +The buffer is being used contiguously, from start by end. Each subsequent +allocation occupies the next adjacent region of memory in the buffer. Since +arena allocator does not keep track of any metadata associated with the +allocations and their locations, it is impossible to free individual +allocations. + +The arena allocator can be used for temporary allocations in frame-based memory +management. Games are one example of such applications. A global arena can be +used for any temporary memory allocations, and at the end of each frame all +temporary allocations are freed. Since no temporary object is going to live +longer than a frame, no lifetimes are violated. +*/ +@(require_results) +arena_allocator :: proc(arena: ^Arena) -> Allocator { + return Allocator{ + procedure = arena_allocator_proc, + data = arena, + } } +/* +Initialize an arena. +This procedure initializes the arena `a` with memory region `data` as it's +backing buffer. +*/ arena_init :: proc(a: ^Arena, data: []byte) { a.data = data a.offset = 0 @@ -46,64 +148,157 @@ init_arena :: proc(a: ^Arena, data: []byte) { a.temp_count = 0 } +/* +Allocate memory from an arena. + +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment` from an arena `a`. The allocated memory is zero-initialized. +This procedure returns a pointer to the newly allocated memory region. 
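Putting the pieces together, a stack-backed arena might be used like this (a sketch; the 1 KiB buffer size is arbitrary):

    package example

    import "core:mem"

    arena_demo :: proc() {
        backing: [1024]byte
        a: mem.Arena
        mem.arena_init(&a, backing[:])
        ptr, err := mem.arena_alloc(&a, 128)
        if err != nil {
            return
        }
        _ = ptr
        mem.arena_free_all(&a) // individual frees are not supported
    }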
+*/ @(require_results) -arena_allocator :: proc(arena: ^Arena) -> Allocator { - return Allocator{ - procedure = arena_allocator_proc, - data = arena, - } +arena_alloc :: proc( + a: ^Arena, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := arena_alloc_bytes(a, size, alignment, loc) + return raw_data(bytes), err } -arena_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, location := #caller_location) -> ([]byte, Allocator_Error) { - arena := cast(^Arena)allocator_data +/* +Allocate memory from an arena. - switch mode { - case .Alloc, .Alloc_Non_Zeroed: - #no_bounds_check end := &arena.data[arena.offset] +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment` from an arena `a`. The allocated memory is zero-initialized. +This procedure returns a slice of the newly allocated memory region. +*/ +@(require_results) +arena_alloc_bytes :: proc( + a: ^Arena, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + bytes, err := arena_alloc_bytes_non_zeroed(a, size, alignment, loc) + if bytes != nil { + zero_slice(bytes) + } + return bytes, err +} - ptr := align_forward(end, uintptr(alignment)) +/* +Allocate non-initialized memory from an arena. - total_size := size + ptr_sub((^byte)(ptr), (^byte)(end)) +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment` from an arena `a`. The allocated memory is not explicitly +zero-initialized. This procedure returns a pointer to the newly allocated +memory region. +*/ +@(require_results) +arena_alloc_non_zeroed :: proc( + a: ^Arena, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := arena_alloc_bytes_non_zeroed(a, size, alignment, loc) + return raw_data(bytes), err +} - if arena.offset + total_size > len(arena.data) { - return nil, .Out_Of_Memory - } +/* +Allocate non-initialized memory from an arena. - arena.offset += total_size - arena.peak_used = max(arena.peak_used, arena.offset) - if mode != .Alloc_Non_Zeroed { - zero(ptr, size) - } - return byte_slice(ptr, size), nil +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment` from an arena `a`. The allocated memory is not explicitly +zero-initialized. This procedure returns a slice of the newly allocated +memory region. +*/ +@(require_results) +arena_alloc_bytes_non_zeroed :: proc( + a: ^Arena, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location +) -> ([]byte, Allocator_Error) { + if a.data == nil { + panic("Arena is not initialized", loc) + } + #no_bounds_check end := &a.data[a.offset] + ptr := align_forward(end, uintptr(alignment)) + total_size := size + ptr_sub((^byte)(ptr), (^byte)(end)) + if a.offset + total_size > len(a.data) { + return nil, .Out_Of_Memory + } + a.offset += total_size + a.peak_used = max(a.peak_used, a.offset) + return byte_slice(ptr, size), nil +} + +/* +Free all memory to an arena. 
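This resets the arena's offset so the whole backing buffer becomes reusable. For partial rollbacks, the `Arena_Temp_Memory` savepoints documented further below can be used instead (a sketch):

    package example

    import "core:mem"

    temp_region_demo :: proc(a: ^mem.Arena) {
        tmp := mem.begin_arena_temp_memory(a)
        defer mem.end_arena_temp_memory(tmp) // rolls the arena offset back
        // Everything allocated here is reclaimed when the region ends.
        _, _ = mem.arena_alloc(a, 64)
    }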
+*/ +arena_free_all :: proc(a: ^Arena) { + a.offset = 0 +} +arena_allocator_proc :: proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size: int, + alignment: int, + old_memory: rawptr, + old_size: int, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + arena := cast(^Arena)allocator_data + switch mode { + case .Alloc: + return arena_alloc_bytes(arena, size, alignment, loc) + case .Alloc_Non_Zeroed: + return arena_alloc_bytes_non_zeroed(arena, size, alignment, loc) case .Free: return nil, .Mode_Not_Implemented - case .Free_All: - arena.offset = 0 - + arena_free_all(arena) case .Resize: - return default_resize_bytes_align(byte_slice(old_memory, old_size), size, alignment, arena_allocator(arena)) - + return default_resize_bytes_align(byte_slice(old_memory, old_size), size, alignment, arena_allocator(arena), loc) case .Resize_Non_Zeroed: - return default_resize_bytes_align_non_zeroed(byte_slice(old_memory, old_size), size, alignment, arena_allocator(arena)) - + return default_resize_bytes_align_non_zeroed(byte_slice(old_memory, old_size), size, alignment, arena_allocator(arena), loc) case .Query_Features: set := (^Allocator_Mode_Set)(old_memory) if set != nil { set^ = {.Alloc, .Alloc_Non_Zeroed, .Free_All, .Resize, .Resize_Non_Zeroed, .Query_Features} } return nil, nil - case .Query_Info: return nil, .Mode_Not_Implemented } - return nil, nil } +/* +Temporary memory region of arena. + +Temporary memory regions of arena act as "savepoints" for arena. When one is +created, the subsequent allocations are done inside the temporary memory +region. When `end_arena_temp_memory` is called, the arena is rolled back, and +all of the memory that was allocated from the arena will be freed. + +Multiple temporary memory regions can exist at the same time for an arena. +*/ +Arena_Temp_Memory :: struct { + arena: ^Arena, + prev_offset: int, +} + +/* +Start a temporary memory region. + +This procedure creates a temporary memory region. After a temporary memory +region is created, all allocations are said to be *inside* the temporary memory +region, until `end_arena_temp_memory` is called. +*/ @(require_results) begin_arena_temp_memory :: proc(a: ^Arena) -> Arena_Temp_Memory { tmp: Arena_Temp_Memory @@ -113,6 +308,12 @@ begin_arena_temp_memory :: proc(a: ^Arena) -> Arena_Temp_Memory { return tmp } +/* +End a temporary memory region. + +This procedure ends the temporary memory region for an arena. All of the +allocations *inside* the temporary memory region will be freed to the arena. +*/ end_arena_temp_memory :: proc(tmp: Arena_Temp_Memory) { assert(tmp.arena.offset >= tmp.prev_offset) assert(tmp.arena.temp_count > 0) @@ -120,9 +321,15 @@ end_arena_temp_memory :: proc(tmp: Arena_Temp_Memory) { tmp.arena.temp_count -= 1 } +/* Preserved for compatibility */ +Scratch_Allocator :: Scratch +scratch_allocator_init :: scratch_init +scratch_allocator_destroy :: scratch_destroy - -Scratch_Allocator :: struct { +/* +Scratch allocator data. +*/ +Scratch :: struct { data: []byte, curr_offset: int, prev_allocation: rawptr, @@ -130,7 +337,35 @@ Scratch_Allocator :: struct { leaked_allocations: [dynamic][]byte, } -scratch_allocator_init :: proc(s: ^Scratch_Allocator, size: int, backup_allocator := context.allocator) -> Allocator_Error { +/* +Scratch allocator. + +The scratch allocator works in a similar way to the `Arena` allocator. The +scratch allocator has a backing buffer, that is being allocated in +contiguous regions, from start to end. 
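A typical setup might look like this (a sketch, shown ahead of the allocation details that follow; the 4096-byte backing size is arbitrary):

    package example

    import "core:mem"

    scratch_demo :: proc() {
        s: mem.Scratch
        if mem.scratch_init(&s, 4096) != nil {
            return
        }
        defer mem.scratch_destroy(&s)
        // Route temporary allocations through the scratch buffer.
        context.temp_allocator = mem.scratch_allocator(&s)
    }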
+ +Each subsequent allocation will be the next adjacent region of memory in the +backing buffer. If the allocation doesn't fit into the remaining space of the +backing buffer, this allocation is put at the start of the buffer, and all +previous allocations will become invalidated. If the allocation doesn't fit +into the backing buffer as a whole, it will be allocated using a backing +allocator, and pointer to the allocated memory region will be put into the +`leaked_allocations` array. + +The `leaked_allocations` array is managed by the `context` allocator. +*/ +@(require_results) +scratch_allocator :: proc(allocator: ^Scratch) -> Allocator { + return Allocator{ + procedure = scratch_allocator_proc, + data = allocator, + } +} + +/* +Initialize scratch allocator. +*/ +scratch_init :: proc(s: ^Scratch, size: int, backup_allocator := context.allocator) -> Allocator_Error { s.data = make_aligned([]byte, size, 2*align_of(rawptr), backup_allocator) or_return s.curr_offset = 0 s.prev_allocation = nil @@ -139,7 +374,10 @@ scratch_allocator_init :: proc(s: ^Scratch_Allocator, size: int, backup_allocato return nil } -scratch_allocator_destroy :: proc(s: ^Scratch_Allocator) { +/* +Free all data associated with a scratch allocator. +*/ +scratch_destroy :: proc(s: ^Scratch) { if s == nil { return } @@ -151,60 +389,105 @@ scratch_allocator_destroy :: proc(s: ^Scratch_Allocator) { s^ = {} } -scratch_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) { +/* +Allocate memory from scratch allocator. - s := (^Scratch_Allocator)(allocator_data) +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment`. The allocated memory region is zero-initialized. This procedure +returns a pointer to the allocated memory region. +*/ +@(require_results) +scratch_alloc :: proc( + s: ^Scratch, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := scratch_alloc_bytes(s, size, alignment, loc) + return raw_data(bytes), err +} - if s.data == nil { - DEFAULT_BACKING_SIZE :: 4 * Megabyte - if !(context.allocator.procedure != scratch_allocator_proc && - context.allocator.data != allocator_data) { - panic("cyclic initialization of the scratch allocator with itself") - } - scratch_allocator_init(s, DEFAULT_BACKING_SIZE) - } +/* +Allocate memory from scratch allocator. - size := size +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment`. The allocated memory region is zero-initialized. This procedure +returns a slice of the allocated memory region. +*/ +@(require_results) +scratch_alloc_bytes :: proc( + s: ^Scratch, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + bytes, err := scratch_alloc_bytes_non_zeroed(s, size, alignment, loc) + if bytes != nil { + zero_slice(bytes) + } + return bytes, err +} - switch mode { - case .Alloc, .Alloc_Non_Zeroed: - size = align_forward_int(size, alignment) - - switch { - case s.curr_offset+size <= len(s.data): - start := uintptr(raw_data(s.data)) - ptr := start + uintptr(s.curr_offset) - ptr = align_forward_uintptr(ptr, uintptr(alignment)) - if mode != .Alloc_Non_Zeroed { - zero(rawptr(ptr), size) - } +/* +Allocate non-initialized memory from scratch allocator. 
- s.prev_allocation = rawptr(ptr) - offset := int(ptr - start) - s.curr_offset = offset + size - return byte_slice(rawptr(ptr), size), nil +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment`. The allocated memory region is not explicitly zero-initialized. +This procedure returns a pointer to the allocated memory region. +*/ +@(require_results) +scratch_alloc_non_zeroed :: proc( + s: ^Scratch, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := scratch_alloc_bytes_non_zeroed(s, size, alignment, loc) + return raw_data(bytes), err +} - case size <= len(s.data): - start := uintptr(raw_data(s.data)) - ptr := align_forward_uintptr(start, uintptr(alignment)) - if mode != .Alloc_Non_Zeroed { - zero(rawptr(ptr), size) - } +/* +Allocate non-initialized memory from scratch allocator. - s.prev_allocation = rawptr(ptr) - offset := int(ptr - start) - s.curr_offset = offset + size - return byte_slice(rawptr(ptr), size), nil +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment`. The allocated memory region is not explicitly zero-initialized. +This procedure returns a slice of the allocated memory region. +*/ +@(require_results) +scratch_alloc_bytes_non_zeroed :: proc( + s: ^Scratch, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + if s.data == nil { + DEFAULT_BACKING_SIZE :: 4 * Megabyte + if !(context.allocator.procedure != scratch_allocator_proc && context.allocator.data != s) { + panic("cyclic initialization of the scratch allocator with itself", loc) } + scratch_init(s, DEFAULT_BACKING_SIZE) + } + size := size + size = align_forward_int(size, alignment) + if size <= len(s.data) { + offset := uintptr(0) + if s.curr_offset+size <= len(s.data) { + offset = uintptr(s.curr_offset) + } else { + offset = 0 + } + start := uintptr(raw_data(s.data)) + ptr := align_forward_uintptr(offset+start, uintptr(alignment)) + s.prev_allocation = rawptr(ptr) + s.curr_offset = int(offset) + size + return byte_slice(rawptr(ptr), size), nil + } else { a := s.backup_allocator if a.procedure == nil { a = context.allocator s.backup_allocator = a } - - ptr, err := alloc_bytes(size, alignment, a, loc) + ptr, err := alloc_bytes_non_zeroed(size, alignment, a, loc) if err != nil { return ptr, err } @@ -212,110 +495,290 @@ scratch_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, s.leaked_allocations, err = make([dynamic][]byte, a) } append(&s.leaked_allocations, ptr) - if logger := context.logger; logger.lowest_level <= .Warning { if logger.procedure != nil { - logger.procedure(logger.data, .Warning, "mem.Scratch_Allocator resorted to backup_allocator" , logger.options, loc) + logger.procedure(logger.data, .Warning, "mem.Scratch resorted to backup_allocator" , logger.options, loc) } } - return ptr, err + } +} - case .Free: - if old_memory == nil { - return nil, nil - } - start := uintptr(raw_data(s.data)) - end := start + uintptr(len(s.data)) - old_ptr := uintptr(old_memory) - - if s.prev_allocation == old_memory { - s.curr_offset = int(uintptr(s.prev_allocation) - start) - s.prev_allocation = nil - return nil, nil - } +/* +Free memory to the scratch allocator. - if start <= old_ptr && old_ptr < end { - // NOTE(bill): Cannot free this pointer but it is valid - return nil, nil - } +This procedure frees the memory region allocated at pointer `ptr`. 
-		if len(s.leaked_allocations) != 0 {
-			for data, i in s.leaked_allocations {
-				ptr := raw_data(data)
-				if ptr == old_memory {
-					free_bytes(data, s.backup_allocator)
-					ordered_remove(&s.leaked_allocations, i)
-					return nil, nil
-				}
+If `ptr` is not the latest allocation and is not a leaked allocation, this
+operation is a no-op.
+*/
+scratch_free :: proc(s: ^Scratch, ptr: rawptr, loc := #caller_location) -> Allocator_Error {
+	if s.data == nil {
+		panic("Free on an uninitialized scratch allocator", loc)
+	}
+	if ptr == nil {
+		return nil
+	}
+	start := uintptr(raw_data(s.data))
+	end := start + uintptr(len(s.data))
+	old_ptr := uintptr(ptr)
+	if s.prev_allocation == ptr {
+		s.curr_offset = int(uintptr(s.prev_allocation) - start)
+		s.prev_allocation = nil
+		return nil
+	}
+	if start <= old_ptr && old_ptr < end {
+		// NOTE(bill): Cannot free this pointer but it is valid
+		return nil
+	}
+	if len(s.leaked_allocations) != 0 {
+		for data, i in s.leaked_allocations {
+			// Compare the leaked allocation against the `ptr` parameter;
+			// the loop-local pointer must not shadow it, otherwise the
+			// comparison is always true.
+			leaked_ptr := raw_data(data)
+			if leaked_ptr == ptr {
+				free_bytes(data, s.backup_allocator, loc)
+				ordered_remove(&s.leaked_allocations, i, loc)
+				return nil
+			}
		}
-		return nil, .Invalid_Pointer
-		// panic("invalid pointer passed to default_temp_allocator");
+	}
+	return .Invalid_Pointer
+}
-	case .Free_All:
-		s.curr_offset = 0
-		s.prev_allocation = nil
-		for ptr in s.leaked_allocations {
-			free_bytes(ptr, s.backup_allocator)
-		}
-		clear(&s.leaked_allocations)
+/*
+Free all memory to the scratch allocator.
+*/
+scratch_free_all :: proc(s: ^Scratch, loc := #caller_location) {
+	s.curr_offset = 0
+	s.prev_allocation = nil
+	for ptr in s.leaked_allocations {
+		free_bytes(ptr, s.backup_allocator, loc)
+	}
+	clear(&s.leaked_allocations)
+}
-	case .Resize, .Resize_Non_Zeroed:
-		begin := uintptr(raw_data(s.data))
-		end := begin + uintptr(len(s.data))
-		old_ptr := uintptr(old_memory)
-		if begin <= old_ptr && old_ptr < end && old_ptr+uintptr(size) < end {
-			s.curr_offset = int(old_ptr-begin)+size
-			return byte_slice(old_memory, size), nil
-		}
-		data, err := scratch_allocator_proc(allocator_data, .Alloc, size, alignment, old_memory, old_size, loc)
-		if err != nil {
-			return data, err
+/*
+Resize an allocation.
+
+This procedure resizes a memory region, defined by its location, `old_memory`,
+and its size, `old_size`, to have a size `size` and alignment `alignment`. The
+newly allocated memory, if any, is zero-initialized.
+
+If `old_memory` is `nil`, this procedure acts just like `scratch_alloc()`,
+allocating a memory region `size` bytes in size, aligned on a boundary specified
+by `alignment`.
+
+If `size` is 0, this procedure acts just like `scratch_free()`, freeing the
+memory region located at an address specified by `old_memory`.
+
+This procedure returns the pointer to the resized memory region.
+*/
+@(require_results)
+scratch_resize :: proc(
+	s: ^Scratch,
+	old_memory: rawptr,
+	old_size: int,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location
+) -> (rawptr, Allocator_Error) {
+	bytes, err := scratch_resize_bytes(s, byte_slice(old_memory, old_size), size, alignment, loc)
+	return raw_data(bytes), err
+}
+
+/*
+Resize an allocation.
+
+This procedure resizes a memory region, specified by `old_data`, to have a size
+`size` and alignment `alignment`. The newly allocated memory, if any, is
+zero-initialized.
+
+If `old_data` is `nil`, this procedure acts just like `scratch_alloc()`,
+allocating a memory region `size` bytes in size, aligned on a boundary specified
+by `alignment`.
+
+If `size` is 0, this procedure acts just like `scratch_free()`, freeing the
+memory region located at an address specified by `old_data`.
+
+This procedure returns the slice of the resized memory region.
+*/
+@(require_results)
+scratch_resize_bytes :: proc(
+	s: ^Scratch,
+	old_data: []byte,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location
+) -> ([]byte, Allocator_Error) {
+	bytes, err := scratch_resize_bytes_non_zeroed(s, old_data, size, alignment, loc)
+	if bytes != nil && size > len(old_data) {
+		// Zero the newly grown tail, which starts at `len(old_data)`;
+		// `bytes[size:]` would be an empty slice and would leave the
+		// new bytes uninitialized.
+		zero_slice(bytes[len(old_data):])
+	}
+	return bytes, err
+}
+
+/*
+Resize an allocation without zero-initialization.
+
+This procedure resizes a memory region, defined by its location, `old_memory`,
+and its size, `old_size`, to have a size `size` and alignment `alignment`. The
+newly allocated memory, if any, is not explicitly zero-initialized.
+
+If `old_memory` is `nil`, this procedure acts just like `scratch_alloc()`,
+allocating a memory region `size` bytes in size, aligned on a boundary specified
+by `alignment`.
+
+If `size` is 0, this procedure acts just like `scratch_free()`, freeing the
+memory region located at an address specified by `old_memory`.
+
+This procedure returns the pointer to the resized memory region.
+*/
+@(require_results)
+scratch_resize_non_zeroed :: proc(
+	s: ^Scratch,
+	old_memory: rawptr,
+	old_size: int,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location
+) -> (rawptr, Allocator_Error) {
+	bytes, err := scratch_resize_bytes_non_zeroed(s, byte_slice(old_memory, old_size), size, alignment, loc)
+	return raw_data(bytes), err
+}
+
+/*
+Resize an allocation without zero-initialization.
+
+This procedure resizes a memory region, specified by `old_data`, to have a size
+`size` and alignment `alignment`. The newly allocated memory, if any, is not
+explicitly zero-initialized.
+
+If `old_data` is `nil`, this procedure acts just like `scratch_alloc()`,
+allocating a memory region `size` bytes in size, aligned on a boundary specified
+by `alignment`.
+
+If `size` is 0, this procedure acts just like `scratch_free()`, freeing the
+memory region located at an address specified by `old_data`.
+
+This procedure returns the slice of the resized memory region.
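+
+A minimal usage sketch (illustrative only; sizes are arbitrary):
+
+Example:
+	import "core:mem"
+
+	scratch_resize_example :: proc() {
+		s: mem.Scratch
+		mem.scratch_init(&s, 4096)
+		defer mem.scratch_destroy(&s)
+
+		data, _ := mem.scratch_alloc_bytes(&s, 16)
+		// Grow the region; the added bytes are not zeroed by this
+		// variant (use `scratch_resize_bytes` for zeroing).
+		data, _ = mem.scratch_resize_bytes_non_zeroed(&s, data, 32)
+		assert(len(data) == 32)
+	}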
+*/ +@(require_results) +scratch_resize_bytes_non_zeroed :: proc( + s: ^Scratch, + old_data: []byte, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location +) -> ([]byte, Allocator_Error) { + old_memory := raw_data(old_data) + old_size := len(old_data) + if s.data == nil { + DEFAULT_BACKING_SIZE :: 4 * Megabyte + if !(context.allocator.procedure != scratch_allocator_proc && context.allocator.data != s) { + panic("cyclic initialization of the scratch allocator with itself", loc) } - runtime.copy(data, byte_slice(old_memory, old_size)) - _, err = scratch_allocator_proc(allocator_data, .Free, 0, alignment, old_memory, old_size, loc) + scratch_init(s, DEFAULT_BACKING_SIZE) + } + begin := uintptr(raw_data(s.data)) + end := begin + uintptr(len(s.data)) + old_ptr := uintptr(old_memory) + if begin <= old_ptr && old_ptr < end && old_ptr+uintptr(size) < end { + s.curr_offset = int(old_ptr-begin)+size + return byte_slice(old_memory, size), nil + } + data, err := scratch_alloc_bytes_non_zeroed(s, size, alignment, loc) + if err != nil { return data, err + } + runtime.copy(data, byte_slice(old_memory, old_size)) + err = scratch_free(s, old_memory, loc) + return data, err +} +scratch_allocator_proc :: proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size, alignment: int, + old_memory: rawptr, + old_size: int, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + s := (^Scratch)(allocator_data) + size := size + switch mode { + case .Alloc: + return scratch_alloc_bytes(s, size, alignment, loc) + case .Alloc_Non_Zeroed: + return scratch_alloc_bytes_non_zeroed(s, size, alignment, loc) + case .Free: + return nil, scratch_free(s, old_memory, loc) + case .Free_All: + scratch_free_all(s, loc) + case .Resize: + return scratch_resize_bytes(s, byte_slice(old_memory, old_size), size, alignment, loc) + case .Resize_Non_Zeroed: + return scratch_resize_bytes_non_zeroed(s, byte_slice(old_memory, old_size), size, alignment, loc) case .Query_Features: set := (^Allocator_Mode_Set)(old_memory) if set != nil { set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Free_All, .Resize, .Resize_Non_Zeroed, .Query_Features} } return nil, nil - case .Query_Info: return nil, .Mode_Not_Implemented } - return nil, nil } -@(require_results) -scratch_allocator :: proc(allocator: ^Scratch_Allocator) -> Allocator { - return Allocator{ - procedure = scratch_allocator_proc, - data = allocator, - } -} - - +/* +Stack allocator data. +*/ +Stack :: struct { + data: []byte, + prev_offset: int, + curr_offset: int, + peak_used: int, +} +/* +Header of a stack allocation. +*/ Stack_Allocation_Header :: struct { prev_offset: int, padding: int, } -// Stack is a stack-like allocator which has a strict memory freeing order -Stack :: struct { - data: []byte, - prev_offset: int, - curr_offset: int, - peak_used: int, +/* +Stack allocator. + +The stack allocator is an allocator that allocates data in the backing buffer +linearly, from start to end. Each subsequent allocation will get the next +adjacent memory region. + +Unlike arena allocator, the stack allocator saves allocation metadata and has +a strict freeing order. Only the last allocated element can be freed. After the +last allocated element is freed, the next previous allocated element becomes +available for freeing. + +The metadata is stored in the allocation headers, that are located before the +start of each allocated memory region. Each header points to the start of the +previous allocation header. 
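+
+A minimal usage sketch of the strict freeing order (illustrative only; buffer
+and allocation sizes are arbitrary):
+
+Example:
+	import "core:mem"
+
+	stack_example :: proc() {
+		buf: [1024]byte
+		s: mem.Stack
+		mem.stack_init(&s, buf[:])
+
+		x, _ := mem.stack_alloc(&s, 100)
+		y, _ := mem.stack_alloc(&s, 200)
+
+		// Strict freeing order: `y` is the last allocation, so it must
+		// be freed before `x`.
+		mem.stack_free(&s, y)
+		mem.stack_free(&s, x)
+	}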
+*/ +@(require_results) +stack_allocator :: proc(stack: ^Stack) -> Allocator { + return Allocator{ + procedure = stack_allocator_proc, + data = stack, + } } +/* +Initialize the stack allocator. + +This procedure initializes the stack allocator with a backing buffer specified +by `data` parameter. +*/ stack_init :: proc(s: ^Stack, data: []byte) { s.data = data s.prev_offset = 0 @@ -331,129 +794,333 @@ init_stack :: proc(s: ^Stack, data: []byte) { s.peak_used = 0 } +/* +Allocate memory from stack. + +This procedure allocates `size` bytes of memory, aligned to the boundary +specified by `alignment`. The allocated memory is zero-initialized. This +procedure returns the pointer to the allocated memory. +*/ @(require_results) -stack_allocator :: proc(stack: ^Stack) -> Allocator { - return Allocator{ - procedure = stack_allocator_proc, - data = stack, +stack_alloc :: proc( + s: ^Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location +) -> (rawptr, Allocator_Error) { + bytes, err := stack_alloc_bytes(s, size, alignment, loc) + return raw_data(bytes), err +} + +/* +Allocate memory from stack. + +This procedure allocates `size` bytes of memory, aligned to the boundary +specified by `alignment`. The allocated memory is zero-initialized. This +procedure returns the slice of the allocated memory. +*/ +@(require_results) +stack_alloc_bytes :: proc( + s: ^Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location +) -> ([]byte, Allocator_Error) { + bytes, err := stack_alloc_bytes_non_zeroed(s, size, alignment, loc) + if bytes != nil { + zero_slice(bytes) } + return bytes, err } +/* +Allocate memory from stack. -stack_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, location := #caller_location) -> ([]byte, Allocator_Error) { - s := cast(^Stack)allocator_data +This procedure allocates `size` bytes of memory, aligned to the boundary +specified by `alignment`. The allocated memory is not explicitly +zero-initialized. This procedure returns the pointer to the allocated memory. +*/ +@(require_results) +stack_alloc_non_zeroed :: proc( + s: ^Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location +) -> (rawptr, Allocator_Error) { + bytes, err := stack_alloc_bytes_non_zeroed(s, size, alignment, loc) + return raw_data(bytes), err +} + +/* +Allocate memory from stack. +This procedure allocates `size` bytes of memory, aligned to the boundary +specified by `alignment`. The allocated memory is not explicitly +zero-initialized. This procedure returns the slice of the allocated memory. 
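+
+A short sketch (illustrative only); the returned bytes are uninitialized, so
+they are written before being read:
+
+Example:
+	import "core:mem"
+
+	stack_alloc_non_zeroed_example :: proc() {
+		buf: [256]byte
+		s: mem.Stack
+		mem.stack_init(&s, buf[:])
+
+		bytes, err := mem.stack_alloc_bytes_non_zeroed(&s, 64)
+		if err == nil {
+			// Contents are indeterminate until written.
+			bytes[0] = 1
+		}
+	}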
+*/
+@(require_results)
+stack_alloc_bytes_non_zeroed :: proc(
+	s: ^Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location
+) -> ([]byte, Allocator_Error) {
	if s.data == nil {
-		return nil, .Invalid_Argument
+		panic("Stack allocation on an uninitialized stack allocator", loc)
	}
+	curr_addr := uintptr(raw_data(s.data)) + uintptr(s.curr_offset)
+	padding := calc_padding_with_header(
+		curr_addr,
+		uintptr(alignment),
+		size_of(Stack_Allocation_Header),
+	)
+	if s.curr_offset + padding + size > len(s.data) {
+		return nil, .Out_Of_Memory
+	}
+	s.prev_offset = s.curr_offset
+	s.curr_offset += padding
+	next_addr := curr_addr + uintptr(padding)
+	header := (^Stack_Allocation_Header)(next_addr - size_of(Stack_Allocation_Header))
+	header.padding = padding
+	header.prev_offset = s.prev_offset
+	s.curr_offset += size
+	s.peak_used = max(s.peak_used, s.curr_offset)
+	return byte_slice(rawptr(next_addr), size), nil
+}
-	raw_alloc :: proc(s: ^Stack, size, alignment: int, zero_memory: bool) -> ([]byte, Allocator_Error) {
-		curr_addr := uintptr(raw_data(s.data)) + uintptr(s.curr_offset)
-		padding := calc_padding_with_header(curr_addr, uintptr(alignment), size_of(Stack_Allocation_Header))
-		if s.curr_offset + padding + size > len(s.data) {
-			return nil, .Out_Of_Memory
-		}
-		s.prev_offset = s.curr_offset
-		s.curr_offset += padding
-
-		next_addr := curr_addr + uintptr(padding)
-		header := (^Stack_Allocation_Header)(next_addr - size_of(Stack_Allocation_Header))
-		header.padding = padding
-		header.prev_offset = s.prev_offset
+/*
+Free memory to the stack.
+
+This procedure frees the memory region starting at `old_memory` back to the
+stack. If the free is performed out of order, the `.Invalid_Pointer` error is
+returned.
+*/
+stack_free :: proc(
+	s: ^Stack,
+	old_memory: rawptr,
+	loc := #caller_location,
+) -> (Allocator_Error) {
+	if s.data == nil {
+		panic("Stack free on an uninitialized stack allocator", loc)
+	}
+	if old_memory == nil {
+		return nil
+	}
+	start := uintptr(raw_data(s.data))
+	end := start + uintptr(len(s.data))
+	curr_addr := uintptr(old_memory)
+	if !(start <= curr_addr && curr_addr < end) {
+		panic("Out of bounds memory address passed to stack allocator (free)", loc)
+	}
+	if curr_addr >= start+uintptr(s.curr_offset) {
+		// NOTE(bill): Allow double frees
+		return nil
+	}
+	header := (^Stack_Allocation_Header)(curr_addr - size_of(Stack_Allocation_Header))
+	old_offset := int(curr_addr - uintptr(header.padding) - uintptr(raw_data(s.data)))
+	if old_offset != header.prev_offset {
+		// panic("Out of order stack allocator free");
+		return .Invalid_Pointer
+	}
+	s.curr_offset = old_offset
+	s.prev_offset = header.prev_offset
+	return nil
+}
-		s.curr_offset += size
+/*
+Free all allocations to the stack.
+*/
+stack_free_all :: proc(s: ^Stack, loc := #caller_location) {
+	s.prev_offset = 0
+	s.curr_offset = 0
+}
-		s.peak_used = max(s.peak_used, s.curr_offset)
+/*
+Resize an allocation.
-		if zero_memory {
-			zero(rawptr(next_addr), size)
-		}
-		return byte_slice(rawptr(next_addr), size), nil
-	}
+This procedure resizes a memory region, defined by its location, `old_memory`,
+and its size, `old_size`, to have a size `size` and alignment `alignment`. The
+newly allocated memory, if any, is zero-initialized.
-	switch mode {
-	case .Alloc, .Alloc_Non_Zeroed:
-		return raw_alloc(s, size, alignment, mode == .Alloc)
-	case .Free:
-		if old_memory == nil {
-			return nil, nil
-		}
-		start := uintptr(raw_data(s.data))
-		end := start + uintptr(len(s.data))
-		curr_addr := uintptr(old_memory)
+If `old_memory` is `nil`, this procedure acts just like `stack_alloc()`,
+allocating a memory region `size` bytes in size, aligned on a boundary specified
+by `alignment`.
-		if !(start <= curr_addr && curr_addr < end) {
-			panic("Out of bounds memory address passed to stack allocator (free)")
-		}
+If `size` is 0, this procedure acts just like `stack_free()`, freeing the
+memory region located at an address specified by `old_memory`.
-		if curr_addr >= start+uintptr(s.curr_offset) {
-			// NOTE(bill): Allow double frees
-			return nil, nil
-		}
+This procedure returns the pointer to the resized memory region.
+*/
+@(require_results)
+stack_resize :: proc(
+	s: ^Stack,
+	old_memory: rawptr,
+	old_size: int,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := stack_resize_bytes(s, byte_slice(old_memory, old_size), size, alignment, loc)
+	return raw_data(bytes), err
+}
-		header := (^Stack_Allocation_Header)(curr_addr - size_of(Stack_Allocation_Header))
-		old_offset := int(curr_addr - uintptr(header.padding) - uintptr(raw_data(s.data)))
+/*
+Resize an allocation.
-		if old_offset != header.prev_offset {
-			// panic("Out of order stack allocator free");
-			return nil, .Invalid_Pointer
-		}
+This procedure resizes a memory region, specified by the `old_data` parameter,
+to have a size `size` and alignment `alignment`. The newly allocated memory,
+if any, is zero-initialized.
-		s.curr_offset = old_offset
-		s.prev_offset = header.prev_offset
+If `old_data` is `nil`, this procedure acts just like `stack_alloc()`,
+allocating a memory region `size` bytes in size, aligned on a boundary specified
+by `alignment`.
-	case .Free_All:
-		s.prev_offset = 0
-		s.curr_offset = 0
+If `size` is 0, this procedure acts just like `stack_free()`, freeing the
+memory region located at an address specified by `old_data`.
-	case .Resize, .Resize_Non_Zeroed:
-		if old_memory == nil {
-			return raw_alloc(s, size, alignment, mode == .Resize)
-		}
-		if size == 0 {
-			return nil, nil
+This procedure returns the slice of the resized memory region.
+*/
+@(require_results)
+stack_resize_bytes :: proc(
+	s: ^Stack,
+	old_data: []byte,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	// Delegate to the non-zeroed resize so the old contents are copied
+	// over, then zero only the newly grown region.
+	bytes, err := stack_resize_bytes_non_zeroed(s, old_data, size, alignment, loc)
+	if bytes != nil {
+		if old_data == nil {
+			zero_slice(bytes)
+		} else if size > len(old_data) {
+			zero_slice(bytes[len(old_data):])
+		}
+	}
+	return bytes, err
+}
-		start := uintptr(raw_data(s.data))
-		end := start + uintptr(len(s.data))
-		curr_addr := uintptr(old_memory)
-		if !(start <= curr_addr && curr_addr < end) {
-			panic("Out of bounds memory address passed to stack allocator (resize)")
-		}
+/*
+Resize an allocation without zero-initialization.
-		if curr_addr >= start+uintptr(s.curr_offset) {
-			// NOTE(bill): Allow double frees
-			return nil, nil
-		}
+This procedure resizes a memory region, defined by its location, `old_memory`,
+and its size, `old_size`, to have a size `size` and alignment `alignment`. The
+newly allocated memory, if any, is not explicitly zero-initialized.
- if old_size == size { - return byte_slice(old_memory, size), nil - } +If `old_memory` is `nil`, this procedure acts just like `stack_alloc()`, +allocating a memory region `size` bytes in size, aligned on a boundary specified +by `alignment`. - header := (^Stack_Allocation_Header)(curr_addr - size_of(Stack_Allocation_Header)) - old_offset := int(curr_addr - uintptr(header.padding) - uintptr(raw_data(s.data))) +If `size` is 0, this procedure acts just like `stack_free()`, freeing the +memory region located at an address specified by `old_memory`. - if old_offset != header.prev_offset { - data, err := raw_alloc(s, size, alignment, mode == .Resize) - if err == nil { - runtime.copy(data, byte_slice(old_memory, old_size)) - } - return data, err - } +This procedure returns the pointer to the resized memory region. +*/ +@(require_results) +stack_resize_non_zeroed :: proc( + s: ^Stack, + old_memory: rawptr, + old_size: int, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := stack_resize_bytes_non_zeroed(s, byte_slice(old_memory, old_size), size, alignment) + return raw_data(bytes), err +} - old_memory_size := uintptr(s.curr_offset) - (curr_addr - start) - assert(old_memory_size == uintptr(old_size)) +/* +Resize an allocation without zero-initialization. - diff := size - old_size - s.curr_offset += diff // works for smaller sizes too - if diff > 0 { - zero(rawptr(curr_addr + uintptr(diff)), diff) - } +This procedure resizes a memory region, specified by the `old_data` parameter +to have a size `size` and alignment `alignment`. The newly allocated memory, +if any is not explicitly zero-initialized. + +If `old_memory` is `nil`, this procedure acts just like `stack_alloc()`, +allocating a memory region `size` bytes in size, aligned on a boundary specified +by `alignment`. +If `size` is 0, this procedure acts just like `stack_free()`, freeing the +memory region located at an address specified by `old_memory`. + +This procedure returns the slice of the resized memory region. 
+*/
+@(require_results)
+stack_resize_bytes_non_zeroed :: proc(
+	s: ^Stack,
+	old_data: []byte,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	old_memory := raw_data(old_data)
+	old_size := len(old_data)
+	if s.data == nil {
+		panic("Stack resize on an uninitialized stack allocator", loc)
+	}
+	if old_memory == nil {
+		return stack_alloc_bytes_non_zeroed(s, size, alignment, loc)
+	}
+	if size == 0 {
+		return nil, nil
+	}
+	start := uintptr(raw_data(s.data))
+	end := start + uintptr(len(s.data))
+	curr_addr := uintptr(old_memory)
+	if !(start <= curr_addr && curr_addr < end) {
+		panic("Out of bounds memory address passed to stack allocator (resize)", loc)
+	}
+	if curr_addr >= start+uintptr(s.curr_offset) {
+		// NOTE(bill): Allow double frees
+		return nil, nil
+	}
+	if old_size == size {
+		return byte_slice(old_memory, size), nil
+	}
+	header := (^Stack_Allocation_Header)(curr_addr - size_of(Stack_Allocation_Header))
+	old_offset := int(curr_addr - uintptr(header.padding) - uintptr(raw_data(s.data)))
+	if old_offset != header.prev_offset {
+		data, err := stack_alloc_bytes_non_zeroed(s, size, alignment, loc)
+		if err == nil {
+			runtime.copy(data, byte_slice(old_memory, old_size))
+		}
+		return data, err
+	}
+	old_memory_size := uintptr(s.curr_offset) - (curr_addr - start)
+	assert(old_memory_size == uintptr(old_size))
+	diff := size - old_size
+	s.curr_offset += diff // works for smaller sizes too
+	if diff > 0 {
+		// Zero the grown tail, which begins at `old_size`, not at `diff`.
+		zero(rawptr(curr_addr + uintptr(old_size)), diff)
+	}
+	return byte_slice(old_memory, size), nil
+}
+stack_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size: int,
+	alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	s := cast(^Stack)allocator_data
+	if s.data == nil {
+		return nil, .Invalid_Argument
+	}
+	switch mode {
+	case .Alloc:
+		return stack_alloc_bytes(s, size, alignment, loc)
+	case .Alloc_Non_Zeroed:
+		return stack_alloc_bytes_non_zeroed(s, size, alignment, loc)
+	case .Free:
+		return nil, stack_free(s, old_memory, loc)
+	case .Free_All:
+		stack_free_all(s, loc)
+	case .Resize:
+		return stack_resize_bytes(s, byte_slice(old_memory, old_size), size, alignment, loc)
+	case .Resize_Non_Zeroed:
+		return stack_resize_bytes_non_zeroed(s, byte_slice(old_memory, old_size), size, alignment, loc)
	case .Query_Features:
		set := (^Allocator_Mode_Set)(old_memory)
		if set != nil {
@@ -463,27 +1130,32 @@ stack_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
	case .Query_Info:
		return nil, .Mode_Not_Implemented
	}
-	return nil, nil
}
+/*
+Allocation header of the small stack allocator.
+*/
Small_Stack_Allocation_Header :: struct {
	padding: u8,
}
-// Small_Stack is a stack-like allocator which uses the smallest possible header but at the cost of non-strict memory freeing order
+/*
+Small stack allocator data.
+*/
Small_Stack :: struct {
	data:      []byte,
	offset:    int,
	peak_used: int,
}
+/*
+Initialize small stack.
+
+This procedure initializes the small stack allocator with `data` as its backing
+buffer.
+*/
small_stack_init :: proc(s: ^Small_Stack, data: []byte) {
	s.data = data
	s.offset = 0
@@ -497,6 +1169,20 @@ init_small_stack :: proc(s: ^Small_Stack, data: []byte) {
	s.peak_used = 0
}
+/*
+Small stack allocator.
+
+The small stack allocator is just like the stack allocator, with the only
+difference being an extremely small header size. Unlike the stack allocator,
+the small stack allows out-of-order freeing of memory.
+ +The memory is allocated in the backing buffer linearly, from start to end. +Each subsequent allocation will get the next adjacent memory region. + +The metadata is stored in the allocation headers, that are located before the +start of each allocated memory region. Each header contains the amount of +padding bytes between that header and end of the previous allocation. +*/ @(require_results) small_stack_allocator :: proc(stack: ^Small_Stack) -> Allocator { return Allocator{ @@ -505,375 +1191,766 @@ small_stack_allocator :: proc(stack: ^Small_Stack) -> Allocator { } } -small_stack_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, location := #caller_location) -> ([]byte, Allocator_Error) { - s := cast(^Small_Stack)allocator_data +/* +Allocate memory from small stack. - if s.data == nil { - return nil, .Invalid_Argument - } +This procedure allocates `size` bytes of memory aligned to a boundary specified +by `alignment`. The allocated memory is zero-initialized. This procedure +returns a pointer to the allocated memory region. +*/ +@(require_results) +small_stack_alloc :: proc( + s: ^Small_Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := small_stack_alloc_bytes(s, size, alignment, loc) + return raw_data(bytes), err +} - align := clamp(alignment, 1, 8*size_of(Stack_Allocation_Header{}.padding)/2) +/* +Allocate memory from small stack. - raw_alloc :: proc(s: ^Small_Stack, size, alignment: int, zero_memory: bool) -> ([]byte, Allocator_Error) { - curr_addr := uintptr(raw_data(s.data)) + uintptr(s.offset) - padding := calc_padding_with_header(curr_addr, uintptr(alignment), size_of(Small_Stack_Allocation_Header)) - if s.offset + padding + size > len(s.data) { - return nil, .Out_Of_Memory - } - s.offset += padding +This procedure allocates `size` bytes of memory aligned to a boundary specified +by `alignment`. The allocated memory is zero-initialized. This procedure +returns a slice of the allocated memory region. +*/ +@(require_results) +small_stack_alloc_bytes :: proc( + s: ^Small_Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + bytes, err := small_stack_alloc_bytes_non_zeroed(s, size, alignment, loc) + if bytes != nil { + zero_slice(bytes) + } + return bytes, err +} - next_addr := curr_addr + uintptr(padding) - header := (^Small_Stack_Allocation_Header)(next_addr - size_of(Small_Stack_Allocation_Header)) - header.padding = auto_cast padding +/* +Allocate memory from small stack. - s.offset += size +This procedure allocates `size` bytes of memory aligned to a boundary specified +by `alignment`. The allocated memory is not explicitly zero-initialized. This +procedure returns a pointer to the allocated memory region. +*/ +@(require_results) +small_stack_alloc_non_zeroed :: proc( + s: ^Small_Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := small_stack_alloc_bytes_non_zeroed(s, size, alignment, loc) + return raw_data(bytes), err +} - s.peak_used = max(s.peak_used, s.offset) +/* +Allocate memory from small stack. - if zero_memory { - zero(rawptr(next_addr), size) - } - return byte_slice(rawptr(next_addr), size), nil +This procedure allocates `size` bytes of memory aligned to a boundary specified +by `alignment`. The allocated memory is not explicitly zero-initialized. 
This
+procedure returns a slice of the allocated memory region.
+*/
+@(require_results)
+small_stack_alloc_bytes_non_zeroed :: proc(
+	s: ^Small_Stack,
+	size: int,
+	alignment := DEFAULT_ALIGNMENT,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	if s.data == nil {
+		panic("Small stack is not initialized", loc)
	}
+	alignment := alignment
+	alignment = clamp(alignment, 1, 8*size_of(Stack_Allocation_Header{}.padding)/2)
+	curr_addr := uintptr(raw_data(s.data)) + uintptr(s.offset)
+	padding := calc_padding_with_header(curr_addr, uintptr(alignment), size_of(Small_Stack_Allocation_Header))
+	if s.offset + padding + size > len(s.data) {
+		return nil, .Out_Of_Memory
+	}
+	s.offset += padding
+	next_addr := curr_addr + uintptr(padding)
+	header := (^Small_Stack_Allocation_Header)(next_addr - size_of(Small_Stack_Allocation_Header))
+	header.padding = auto_cast padding
+	s.offset += size
+	s.peak_used = max(s.peak_used, s.offset)
+	return byte_slice(rawptr(next_addr), size), nil
+}
-	switch mode {
-	case .Alloc, .Alloc_Non_Zeroed:
-		return raw_alloc(s, size, align, mode == .Alloc)
-	case .Free:
-		if old_memory == nil {
-			return nil, nil
-		}
-		start := uintptr(raw_data(s.data))
-		end := start + uintptr(len(s.data))
-		curr_addr := uintptr(old_memory)
+/*
+Free memory to the small stack.
+
+This procedure frees the memory region starting at `old_memory`. The free may
+be performed out of order: the stack offset is rolled back to the start of the
+freed allocation, which also releases any allocations made after it.
+*/
+small_stack_free :: proc(
+	s: ^Small_Stack,
+	old_memory: rawptr,
+	loc := #caller_location,
+) -> Allocator_Error {
+	if s.data == nil {
+		panic("Small stack is not initialized", loc)
+	}
+	if old_memory == nil {
+		return nil
+	}
+	start := uintptr(raw_data(s.data))
+	end := start + uintptr(len(s.data))
+	curr_addr := uintptr(old_memory)
+	if !(start <= curr_addr && curr_addr < end) {
+		// panic("Out of bounds memory address passed to stack allocator (free)");
+		return .Invalid_Pointer
+	}
+	if curr_addr >= start+uintptr(s.offset) {
+		// NOTE(bill): Allow double frees
+		return nil
+	}
+	header := (^Small_Stack_Allocation_Header)(curr_addr - size_of(Small_Stack_Allocation_Header))
+	old_offset := int(curr_addr - uintptr(header.padding) - uintptr(raw_data(s.data)))
+	s.offset = old_offset
+	return nil
+}
-		if !(start <= curr_addr && curr_addr < end) {
-			// panic("Out of bounds memory address passed to stack allocator (free)");
-			return nil, .Invalid_Pointer
-		}
+/*
+Free all memory to small stack.
+*/
+small_stack_free_all :: proc(s: ^Small_Stack) {
+	s.offset = 0
+}
-		if curr_addr >= start+uintptr(s.offset) {
-			// NOTE(bill): Allow double frees
-			return nil, nil
-		}
+/*
+Resize an allocation.
-		header := (^Small_Stack_Allocation_Header)(curr_addr - size_of(Small_Stack_Allocation_Header))
-		old_offset := int(curr_addr - uintptr(header.padding) - uintptr(raw_data(s.data)))
+This procedure resizes a memory region, defined by its location, `old_memory`,
+and its size, `old_size`, to have a size `size` and alignment `alignment`. The
+newly allocated memory, if any, is zero-initialized.
-		s.offset = old_offset
+If `old_memory` is `nil`, this procedure acts just like `small_stack_alloc()`,
+allocating a memory region `size` bytes in size, aligned on a boundary specified
+by `alignment`.
-	case .Free_All:
-		s.offset = 0
+If `size` is 0, this procedure acts just like `small_stack_free()`, freeing the
+memory region located at an address specified by `old_memory`.
- case .Resize, .Resize_Non_Zeroed: - if old_memory == nil { - return raw_alloc(s, size, align, mode == .Resize) - } - if size == 0 { - return nil, nil - } +This procedure returns the pointer to the resized memory region. +*/ +@(require_results) +small_stack_resize :: proc( + s: ^Small_Stack, + old_memory: rawptr, + old_size: int, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := small_stack_resize_bytes(s, byte_slice(old_memory, old_size), size, alignment, loc) + return raw_data(bytes), err +} - start := uintptr(raw_data(s.data)) - end := start + uintptr(len(s.data)) - curr_addr := uintptr(old_memory) - if !(start <= curr_addr && curr_addr < end) { - // panic("Out of bounds memory address passed to stack allocator (resize)"); - return nil, .Invalid_Pointer - } +/* +Resize an allocation. - if curr_addr >= start+uintptr(s.offset) { - // NOTE(bill): Treat as a double free - return nil, nil - } +This procedure resizes a memory region, specified by the `old_data` parameter +to have a size `size` and alignment `alignment`. The newly allocated memory, +if any is zero-initialized. - if old_size == size { - return byte_slice(old_memory, size), nil - } +If `old_memory` is `nil`, this procedure acts just like `small_stack_alloc()`, +allocating a memory region `size` bytes in size, aligned on a boundary specified +by `alignment`. - data, err := raw_alloc(s, size, align, mode == .Resize) - if err == nil { - runtime.copy(data, byte_slice(old_memory, old_size)) - } - return data, err +If `size` is 0, this procedure acts just like `small_stack_free()`, freeing the +memory region located at an address specified by `old_memory`. - case .Query_Features: - set := (^Allocator_Mode_Set)(old_memory) - if set != nil { - set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Free_All, .Resize, .Resize_Non_Zeroed, .Query_Features} +This procedure returns the slice of the resized memory region. +*/ +@(require_results) +small_stack_resize_bytes :: proc( + s: ^Small_Stack, + old_data: []byte, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + bytes, err := small_stack_resize_bytes_non_zeroed(s, old_data, size, alignment, loc) + if bytes != nil { + if old_data == nil { + zero_slice(bytes) + } else if size > len(old_data) { + zero_slice(bytes[len(old_data):]) } - return nil, nil - - case .Query_Info: - return nil, .Mode_Not_Implemented } - - return nil, nil + return bytes, err } +/* +Resize an allocation without zero-initialization. +This procedure resizes a memory region, defined by its location, `old_memory`, +and its size, `old_size` to have a size `size` and alignment `alignment`. The +newly allocated memory, if any is not explicitly zero-initialized. +If `old_memory` is `nil`, this procedure acts just like `small_stack_alloc()`, +allocating a memory region `size` bytes in size, aligned on a boundary specified +by `alignment`. +If `size` is 0, this procedure acts just like `small_stack_free()`, freeing the +memory region located at an address specified by `old_memory`. -Dynamic_Pool :: struct { - block_size: int, - out_band_size: int, - alignment: int, - - unused_blocks: [dynamic]rawptr, - used_blocks: [dynamic]rawptr, - out_band_allocations: [dynamic]rawptr, - - current_block: rawptr, - current_pos: rawptr, - bytes_left: int, - - block_allocator: Allocator, +This procedure returns the pointer to the resized memory region. 
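+
+A minimal sketch (illustrative only):
+
+Example:
+	import "core:mem"
+
+	small_stack_resize_example :: proc() {
+		buf: [512]byte
+		s: mem.Small_Stack
+		mem.small_stack_init(&s, buf[:])
+
+		ptr, _ := mem.small_stack_alloc(&s, 64)
+		// Grow the region; the added bytes are not zeroed by this variant.
+		ptr, _ = mem.small_stack_resize_non_zeroed(&s, ptr, 64, 128)
+	}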
+*/ +@(require_results) +small_stack_resize_non_zeroed :: proc( + s: ^Small_Stack, + old_memory: rawptr, + old_size: int, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := small_stack_resize_bytes_non_zeroed(s, byte_slice(old_memory, old_size), size, alignment, loc) + return raw_data(bytes), err } +/* +Resize an allocation without zero-initialization. -DYNAMIC_POOL_BLOCK_SIZE_DEFAULT :: 65536 -DYNAMIC_POOL_OUT_OF_BAND_SIZE_DEFAULT :: 6554 +This procedure resizes a memory region, specified by the `old_data` parameter +to have a size `size` and alignment `alignment`. The newly allocated memory, +if any is not explicitly zero-initialized. +If `old_memory` is `nil`, this procedure acts just like `small_stack_alloc()`, +allocating a memory region `size` bytes in size, aligned on a boundary specified +by `alignment`. +If `size` is 0, this procedure acts just like `small_stack_free()`, freeing the +memory region located at an address specified by `old_memory`. -dynamic_pool_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) { - pool := (^Dynamic_Pool)(allocator_data) +This procedure returns the slice of the resized memory region. +*/ +@(require_results) +small_stack_resize_bytes_non_zeroed :: proc( + s: ^Small_Stack, + old_data: []byte, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + if s.data == nil { + panic("Small stack is not initialized", loc) + } + old_memory := raw_data(old_data) + old_size := len(old_data) + alignment := alignment + alignment = clamp(alignment, 1, 8*size_of(Stack_Allocation_Header{}.padding)/2) + if old_memory == nil { + return small_stack_alloc_bytes_non_zeroed(s, size, alignment, loc) + } + if size == 0 { + return nil, nil + } + start := uintptr(raw_data(s.data)) + end := start + uintptr(len(s.data)) + curr_addr := uintptr(old_memory) + if !(start <= curr_addr && curr_addr < end) { + // panic("Out of bounds memory address passed to stack allocator (resize)"); + return nil, .Invalid_Pointer + } + if curr_addr >= start+uintptr(s.offset) { + // NOTE(bill): Treat as a double free + return nil, nil + } + if old_size == size { + return byte_slice(old_memory, size), nil + } + data, err := small_stack_alloc_bytes_non_zeroed(s, size, alignment, loc) + if err == nil { + runtime.copy(data, byte_slice(old_memory, old_size)) + } + return data, err + +} +small_stack_allocator_proc :: proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size, alignment: int, + old_memory: rawptr, + old_size: int, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + s := cast(^Small_Stack)allocator_data + if s.data == nil { + return nil, .Invalid_Argument + } switch mode { - case .Alloc, .Alloc_Non_Zeroed: - return dynamic_pool_alloc_bytes(pool, size) + case .Alloc: + return small_stack_alloc_bytes(s, size, alignment, loc) + case .Alloc_Non_Zeroed: + return small_stack_alloc_bytes_non_zeroed(s, size, alignment, loc) case .Free: - return nil, .Mode_Not_Implemented + return nil, small_stack_free(s, old_memory, loc) case .Free_All: - dynamic_pool_free_all(pool) - return nil, nil - case .Resize, .Resize_Non_Zeroed: - if old_size >= size { - return byte_slice(old_memory, size), nil - } - data, err := dynamic_pool_alloc_bytes(pool, size) - if err == nil { - runtime.copy(data, byte_slice(old_memory, old_size)) - } - return data, 
err - + small_stack_free_all(s) + case .Resize: + return small_stack_resize_bytes(s, byte_slice(old_memory, old_size), size, alignment, loc) + case .Resize_Non_Zeroed: + return small_stack_resize_bytes_non_zeroed(s, byte_slice(old_memory, old_size), size, alignment, loc) case .Query_Features: set := (^Allocator_Mode_Set)(old_memory) if set != nil { - set^ = {.Alloc, .Alloc_Non_Zeroed, .Free_All, .Resize, .Resize_Non_Zeroed, .Query_Features, .Query_Info} + set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Free_All, .Resize, .Resize_Non_Zeroed, .Query_Features} } return nil, nil - case .Query_Info: - info := (^Allocator_Query_Info)(old_memory) - if info != nil && info.pointer != nil { - info.size = pool.block_size - info.alignment = pool.alignment - return byte_slice(info, size_of(info^)), nil - } - return nil, nil + return nil, .Mode_Not_Implemented } return nil, nil } -@(require_results) -dynamic_pool_allocator :: proc(pool: ^Dynamic_Pool) -> Allocator { - return Allocator{ - procedure = dynamic_pool_allocator_proc, - data = pool, - } +/* Preserved for compatibility */ +Dynamic_Pool :: Dynamic_Arena +DYNAMIC_POOL_BLOCK_SIZE_DEFAULT :: DYNAMIC_ARENA_BLOCK_SIZE_DEFAULT +DYNAMIC_POOL_OUT_OF_BAND_SIZE_DEFAULT :: DYNAMIC_ARENA_OUT_OF_BAND_SIZE_DEFAULT +dynamic_pool_allocator_proc :: dynamic_arena_allocator_proc +dynamic_pool_free_all :: dynamic_arena_free_all +dynamic_pool_reset :: dynamic_arena_reset +dynamic_pool_alloc_bytes :: dynamic_arena_alloc_bytes +dynamic_pool_alloc :: dynamic_arena_alloc +dynamic_pool_init :: dynamic_arena_init +dynamic_pool_allocator :: dynamic_arena_allocator +dynamic_pool_destroy :: dynamic_arena_destroy + +/* +Default block size for dynamic arena. +*/ +DYNAMIC_ARENA_BLOCK_SIZE_DEFAULT :: 65536 + +/* +Default out-band size of the dynamic arena. +*/ +DYNAMIC_ARENA_OUT_OF_BAND_SIZE_DEFAULT :: 6554 + +/* +Dynamic arena allocator data. +*/ +Dynamic_Arena :: struct { + block_size: int, + out_band_size: int, + alignment: int, + unused_blocks: [dynamic]rawptr, + used_blocks: [dynamic]rawptr, + out_band_allocations: [dynamic]rawptr, + current_block: rawptr, + current_pos: rawptr, + bytes_left: int, + block_allocator: Allocator, } -dynamic_pool_init :: proc(pool: ^Dynamic_Pool, - block_allocator := context.allocator, - array_allocator := context.allocator, - block_size := DYNAMIC_POOL_BLOCK_SIZE_DEFAULT, - out_band_size := DYNAMIC_POOL_OUT_OF_BAND_SIZE_DEFAULT, - alignment := 8) { - pool.block_size = block_size - pool.out_band_size = out_band_size - pool.alignment = alignment +/* +Initialize a dynamic arena. + +This procedure initializes a dynamic arena. The specified `block_allocator` +will be used to allocate arena blocks, and `array_allocator` to allocate +arrays of blocks and out-band blocks. The blocks have the default size of +`block_size` and out-band threshold will be `out_band_size`. All allocations +will be aligned to a boundary specified by `alignment`. +*/ +dynamic_arena_init :: proc( + pool: ^Dynamic_Arena, + block_allocator := context.allocator, + array_allocator := context.allocator, + block_size := DYNAMIC_ARENA_BLOCK_SIZE_DEFAULT, + out_band_size := DYNAMIC_ARENA_OUT_OF_BAND_SIZE_DEFAULT, + alignment := DEFAULT_ALIGNMENT, +) { + pool.block_size = block_size + pool.out_band_size = out_band_size + pool.alignment = alignment pool.block_allocator = block_allocator pool.out_band_allocations.allocator = array_allocator - pool. unused_blocks.allocator = array_allocator - pool. 
used_blocks.allocator = array_allocator
+	pool.unused_blocks.allocator = array_allocator
+	pool.used_blocks.allocator = array_allocator
}
-dynamic_pool_destroy :: proc(pool: ^Dynamic_Pool) {
-	dynamic_pool_free_all(pool)
-	delete(pool.unused_blocks)
-	delete(pool.used_blocks)
-	delete(pool.out_band_allocations)
+/*
+Dynamic arena allocator.
-	zero(pool, size_of(pool^))
-}
+The dynamic arena allocator uses blocks of a specific size, allocated on demand
+using the block allocator. This allocator acts similarly to the arena. All
+allocations in a block happen contiguously, from start to end. If an allocation
+does not fit into the remaining space of the block, and its size is smaller
+than the specified out-of-band size, a new block is allocated using the
+`block_allocator` and the allocation is performed from the newly-allocated block.
+If an allocation is larger than the specified out-of-band size, a dedicated
+block is allocated such that the allocation fits into it. This is referred to
+as an *out-of-band allocation*. The out-of-band blocks are kept separately from
+normal blocks.
+
+Just like the arena, the dynamic arena does not support freeing of individual
+objects.
+*/
@(require_results)
-dynamic_pool_alloc :: proc(pool: ^Dynamic_Pool, bytes: int) -> (rawptr, Allocator_Error) {
-	data, err := dynamic_pool_alloc_bytes(pool, bytes)
+dynamic_arena_allocator :: proc(a: ^Dynamic_Arena) -> Allocator {
+	return Allocator{
+		procedure = dynamic_arena_allocator_proc,
+		data      = a,
+	}
+}
+
+/*
+Destroy a dynamic arena.
+
+This procedure frees all allocations made on a dynamic arena, including the
+unused blocks, as well as the arrays used for storing blocks.
+*/
+dynamic_arena_destroy :: proc(a: ^Dynamic_Arena) {
+	dynamic_arena_free_all(a)
+	delete(a.unused_blocks)
+	delete(a.used_blocks)
+	delete(a.out_band_allocations)
+	zero(a, size_of(a^))
+}
+
+@(private="file")
+_dynamic_arena_cycle_new_block :: proc(a: ^Dynamic_Arena, loc := #caller_location) -> (err: Allocator_Error) {
+	if a.block_allocator.procedure == nil {
+		panic("You must call dynamic_arena_init on a Dynamic_Arena before using it", loc)
+	}
+	if a.current_block != nil {
+		append(&a.used_blocks, a.current_block, loc=loc)
+	}
+	new_block: rawptr
+	if len(a.unused_blocks) > 0 {
+		new_block = pop(&a.unused_blocks)
+	} else {
+		data: []byte
+		data, err = a.block_allocator.procedure(
+			a.block_allocator.data,
+			Allocator_Mode.Alloc,
+			a.block_size,
+			a.alignment,
+			nil,
+			0,
+		)
+		new_block = raw_data(data)
+	}
+	a.bytes_left = a.block_size
+	a.current_pos = new_block
+	a.current_block = new_block
+	return
+}
+
+/*
+Allocate memory from a dynamic arena.
+
+This procedure allocates `size` bytes of memory, aligned on a boundary specified
+by the arena's `alignment`, from a dynamic arena `a`. The allocated memory is
+zero-initialized. This procedure returns a pointer to the newly allocated memory
+region.
+*/
+@(require_results)
+dynamic_arena_alloc :: proc(a: ^Dynamic_Arena, size: int, loc := #caller_location) -> (rawptr, Allocator_Error) {
+	data, err := dynamic_arena_alloc_bytes(a, size, loc)
	return raw_data(data), err
}
+/*
+Allocate memory from a dynamic arena.
+
+This procedure allocates `size` bytes of memory, aligned on a boundary specified
+by the arena's `alignment`, from a dynamic arena `a`. The allocated memory is
+zero-initialized. This procedure returns a slice of the newly allocated memory
+region.
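+
+A minimal usage sketch (illustrative only):
+
+Example:
+	import "core:mem"
+
+	dynamic_arena_example :: proc() {
+		arena: mem.Dynamic_Arena
+		mem.dynamic_arena_init(&arena)
+		defer mem.dynamic_arena_destroy(&arena)
+
+		bytes, _ := mem.dynamic_arena_alloc_bytes(&arena, 128)
+		assert(len(bytes) == 128)
+	}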
+*/ @(require_results) -dynamic_pool_alloc_bytes :: proc(p: ^Dynamic_Pool, bytes: int) -> ([]byte, Allocator_Error) { - cycle_new_block :: proc(p: ^Dynamic_Pool) -> (err: Allocator_Error) { - if p.block_allocator.procedure == nil { - panic("You must call pool_init on a Pool before using it") - } +dynamic_arena_alloc_bytes :: proc(a: ^Dynamic_Arena, size: int, loc := #caller_location) -> ([]byte, Allocator_Error) { + bytes, err := dynamic_arena_alloc_bytes_non_zeroed(a, size, loc) + if bytes != nil { + zero_slice(bytes) + } + return bytes, err +} - if p.current_block != nil { - append(&p.used_blocks, p.current_block) - } +/* +Allocate non-initialized memory from a dynamic arena. - new_block: rawptr - if len(p.unused_blocks) > 0 { - new_block = pop(&p.unused_blocks) - } else { - data: []byte - data, err = p.block_allocator.procedure(p.block_allocator.data, Allocator_Mode.Alloc, - p.block_size, p.alignment, - nil, 0) - new_block = raw_data(data) - } +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment` from a dynamic arena `a`. The allocated memory is not explicitly +zero-initialized. This procedure returns a pointer to the newly allocated +memory region. +*/ +@(require_results) +dynamic_arena_alloc_non_zeroed :: proc(a: ^Dynamic_Arena, size: int, loc := #caller_location) -> (rawptr, Allocator_Error) { + data, err := dynamic_arena_alloc_bytes_non_zeroed(a, size, loc) + return raw_data(data), err +} - p.bytes_left = p.block_size - p.current_pos = new_block - p.current_block = new_block - return - } +/* +Allocate non-initialized memory from a dynamic arena. - n := align_formula(bytes, p.alignment) - if n > p.block_size { +This procedure allocates `size` bytes of memory aligned on a boundary specified +by `alignment` from a dynamic arena `a`. The allocated memory is not explicitly +zero-initialized. This procedure returns a slice of the newly allocated +memory region. +*/ +@(require_results) +dynamic_arena_alloc_bytes_non_zeroed :: proc(a: ^Dynamic_Arena, size: int, loc := #caller_location) -> ([]byte, Allocator_Error) { + n := align_formula(size, a.alignment) + if n > a.block_size { return nil, .Invalid_Argument } - if n >= p.out_band_size { - assert(p.block_allocator.procedure != nil) - memory, err := p.block_allocator.procedure(p.block_allocator.data, Allocator_Mode.Alloc, - p.block_size, p.alignment, - nil, 0) + if n >= a.out_band_size { + assert(a.block_allocator.procedure != nil, "Backing block allocator must be initialized", loc=loc) + memory, err := alloc_bytes_non_zeroed(a.block_size, a.alignment, a.block_allocator, loc) if memory != nil { - append(&p.out_band_allocations, raw_data(memory)) + append(&a.out_band_allocations, raw_data(memory), loc = loc) } return memory, err } - - if p.bytes_left < n { - err := cycle_new_block(p) + if a.bytes_left < n { + err := _dynamic_arena_cycle_new_block(a, loc) if err != nil { return nil, err } - if p.current_block == nil { + if a.current_block == nil { return nil, .Out_Of_Memory } } - - memory := p.current_pos - p.current_pos = ([^]byte)(p.current_pos)[n:] - p.bytes_left -= n - return ([^]byte)(memory)[:bytes], nil + memory := a.current_pos + a.current_pos = ([^]byte)(a.current_pos)[n:] + a.bytes_left -= n + return ([^]byte)(memory)[:size], nil } +/* +Reset the dynamic arena. 
-dynamic_pool_reset :: proc(p: ^Dynamic_Pool) {
-	if p.current_block != nil {
-		append(&p.unused_blocks, p.current_block)
-		p.current_block = nil
+This procedure frees all of the allocations owned by the dynamic arena,
+excluding the unused blocks.
+*/
+dynamic_arena_reset :: proc(a: ^Dynamic_Arena, loc := #caller_location) {
+	if a.current_block != nil {
+		append(&a.unused_blocks, a.current_block, loc=loc)
+		a.current_block = nil
	}
-
-	for block in p.used_blocks {
-		append(&p.unused_blocks, block)
+	for block in a.used_blocks {
+		append(&a.unused_blocks, block, loc=loc)
	}
-	clear(&p.used_blocks)
+	clear(&a.used_blocks)
+	for allocation in a.out_band_allocations {
+		free(allocation, a.block_allocator, loc=loc)
+	}
+	clear(&a.out_band_allocations)
+	a.bytes_left = 0 // Make new allocations call `_dynamic_arena_cycle_new_block` again.
+}
+
+/*
+Free all memory from a dynamic arena.
-	for a in p.out_band_allocations {
-		free(a, p.block_allocator)
+This procedure frees all of the allocations owned by the dynamic arena,
+including the unused blocks.
+*/
+dynamic_arena_free_all :: proc(a: ^Dynamic_Arena, loc := #caller_location) {
+	dynamic_arena_reset(a, loc)
+	for block in a.unused_blocks {
+		free(block, a.block_allocator, loc)
	}
-	clear(&p.out_band_allocations)
+	clear(&a.unused_blocks)
+}
+
+/*
+Resize an allocation.
+
+This procedure resizes a memory region, defined by its location, `old_memory`,
+and its size, `old_size`, to have a size `size`. The result is aligned to the
+arena's `alignment`. The newly allocated memory, if any, is zero-initialized.
-	p.bytes_left = 0 // Make new allocations call `cycle_new_block` again.
+If `old_memory` is `nil`, this procedure acts just like `dynamic_arena_alloc()`,
+allocating a memory region `size` bytes in size.
+
+If `size` is 0, this procedure simply shrinks the region to zero length; the
+dynamic arena does not free individual allocations, so no memory is reclaimed.
+
+This procedure returns the pointer to the resized memory region.
+*/
+@(require_results)
+dynamic_arena_resize :: proc(
+	a: ^Dynamic_Arena,
+	old_memory: rawptr,
+	old_size: int,
+	size: int,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := dynamic_arena_resize_bytes(a, byte_slice(old_memory, old_size), size, loc)
+	return raw_data(bytes), err
+}
-dynamic_pool_free_all :: proc(p: ^Dynamic_Pool) {
-	dynamic_pool_reset(p)
+/*
+Resize an allocation.
+
+This procedure resizes a memory region, specified by `old_data`, to have a size
+`size`. The result is aligned to the arena's `alignment`. The newly allocated
+memory, if any, is zero-initialized.
+
+If `old_data` is `nil`, this procedure acts just like `dynamic_arena_alloc()`,
+allocating a memory region `size` bytes in size.
-	for block in p.unused_blocks {
-		free(block, p.block_allocator)
+If `size` is 0, this procedure simply shrinks the region to zero length; the
+dynamic arena does not free individual allocations, so no memory is reclaimed.
+
+This procedure returns the slice of the resized memory region.
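+
+A minimal sketch (illustrative only):
+
+Example:
+	import "core:mem"
+
+	dynamic_arena_resize_example :: proc() {
+		arena: mem.Dynamic_Arena
+		mem.dynamic_arena_init(&arena)
+		defer mem.dynamic_arena_destroy(&arena)
+
+		data, _ := mem.dynamic_arena_alloc_bytes(&arena, 64)
+		// Growing allocates a new region and copies the old contents;
+		// the old region stays in the arena until reset or free_all.
+		data, _ = mem.dynamic_arena_resize_bytes(&arena, data, 256)
+		assert(len(data) == 256)
+	}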
+*/
+@(require_results)
+dynamic_arena_resize_bytes :: proc(
+	a: ^Dynamic_Arena,
+	old_data: []byte,
+	size: int,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	bytes, err := dynamic_arena_resize_bytes_non_zeroed(a, old_data, size, loc)
+	if bytes != nil {
+		if old_data == nil {
+			zero_slice(bytes)
+		} else if size > len(old_data) {
+			zero_slice(bytes[len(old_data):])
+		}
+	}
+	return bytes, err
+}
+
+/*
+Resize an allocation without zero-initialization.
+
+This procedure resizes a memory region, defined by its location, `old_memory`,
+and its size, `old_size`, to have a size `size`. The result is aligned to the
+arena's `alignment`. The newly allocated memory, if any, is not explicitly
+zero-initialized.
+
+If `old_memory` is `nil`, this procedure acts just like `dynamic_arena_alloc()`,
+allocating a memory region `size` bytes in size.
+
+If `size` is 0, this procedure simply shrinks the region to zero length; the
+dynamic arena does not free individual allocations, so no memory is reclaimed.
+
+This procedure returns the pointer to the resized memory region.
+*/
+@(require_results)
+dynamic_arena_resize_non_zeroed :: proc(
+	a: ^Dynamic_Arena,
+	old_memory: rawptr,
+	old_size: int,
+	size: int,
+	loc := #caller_location,
+) -> (rawptr, Allocator_Error) {
+	bytes, err := dynamic_arena_resize_bytes_non_zeroed(a, byte_slice(old_memory, old_size), size, loc)
+	return raw_data(bytes), err
+}
+
+/*
+Resize an allocation without zero-initialization.
+
+This procedure resizes a memory region, specified by `old_data`, to have a size
+`size`. The result is aligned to the arena's `alignment`. The newly allocated
+memory, if any, is not explicitly zero-initialized.
+
+If `old_data` is `nil`, this procedure acts just like `dynamic_arena_alloc()`,
+allocating a memory region `size` bytes in size.
+
+If `size` is 0, this procedure simply shrinks the region to zero length; the
+dynamic arena does not free individual allocations, so no memory is reclaimed.
+
+This procedure returns the slice of the resized memory region.
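+
+A short sketch of the in-place shrink path (illustrative only):
+
+Example:
+	import "core:mem"
+
+	dynamic_arena_shrink_example :: proc() {
+		arena: mem.Dynamic_Arena
+		mem.dynamic_arena_init(&arena)
+		defer mem.dynamic_arena_destroy(&arena)
+
+		data, _ := mem.dynamic_arena_alloc_bytes(&arena, 128)
+		// Shrinking returns a shorter slice over the same memory;
+		// nothing is copied and nothing is freed.
+		data, _ = mem.dynamic_arena_resize_bytes_non_zeroed(&arena, data, 32)
+		assert(len(data) == 32)
+	}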
+*/
+@(require_results)
+dynamic_arena_resize_bytes_non_zeroed :: proc(
+	a: ^Dynamic_Arena,
+	old_data: []byte,
+	size: int,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	old_memory := raw_data(old_data)
+	old_size := len(old_data)
+	if old_size >= size {
+		return byte_slice(old_memory, size), nil
+	}
+	data, err := dynamic_arena_alloc_bytes_non_zeroed(a, size, loc)
+	if err == nil {
+		runtime.copy(data, byte_slice(old_memory, old_size))
+	}
+	return data, err
+}
+
+dynamic_arena_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size: int,
+	alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
+	arena := (^Dynamic_Arena)(allocator_data)
 	switch mode {
 	case .Alloc:
-		if size > 0 {
-			panic("mem: panic allocator, .Alloc called", loc=loc)
-		}
+		return dynamic_arena_alloc_bytes(arena, size, loc)
 	case .Alloc_Non_Zeroed:
-		if size > 0 {
-			panic("mem: panic allocator, .Alloc_Non_Zeroed called", loc=loc)
-		}
-	case .Resize:
-		if size > 0 {
-			panic("mem: panic allocator, .Resize called", loc=loc)
-		}
-	case .Resize_Non_Zeroed:
-		if size > 0 {
-			panic("mem: panic allocator, .Resize_Non_Zeroed called", loc=loc)
-		}
+		return dynamic_arena_alloc_bytes_non_zeroed(arena, size, loc)
 	case .Free:
-		if old_memory != nil {
-			panic("mem: panic allocator, .Free called", loc=loc)
-		}
+		return nil, .Mode_Not_Implemented
 	case .Free_All:
-		panic("mem: panic allocator, .Free_All called", loc=loc)
-
+		dynamic_arena_free_all(arena, loc)
+	case .Resize:
+		return dynamic_arena_resize_bytes(arena, byte_slice(old_memory, old_size), size, loc)
+	case .Resize_Non_Zeroed:
+		return dynamic_arena_resize_bytes_non_zeroed(arena, byte_slice(old_memory, old_size), size, loc)
 	case .Query_Features:
 		set := (^Allocator_Mode_Set)(old_memory)
 		if set != nil {
-			set^ = {.Query_Features}
+			set^ = {.Alloc, .Alloc_Non_Zeroed, .Free_All, .Resize, .Resize_Non_Zeroed, .Query_Features, .Query_Info}
 		}
 		return nil, nil
 	case .Query_Info:
-		panic("mem: panic allocator, .Query_Info called", loc=loc)
+		info := (^Allocator_Query_Info)(old_memory)
+		if info != nil && info.pointer != nil {
+			info.size = arena.block_size
+			info.alignment = arena.alignment
+			return byte_slice(info, size_of(info^)), nil
+		}
+		return nil, nil
 	}
-
 	return nil, nil
 }
 
-@(require_results)
-panic_allocator :: proc() -> Allocator {
-	return Allocator{
-		procedure = panic_allocator_proc,
-		data = nil,
-	}
-}
-
-
-
-
-
+/*
+Header of the buddy block.
+*/
 Buddy_Block :: struct #align(align_of(uint)) {
 	size:    uint,
 	is_free: bool,
 }
 
+/*
+Obtain the next buddy block.
+*/
 @(require_results)
 buddy_block_next :: proc(block: ^Buddy_Block) -> ^Buddy_Block {
 	return (^Buddy_Block)(([^]byte)(block)[block.size:])
 }
 
+/*
+Split a block into two by truncating the given block to the given size.
+*/
 @(require_results)
 buddy_block_split :: proc(block: ^Buddy_Block, size: uint) -> ^Buddy_Block {
 	block := block
@@ -894,12 +1971,14 @@ buddy_block_split :: proc(block: ^Buddy_Block, size: uint) -> ^Buddy_Block {
 	return nil
 }
 
+/*
+Coalesce contiguous blocks in a range of blocks into one.
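+
+Two adjacent free buddy blocks of equal size are merged into a single free
+block of twice that size; the scan over the range repeats until a pass finds
+no more buddies to merge.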
+*/ buddy_block_coalescence :: proc(head, tail: ^Buddy_Block) { for { // Keep looping until there are no more buddies to coalesce block := head buddy := buddy_block_next(block) - no_coalescence := true for block < tail && buddy < tail { // make sure the buddies are within the range if block.is_free && buddy.is_free && block.size == buddy.size { @@ -922,28 +2001,26 @@ buddy_block_coalescence :: proc(head, tail: ^Buddy_Block) { } } } - if no_coalescence { return } } } - +/* +Find the best block for storing a given size in a range of blocks. +*/ @(require_results) buddy_block_find_best :: proc(head, tail: ^Buddy_Block, size: uint) -> ^Buddy_Block { assert(size != 0) - best_block: ^Buddy_Block block := head // left buddy := buddy_block_next(block) // right - // The entire memory section between head and tail is free, // just call 'buddy_block_split' to get the allocation if buddy == tail && block.is_free { return buddy_block_split(block, size) } - // Find the block which is the 'best_block' to requested allocation sized for block < tail && buddy < tail { // make sure the buddies are within the range // If both buddies are free, coalesce them together @@ -954,7 +2031,6 @@ buddy_block_find_best :: proc(head, tail: ^Buddy_Block, size: uint) -> ^Buddy_Bl if size <= block.size && (best_block == nil || block.size <= best_block.size) { best_block = block } - block = buddy_block_next(buddy) if block < tail { // Delay the buddy block for the next iteration @@ -962,20 +2038,16 @@ buddy_block_find_best :: proc(head, tail: ^Buddy_Block, size: uint) -> ^Buddy_Bl } continue } - - if block.is_free && size <= block.size && (best_block == nil || block.size <= best_block.size) { best_block = block } - if buddy.is_free && size <= buddy.size && (best_block == nil || buddy.size < best_block.size) { // If each buddy are the same size, then it makes more sense // to pick the buddy as it "bounces around" less best_block = buddy } - if (block.size <= buddy.size) { block = buddy_block_next(buddy) if (block < tail) { @@ -988,23 +2060,33 @@ buddy_block_find_best :: proc(head, tail: ^Buddy_Block, size: uint) -> ^Buddy_Bl buddy = buddy_block_next(buddy) } } - if best_block != nil { // This will handle the case if the 'best_block' is also the perfect fit return buddy_block_split(best_block, size) } - // Maybe out of memory return nil } - +/* +The buddy allocator data. +*/ Buddy_Allocator :: struct { head: ^Buddy_Block, tail: ^Buddy_Block, alignment: uint, } +/* +Buddy allocator. + +The buddy allocator is a type of allocator that splits the backing buffer into +multiple regions called buddy blocks. Initially, the allocator only has one +block with the size of the backing buffer. Upon each allocation, the allocator +finds the smallest block that can fit the size of requested memory region, and +splits the block according to the allocation size. If no block can be found, +the contiguous free blocks are coalesced and the search is performed again. +*/ @(require_results) buddy_allocator :: proc(b: ^Buddy_Allocator) -> Allocator { return Allocator{ @@ -1013,48 +2095,97 @@ buddy_allocator :: proc(b: ^Buddy_Allocator) -> Allocator { } } -buddy_allocator_init :: proc(b: ^Buddy_Allocator, data: []byte, alignment: uint) { - assert(data != nil) - assert(is_power_of_two(uintptr(len(data)))) - assert(is_power_of_two(uintptr(alignment))) +/* +Initialize the buddy allocator. +This procedure initializes the buddy allocator `b` with a backing buffer `data` +and block alignment specified by `alignment`. 
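+
+A minimal sketch (the values are illustrative; the buffer length must be a
+power of two and the buffer must be aligned to the requested alignment):
+
+Example:
+	b: Buddy_Allocator
+	buf := make([]byte, 1<<16)
+	buddy_allocator_init(&b, buf, 16)
+	allocator := buddy_allocator(&b)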
+*/
+buddy_allocator_init :: proc(b: ^Buddy_Allocator, data: []byte, alignment: uint, loc := #caller_location) {
+	assert(data != nil)
+	assert(is_power_of_two(uintptr(len(data))), "Size of the backing buffer must be power of two", loc)
+	assert(is_power_of_two(uintptr(alignment)), "Alignment must be a power of two", loc)
 	alignment := alignment
 	if alignment < size_of(Buddy_Block) {
 		alignment = size_of(Buddy_Block)
 	}
-
 	ptr := raw_data(data)
-	assert(uintptr(ptr) % uintptr(alignment) == 0, "data is not aligned to minimum alignment")
-
+	assert(uintptr(ptr) % uintptr(alignment) == 0, "data is not aligned to minimum alignment", loc)
 	b.head = (^Buddy_Block)(ptr)
-
 	b.head.size = len(data)
 	b.head.is_free = true
-
 	b.tail = buddy_block_next(b.head)
-
 	b.alignment = alignment
 }
 
+/*
+Get the block size required to fit an allocation of `size` bytes, including
+the block header and alignment padding.
+*/
 @(require_results)
 buddy_block_size_required :: proc(b: ^Buddy_Allocator, size: uint) -> uint {
 	size := size
 	actual_size := b.alignment
 	size += size_of(Buddy_Block)
 	size = align_forward_uint(size, b.alignment)
-
 	for size > actual_size {
 		actual_size <<= 1
 	}
-
 	return actual_size
 }
 
+/*
+Allocate memory from a buddy allocator.
+
+This procedure allocates `size` bytes of memory aligned on the boundary
+specified by the allocator's `alignment`. The allocated memory region is
+zero-initialized. This procedure returns a pointer to the allocated memory
+region.
+*/
 @(require_results)
-buddy_allocator_alloc :: proc(b: ^Buddy_Allocator, size: uint, zeroed: bool) -> ([]byte, Allocator_Error) {
+buddy_allocator_alloc :: proc(b: ^Buddy_Allocator, size: uint) -> (rawptr, Allocator_Error) {
+	bytes, err := buddy_allocator_alloc_bytes(b, size)
+	return raw_data(bytes), err
+}
+
+/*
+Allocate memory from a buddy allocator.
+
+This procedure allocates `size` bytes of memory aligned on the boundary
+specified by the allocator's `alignment`. The allocated memory region is
+zero-initialized. This procedure returns a slice of the allocated memory
+region.
+*/
+@(require_results)
+buddy_allocator_alloc_bytes :: proc(b: ^Buddy_Allocator, size: uint) -> ([]byte, Allocator_Error) {
+	bytes, err := buddy_allocator_alloc_bytes_non_zeroed(b, size)
+	if bytes != nil {
+		zero_slice(bytes)
+	}
+	return bytes, err
+}
+
+/*
+Allocate non-initialized memory from a buddy allocator.
+
+This procedure allocates `size` bytes of memory aligned on the boundary
+specified by the allocator's `alignment`. The allocated memory region is not
+explicitly zero-initialized. This procedure returns a pointer to the allocated
+memory region.
+*/
+@(require_results)
+buddy_allocator_alloc_non_zeroed :: proc(b: ^Buddy_Allocator, size: uint) -> (rawptr, Allocator_Error) {
+	bytes, err := buddy_allocator_alloc_bytes_non_zeroed(b, size)
+	return raw_data(bytes), err
+}
+
+/*
+Allocate non-initialized memory from a buddy allocator.
+
+This procedure allocates `size` bytes of memory aligned on the boundary
+specified by the allocator's `alignment`. The allocated memory region is not
+explicitly zero-initialized. This procedure returns a slice of the allocated
+memory region.
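+
+A minimal sketch, continuing from the initialization example above (`b` is an
+initialized `Buddy_Allocator`):
+
+Example:
+	data, err := buddy_allocator_alloc_bytes(&b, 128)
+	if err == nil {
+		defer buddy_allocator_free(&b, raw_data(data))
+		// use data...
+	}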
+*/
+@(require_results)
+buddy_allocator_alloc_bytes_non_zeroed :: proc(b: ^Buddy_Allocator, size: uint) -> ([]byte, Allocator_Error) {
 	if size != 0 {
 		actual_size := buddy_block_size_required(b, size)
-
 		found := buddy_block_find_best(b.head, b.tail, actual_size)
 		if found != nil {
 			// Try to coalesce all the free buddy blocks and then search again
@@ -1065,60 +2196,71 @@ buddy_allocator_alloc :: proc(b: ^Buddy_Allocator, size: uint, zeroed: bool) ->
 			return nil, .Out_Of_Memory
 		}
 		found.is_free = false
-
 		data := ([^]byte)(found)[b.alignment:][:size]
-		if zeroed {
-			zero_slice(data)
-		}
 		return data, nil
 	}
 	return nil, nil
 }
 
+/*
+Free memory to the buddy allocator.
+
+This procedure frees the memory region allocated at pointer `ptr`.
+
+If `ptr` is `nil`, this operation is a no-op. If `ptr` does not point into the
+allocator's backing buffer, `.Invalid_Pointer` is returned.
+*/
 buddy_allocator_free :: proc(b: ^Buddy_Allocator, ptr: rawptr) -> Allocator_Error {
 	if ptr != nil {
 		if !(b.head <= ptr && ptr <= b.tail) {
 			return .Invalid_Pointer
 		}
-
 		block := (^Buddy_Block)(([^]byte)(ptr)[-b.alignment:])
 		block.is_free = true
-
 		buddy_block_coalescence(b.head, b.tail)
 	}
 	return nil
 }
 
-buddy_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
-                             size, alignment: int,
-                             old_memory: rawptr, old_size: int,loc := #caller_location) -> ([]byte, Allocator_Error) {
+/*
+Free all memory to the buddy allocator.
+*/
+buddy_allocator_free_all :: proc(b: ^Buddy_Allocator) {
+	alignment := b.alignment
+	head := ([^]byte)(b.head)
+	tail := ([^]byte)(b.tail)
+	data := head[:ptr_sub(tail, head)]
+	buddy_allocator_init(b, data, alignment)
+}
 
+buddy_allocator_proc :: proc(
+	allocator_data: rawptr,
+	mode: Allocator_Mode,
+	size, alignment: int,
+	old_memory: rawptr,
+	old_size: int,
+	loc := #caller_location,
+) -> ([]byte, Allocator_Error) {
 	b := (^Buddy_Allocator)(allocator_data)
-
 	switch mode {
-	case .Alloc, .Alloc_Non_Zeroed:
-		return buddy_allocator_alloc(b, uint(size), mode == .Alloc)
+	case .Alloc:
+		return buddy_allocator_alloc_bytes(b, uint(size))
+	case .Alloc_Non_Zeroed:
+		return buddy_allocator_alloc_bytes_non_zeroed(b, uint(size))
 	case .Resize:
-		return default_resize_bytes_align(byte_slice(old_memory, old_size), size, alignment, buddy_allocator(b))
+		return default_resize_bytes_align(byte_slice(old_memory, old_size), size, alignment, buddy_allocator(b), loc)
 	case .Resize_Non_Zeroed:
-		return default_resize_bytes_align_non_zeroed(byte_slice(old_memory, old_size), size, alignment, buddy_allocator(b))
+		return default_resize_bytes_align_non_zeroed(byte_slice(old_memory, old_size), size, alignment, buddy_allocator(b), loc)
 	case .Free:
 		return nil, buddy_allocator_free(b, old_memory)
 	case .Free_All:
-
-		alignment := b.alignment
-		head := ([^]byte)(b.head)
-		tail := ([^]byte)(b.tail)
-		data := head[:ptr_sub(tail, head)]
-		buddy_allocator_init(b, data, alignment)
-
+		buddy_allocator_free_all(b)
 	case .Query_Features:
 		set := (^Allocator_Mode_Set)(old_memory)
 		if set != nil {
 			set^ = {.Query_Features, .Alloc, .Alloc_Non_Zeroed, .Resize, .Resize_Non_Zeroed, .Free, .Free_All, .Query_Info}
 		}
 		return nil, nil
-
 	case .Query_Info:
 		info := (^Allocator_Query_Info)(old_memory)
 		if info != nil && info.pointer != nil {
@@ -1126,7 +2268,6 @@ buddy_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
 			if !(b.head <= ptr && ptr <= b.tail) {
 				return nil, .Invalid_Pointer
 			}
-
 			block := (^Buddy_Block)(([^]byte)(ptr)[-b.alignment:])
 			info.size = int(block.size)
 			info.alignment = int(b.alignment)
@@ -1134,6 +2275,83 @@ buddy_allocator_proc :: 
proc(allocator_data: rawptr, mode: Allocator_Mode, } return nil, nil } - return nil, nil } + +// An allocator that keeps track of allocation sizes and passes it along to resizes. +// This is useful if you are using a library that needs an equivalent of `realloc` but want to use +// the Odin allocator interface. +// +// You want to wrap your allocator into this one if you are trying to use any allocator that relies +// on the old size to work. +// +// The overhead of this allocator is an extra max(alignment, size_of(Header)) bytes allocated for each allocation, these bytes are +// used to store the size and original pointer. +Compat_Allocator :: struct { + parent: Allocator, +} + +compat_allocator_init :: proc(rra: ^Compat_Allocator, allocator := context.allocator) { + rra.parent = allocator +} + +compat_allocator :: proc(rra: ^Compat_Allocator) -> Allocator { + return Allocator{ + data = rra, + procedure = compat_allocator_proc, + } +} + +compat_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, + size, alignment: int, + old_memory: rawptr, old_size: int, + location := #caller_location) -> (data: []byte, err: Allocator_Error) { + size, old_size := size, old_size + + Header :: struct { + size: int, + ptr: rawptr, + } + + rra := (^Compat_Allocator)(allocator_data) + switch mode { + case .Alloc, .Alloc_Non_Zeroed: + a := max(alignment, size_of(Header)) + size += a + assert(size >= 0, "overflow") + + allocation := rra.parent.procedure(rra.parent.data, mode, size, alignment, old_memory, old_size, location) or_return + #no_bounds_check data = allocation[a:] + + ([^]Header)(raw_data(data))[-1] = { + size = size, + ptr = raw_data(allocation), + } + return + + case .Free: + header := ([^]Header)(old_memory)[-1] + return rra.parent.procedure(rra.parent.data, mode, size, alignment, header.ptr, header.size, location) + + case .Resize, .Resize_Non_Zeroed: + header := ([^]Header)(old_memory)[-1] + + a := max(alignment, size_of(header)) + size += a + assert(size >= 0, "overflow") + + allocation := rra.parent.procedure(rra.parent.data, mode, size, alignment, header.ptr, header.size, location) or_return + #no_bounds_check data = allocation[a:] + + ([^]Header)(raw_data(data))[-1] = { + size = size, + ptr = raw_data(allocation), + } + return + + case .Free_All, .Query_Info, .Query_Features: + return rra.parent.procedure(rra.parent.data, mode, size, alignment, old_memory, old_size, location) + + case: unreachable() + } +} diff --git a/core/mem/doc.odin b/core/mem/doc.odin index 44c93f798..98755d797 100644 --- a/core/mem/doc.odin +++ b/core/mem/doc.odin @@ -1,34 +1,114 @@ /* -package mem implements various types of allocators. +The `mem` package implements various allocators and provides utility procedures +for dealing with memory, pointers and slices. +The documentation below describes basic concepts, applicable to the `mem` +package. -An example of how to use the `Tracking_Allocator` to track subsequent allocations -in your program and report leaks and bad frees: +## Pointers, multipointers, and slices -Example: - package foo +A *pointer* is an abstraction of an *address*, a numberic value representing the +location of an object in memory. That object is said to be *pointed to* by the +pointer. To obtain the address of a pointer, cast it to `uintptr`. - import "core:mem" - import "core:fmt" +A multipointer is a pointer that points to multiple objects. Unlike a pointer, +a multipointer can be indexed, but does not have a definite length. 
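+
+For example (illustrative declarations only):
+
+	p:  ^int    // pointer: addresses exactly one int
+	mp: [^]int  // multipointer: mp[i] is allowed, but len(mp) is not defined
+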
+A slice is a pointer that points to multiple objects, equipped with a length
+that specifies the number of objects the slice points to.
-	_main :: proc() {
-		// do stuff
-	}
+When an object's value is read through a pointer, that operation is called a
+*load* operation. When memory is written through a pointer, that operation is
+called a *store* operation. Both of these operations can be called *memory
+access operations*.
-	main :: proc() {
-		track: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track, context.allocator)
-		defer mem.tracking_allocator_destroy(&track)
-		context.allocator = mem.tracking_allocator(&track)
+## Allocators
-		_main()
+In C and C++ memory models, allocations of objects in memory are typically
+treated individually with a generic allocator (the `malloc` procedure), which
+in some scenarios can lead to poor cache utilization, slowdowns of individual
+objects' memory management, and growing complexity of the code that needs to
+keep track of pointers and their lifetimes.
-		for _, leak in track.allocation_map {
-			fmt.printf("%v leaked %m\n", leak.location, leak.size)
-		}
-		for bad_free in track.bad_free_array {
-			fmt.printf("%v allocation %p was freed badly\n", bad_free.location, bad_free.memory)
-		}
-	}
+Using different kinds of *allocators* for different purposes can solve these
+problems. Allocators are typically optimized for specific use-cases and can
+potentially simplify the memory management code.
+
+For example, in the context of making a game, having an arena allocator could
+simplify allocations of any temporary memory, because the programmer doesn't
+have to keep track of which objects need to be freed every time they are
+allocated: at the end of every frame the whole allocator is reset to its
+initial state and all objects are freed at once.
+
+Allocators impose different kinds of restrictions on object lifetimes, sizes,
+and alignment, and can be a significant performance gain if used properly.
+Odin supports allocators on a language level.
+
+Operations such as `new`, `free` and `delete` will by default use
+`context.allocator`, which can be overridden by the user. When an override
+happens, all called procedures will inherit the new context and use the same
+allocator.
+
+We will define one concept to simplify the description of some allocator-related
+procedures: ownership. If memory was allocated via a specific allocator, that
+allocator is said to be the *owner* of that memory region. Note that, unlike in
+Rust, the memory ownership model in Odin is not strict.
+
+## Alignment
+
+An address is said to be *aligned to `N` bytes* if the address's numeric value
+is divisible by `N`. The number `N` in this case can be referred to as the
+*alignment boundary*. Typically an alignment is a power-of-two integer value.
+
+The *natural alignment* of an object is typically equal to its size. For
+example, a 16-bit integer has a natural alignment of 2 bytes. When an object
+is not located on its natural alignment boundary, accesses to that object are
+considered *unaligned*.
+
+Some machines issue a hardware **exception**, or experience **slowdowns**, when
+a memory access operation occurs at an unaligned address. Examples of such
+operations are:
+
+- SIMD instructions on x86. These instructions require all memory accesses to be
+  on an address that is aligned to 16 bytes.
+- On ARM, unaligned loads can incur an extra cycle penalty (see the sketch after
+  this list).
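+
+For example, a sketch of realigning an address with this package's helpers
+(the address value is illustrative):
+
+	p := rawptr(uintptr(0x1001))
+	aligned := mem.align_forward(p, 16) // -> 0x1010, the next 16-byte boundary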
+
+As such, many operations that allocate memory in this package allow the caller
+to explicitly specify the alignment of allocated pointers/slices. The default
+alignment for all operations is specified in the constant
+`mem.DEFAULT_ALIGNMENT`.
+
+## Zero by default
+
+Whenever new memory is allocated, whether via an allocator or on the stack,
+Odin will by default zero-initialize that memory, even if it wasn't explicitly
+initialized. This allows for some convenience in certain scenarios and ease of
+debugging, which will not be described in detail here.
+
+However, zero-initialization can be a cause of slowdowns when allocating large
+buffers. For this reason, allocators have `*_non_zeroed` modes of allocation
+that allow the user to request uninitialized memory, avoiding the relatively
+expensive zero-filling of the buffer.
+
+## Naming conventions
+
+The word `size` is used to denote the **size in bytes**. The word `length` is
+used to denote the count of objects.
+
+The allocation procedures use the following conventions:
+
+- If the name contains `alloc_bytes` or `resize_bytes`, then the procedure takes
+  in slice parameters and returns slices.
+- If the procedure name contains `alloc` or `resize`, then the procedure takes
+  in a raw pointer and returns raw pointers.
+- If the procedure name contains `free_bytes`, then the procedure takes in a
+  slice.
+- If the procedure name contains `free`, then the procedure takes in a pointer.
+
+Higher-level allocation procedures use the following naming scheme:
+
+- `new`: Allocate a single object
+- `free`: Free a single object (opposite of `new`)
+- `make`: Allocate a group of objects
+- `delete`: Free a group of objects (opposite of `make`)
 */
 package mem
diff --git a/core/mem/mem.odin b/core/mem/mem.odin
index d423cc1eb..67ed56c39 100644
--- a/core/mem/mem.odin
+++ b/core/mem/mem.odin
@@ -3,49 +3,185 @@ package mem
 import "base:runtime"
 import "base:intrinsics"
 
-Byte :: runtime.Byte
+/*
+The size, in bytes, of a single byte.
+
+This constant is equal to the value of `1`.
+*/
+Byte :: runtime.Byte
+
+/*
+The size, in bytes, of one kilobyte.
+
+This constant is equal to the number of bytes in one kilobyte (also known as
+kibibyte), which is equal to 1024 bytes.
+*/
 Kilobyte :: runtime.Kilobyte
+
+/*
+The size, in bytes, of one megabyte.
+
+This constant is equal to the number of bytes in one megabyte (also known as
+mebibyte), which is equal to 1024 kilobytes.
+*/
 Megabyte :: runtime.Megabyte
+
+/*
+The size, in bytes, of one gigabyte.
+
+This constant is equal to the number of bytes in one gigabyte (also known as
+gibibyte), which is equal to 1024 megabytes.
+*/
 Gigabyte :: runtime.Gigabyte
+
+/*
+The size, in bytes, of one terabyte.
+
+This constant is equal to the number of bytes in one terabyte (also known as
+tebibyte), which is equal to 1024 gigabytes.
+*/
 Terabyte :: runtime.Terabyte
+
+/*
+The size, in bytes, of one petabyte.
+
+This constant is equal to the number of bytes in one petabyte (also known as
+pebibyte), which is equal to 1024 terabytes.
+*/
 Petabyte :: runtime.Petabyte
-Exabyte :: runtime.Exabyte
 
+/*
+The size, in bytes, of one exabyte.
+
+This constant is equal to the number of bytes in one exabyte (also known as
+exbibyte), which is equal to 1024 petabytes.
+*/
+Exabyte :: runtime.Exabyte
+
+/*
+Set each byte of a memory range to a specific value.
+
+This procedure copies the value specified by the `value` parameter into each
+of the `len` bytes of the memory range starting at address `data`.
+ +This procedure returns the pointer to `data`. +*/ set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr { return runtime.memset(data, i32(value), len) } + +/* +Set each byte of a memory range to zero. + +This procedure copies the value `0` into the `len` bytes of a memory range, +starting at address `data`. + +This procedure returns the pointer to `data`. +*/ zero :: proc "contextless" (data: rawptr, len: int) -> rawptr { intrinsics.mem_zero(data, len) return data } + +/* +Set each byte of a memory range to zero. + +This procedure copies the value `0` into the `len` bytes of a memory range, +starting at address `data`. + +This procedure returns the pointer to `data`. + +Unlike the `zero()` procedure, which can be optimized away or reordered by the +compiler under certain circumstances, `zero_explicit()` procedure can not be +optimized away or reordered with other memory access operations, and the +compiler assumes volatile semantics of the memory. +*/ zero_explicit :: proc "contextless" (data: rawptr, len: int) -> rawptr { // This routine tries to avoid the compiler optimizing away the call, - // so that it is always executed. It is intended to provided + // so that it is always executed. It is intended to provide // equivalent semantics to those provided by the C11 Annex K 3.7.4.1 // memset_s call. intrinsics.mem_zero_volatile(data, len) // Use the volatile mem_zero intrinsics.atomic_thread_fence(.Seq_Cst) // Prevent reordering return data } + +/* +Zero-fill the memory of an object. + +This procedure sets each byte of the object pointed to by the pointer `item` +to zero, and returns the pointer to `item`. +*/ zero_item :: proc "contextless" (item: $P/^$T) -> P { intrinsics.mem_zero(item, size_of(T)) return item } + +/* +Zero-fill the memory of the slice. + +This procedure sets each byte of the slice pointed to by the slice `data` +to zero, and returns the slice `data`. +*/ zero_slice :: proc "contextless" (data: $T/[]$E) -> T { zero(raw_data(data), size_of(E)*len(data)) return data } +/* +Copy bytes from one memory range to another. +This procedure copies `len` bytes of data, from the memory range pointed to by +the `src` pointer into the memory range pointed to by the `dst` pointer, and +returns the `dst` pointer. +*/ copy :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr { intrinsics.mem_copy(dst, src, len) return dst } + +/* +Copy bytes between two non-overlapping memory ranges. + +This procedure copies `len` bytes of data, from the memory range pointed to by +the `src` pointer into the memory range pointed to by the `dst` pointer, and +returns the `dst` pointer. + +This is a slightly more optimized version of the `copy` procedure that requires +that memory ranges specified by the parameters to this procedure are not +overlapping. If the memory ranges specified by `dst` and `src` pointers overlap, +the behavior of this function may be unpredictable. +*/ copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr { intrinsics.mem_copy_non_overlapping(dst, src, len) return dst } +/* +Compare two memory ranges defined by slices. + +This procedure performs a byte-by-byte comparison between memory ranges +specified by slices `a` and `b`, and returns a value, specifying their relative +ordering. + +If the return value is: +- Equal to `-1`, then `a` is "smaller" than `b`. +- Equal to `+1`, then `a` is "bigger" than `b`. +- Equal to `0`, then `a` and `b` are equal. + +The comparison is performed as follows: +1. 
+1. Each byte, up to `min(len(a), len(b))` bytes, is compared between `a` and `b`.
+   - If the byte in slice `a` is smaller than the byte in slice `b`, then comparison
+     stops and this procedure returns `-1`.
+   - If the byte in slice `a` is bigger than the byte in slice `b`, then comparison
+     stops and this procedure returns `+1`.
+   - Otherwise the comparison continues until `min(len(a), len(b))` bytes are compared.
+2. If all the bytes in the range are equal, then the lengths of the slices are
+   compared.
+   - If the length of slice `a` is smaller than the length of slice `b`, then `-1` is returned.
+   - If the length of slice `b` is smaller than the length of slice `a`, then `+1` is returned.
+   - Otherwise `0` is returned.
+*/
 @(require_results)
 compare :: proc "contextless" (a, b: []byte) -> int {
 	res := compare_byte_ptrs(raw_data(a), raw_data(b), min(len(a), len(b)))
@@ -57,16 +193,89 @@ compare :: proc "contextless" (a, b: []byte) -> int {
 	return res
 }
 
+/*
+Compare two memory ranges defined by byte pointers.
+
+This procedure performs a byte-by-byte comparison between memory ranges of size
+`n` located at addresses `a` and `b`, and returns a value specifying their
+relative ordering.
+
+If the return value is:
+- Equal to `-1`, then `a` is "smaller" than `b`.
+- Equal to `+1`, then `a` is "bigger" than `b`.
+- Equal to `0`, then `a` and `b` are equal.
+
+The comparison is performed as follows:
+1. Each byte, up to `n` bytes, is compared between `a` and `b`.
+   - If the byte in `a` is smaller than the byte in `b`, then comparison stops
+     and this procedure returns `-1`.
+   - If the byte in `a` is bigger than the byte in `b`, then comparison stops
+     and this procedure returns `+1`.
+   - Otherwise the comparison continues until `n` bytes are compared.
+2. If all the bytes in the range are equal, this procedure returns `0`.
+*/
 @(require_results)
 compare_byte_ptrs :: proc "contextless" (a, b: ^byte, n: int) -> int #no_bounds_check {
 	return runtime.memory_compare(a, b, n)
 }
 
+/*
+Compare two memory ranges defined by pointers.
+
+This procedure performs a byte-by-byte comparison between memory ranges of size
+`n` located at addresses `a` and `b`, and returns a value specifying their
+relative ordering.
+
+If the return value is:
+- Equal to `-1`, then `a` is "smaller" than `b`.
+- Equal to `+1`, then `a` is "bigger" than `b`.
+- Equal to `0`, then `a` and `b` are equal.
+
+The comparison is performed as follows:
+1. Each byte, up to `n` bytes, is compared between `a` and `b`.
+   - If the byte in `a` is smaller than the byte in `b`, then comparison stops
+     and this procedure returns `-1`.
+   - If the byte in `a` is bigger than the byte in `b`, then comparison stops
+     and this procedure returns `+1`.
+   - Otherwise the comparison continues until `n` bytes are compared.
+2. If all the bytes in the range are equal, this procedure returns `0`.
+*/
+@(require_results)
+compare_ptrs :: proc "contextless" (a, b: rawptr, n: int) -> int {
+	return compare_byte_ptrs((^byte)(a), (^byte)(b), n)
+}
+
+/*
+Check whether two objects are equal on the binary level.
+
+This procedure checks whether the memory ranges occupied by objects `a` and
+`b` are equal. See `compare_byte_ptrs()` for how this comparison is done.
+*/
+@(require_results)
+simple_equal :: proc "contextless" (a, b: $T) -> bool where intrinsics.type_is_simple_compare(T) {
+	a, b := a, b
+	return compare_byte_ptrs((^byte)(&a), (^byte)(&b), size_of(T)) == 0
+}
+
+/*
+Check if the memory range defined by a slice is zero-filled.
+
+This procedure checks whether every byte of the slice `data` is zero. If all
+bytes of the slice are zero, this procedure returns `true`. Otherwise this
+procedure returns `false`.
+*/
 @(require_results)
 check_zero :: proc(data: []byte) -> bool {
 	return check_zero_ptr(raw_data(data), len(data))
 }
 
+/*
+Check if the memory range defined by a pointer is zero-filled.
+
+This procedure checks whether each of the `len` bytes, starting at address
+`ptr`, is zero. If all bytes of this range are zero, this procedure returns
+`true`. Otherwise this procedure returns `false`.
+*/
 @(require_results)
 check_zero_ptr :: proc(ptr: rawptr, len: int) -> bool {
 	switch {
@@ -81,57 +290,99 @@ check_zero_ptr :: proc(ptr: rawptr, len: int) -> bool {
 	case 4: return intrinsics.unaligned_load((^u32)(ptr)) == 0
 	case 8: return intrinsics.unaligned_load((^u64)(ptr)) == 0
 	}
-
 	start := uintptr(ptr)
 	start_aligned := align_forward_uintptr(start, align_of(uintptr))
 	end := start + uintptr(len)
 	end_aligned := align_backward_uintptr(end, align_of(uintptr))
-
 	for b in start..<start_aligned {
 		if (^byte)(b)^ != 0 {
 			return false
 		}
 	}
-
 	for b := start_aligned; b < end_aligned; b += size_of(uintptr) {
 		if (^uintptr)(b)^ != 0 {
 			return false
 		}
 	}
-
 	for b in end_aligned..<end {
 		if (^byte)(b)^ != 0 {
 			return false
 		}
 	}
-
 	return true
 }
 
-@(require_results)
-simple_equal :: proc "contextless" (a, b: $T) -> bool where intrinsics.type_is_simple_compare(T) {
-	a, b := a, b
-	return compare_byte_ptrs((^byte)(&a), (^byte)(&b), size_of(T)) == 0
-}
+/*
+Offset a given pointer by a given amount.
-@(require_results)
-compare_ptrs :: proc "contextless" (a, b: rawptr, n: int) -> int {
-	return compare_byte_ptrs((^byte)(a), (^byte)(b), n)
-}
+This procedure offsets the pointer `ptr` to an object of type `T` by
+`offset*size_of(T)` bytes, and returns the resulting pointer.
+
+**Note**: Prefer to use multipointer types, if possible.
+*/
 ptr_offset :: intrinsics.ptr_offset
+
+/*
+Find the distance between two pointers.
+
+This procedure returns the difference between the pointers `a` and `b`: the
+distance between them, measured in objects of type `T`, i.e.
+`(uintptr(a) - uintptr(b)) / size_of(T)`.
+*/
 ptr_sub :: intrinsics.ptr_sub
 
+/*
+Construct a slice from pointer and length.
+
+This procedure creates a slice that points to `len` objects located at the
+address specified by `ptr`.
+*/
 @(require_results)
 slice_ptr :: proc "contextless" (ptr: ^$T, len: int) -> []T {
 	return ([^]T)(ptr)[:len]
 }
 
+/*
+Construct a byte slice from raw pointer and length.
+
+This procedure creates a byte slice that points to `len` bytes located at the
+address specified by `data`.
+*/
 @(require_results)
 byte_slice :: #force_inline proc "contextless" (data: rawptr, #any_int len: int) -> []byte {
 	return ([^]u8)(data)[:max(len, 0)]
 }
 
+/*
+Create a byte slice from pointer and length.
+
+This procedure creates a byte slice covering `len` objects of type `T`,
+starting at the address specified by `ptr`.
+*/
+@(require_results)
+ptr_to_bytes :: proc "contextless" (ptr: ^$T, len := 1) -> []byte {
+	return transmute([]byte)Raw_Slice{ptr, len*size_of(T)}
+}
+
+/*
+Obtain a byte slice pointing to the contents of an `any` value.
+
+This procedure returns a byte slice pointing to the contents of the specified
+value of the `any` type.
+*/
+@(require_results)
+any_to_bytes :: proc "contextless" (val: any) -> []byte {
+	ti := type_info_of(val.id)
+	size := ti != nil ? 
ti.size : 0 + return transmute([]byte)Raw_Slice{val.data, size} +} + +/* +Obtain a byte slice from any slice. + +This procedure returns a slice, that points to the same bytes as the slice, +specified by `slice` and returns the resulting byte slice. +*/ @(require_results) slice_to_bytes :: proc "contextless" (slice: $E/[]$T) -> []byte { s := transmute(Raw_Slice)slice @@ -139,6 +390,15 @@ slice_to_bytes :: proc "contextless" (slice: $E/[]$T) -> []byte { return transmute([]byte)s } +/* +Transmute slice to a different type. + +This procedure performs an operation similar to transmute, returning a slice of +type `T` that points to the same bytes as the slice specified by `slice` +parameter. Unlike plain transmute operation, this procedure adjusts the length +of the resulting slice, such that the resulting slice points to the correct +amount of objects to cover the memory region pointed to by `slice`. +*/ @(require_results) slice_data_cast :: proc "contextless" ($T: typeid/[]$A, slice: $S/[]$B) -> T { when size_of(A) == 0 || size_of(B) == 0 { @@ -150,12 +410,25 @@ slice_data_cast :: proc "contextless" ($T: typeid/[]$A, slice: $S/[]$B) -> T { } } +/* +Obtain data and length of a slice. + +This procedure returns the pointer to the start of the memory region pointed to +by slice `slice` and the length of the slice. +*/ @(require_results) slice_to_components :: proc "contextless" (slice: $E/[]$T) -> (data: ^T, len: int) { s := transmute(Raw_Slice)slice return (^T)(s.data), s.len } +/* +Create a dynamic array from slice. + +This procedure creates a dynamic array, using slice `backing` as the backing +buffer for the dynamic array. The resulting dynamic array can not grow beyond +the size of the specified slice. +*/ @(require_results) buffer_from_slice :: proc "contextless" (backing: $T/[]$E) -> [dynamic]E { return transmute([dynamic]E)Raw_Dynamic_Array{ @@ -169,19 +442,12 @@ buffer_from_slice :: proc "contextless" (backing: $T/[]$E) -> [dynamic]E { } } -@(require_results) -ptr_to_bytes :: proc "contextless" (ptr: ^$T, len := 1) -> []byte { - return transmute([]byte)Raw_Slice{ptr, len*size_of(T)} -} - -@(require_results) -any_to_bytes :: proc "contextless" (val: any) -> []byte { - ti := type_info_of(val.id) - size := ti != nil ? ti.size : 0 - return transmute([]byte)Raw_Slice{val.data, size} -} - +/* +Check whether a number is a power of two. +This procedure checks whether a given pointer-sized unsigned integer contains +a power-of-two value. +*/ @(require_results) is_power_of_two :: proc "contextless" (x: uintptr) -> bool { if x <= 0 { @@ -190,66 +456,167 @@ is_power_of_two :: proc "contextless" (x: uintptr) -> bool { return (x & (x-1)) == 0 } -@(require_results) -align_forward :: proc(ptr: rawptr, align: uintptr) -> rawptr { - return rawptr(align_forward_uintptr(uintptr(ptr), align)) +/* +Check if a pointer is aligned. + +This procedure checks whether a pointer `x` is aligned to a boundary specified +by `align`, and returns `true` if the pointer is aligned, and false otherwise. +*/ +is_aligned :: proc "contextless" (x: rawptr, align: int) -> bool { + p := uintptr(x) + return (p & (1<<uintptr(align) - 1)) == 0 } +/* +Align uintptr forward. + +This procedure returns the next address after `ptr`, that is located on the +alignment boundary specified by `align`. If `ptr` is already aligned to `align` +bytes, `ptr` is returned. + +The specified alignment must be a power of 2. 
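+
+Example (the values are illustrative):
+
+	align_forward_uintptr(13, 8) // -> 16
+	align_forward_uintptr(16, 8) // -> 16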
+*/ @(require_results) align_forward_uintptr :: proc(ptr, align: uintptr) -> uintptr { assert(is_power_of_two(align)) + return (ptr + align-1) & ~(align-1) +} - p := ptr - modulo := p & (align-1) - if modulo != 0 { - p += align - modulo - } - return p +/* +Align pointer forward. + +This procedure returns the next address after `ptr`, that is located on the +alignment boundary specified by `align`. If `ptr` is already aligned to `align` +bytes, `ptr` is returned. + +The specified alignment must be a power of 2. +*/ +@(require_results) +align_forward :: proc(ptr: rawptr, align: uintptr) -> rawptr { + return rawptr(align_forward_uintptr(uintptr(ptr), align)) } +/* +Align int forward. + +This procedure returns the next address after `ptr`, that is located on the +alignment boundary specified by `align`. If `ptr` is already aligned to `align` +bytes, `ptr` is returned. + +The specified alignment must be a power of 2. +*/ @(require_results) align_forward_int :: proc(ptr, align: int) -> int { return int(align_forward_uintptr(uintptr(ptr), uintptr(align))) } + +/* +Align uint forward. + +This procedure returns the next address after `ptr`, that is located on the +alignment boundary specified by `align`. If `ptr` is already aligned to `align` +bytes, `ptr` is returned. + +The specified alignment must be a power of 2. +*/ @(require_results) align_forward_uint :: proc(ptr, align: uint) -> uint { return uint(align_forward_uintptr(uintptr(ptr), uintptr(align))) } +/* +Align uintptr backwards. + +This procedure returns the previous address before `ptr`, that is located on the +alignment boundary specified by `align`. If `ptr` is already aligned to `align` +bytes, `ptr` is returned. + +The specified alignment must be a power of 2. +*/ @(require_results) -align_backward :: proc(ptr: rawptr, align: uintptr) -> rawptr { - return rawptr(align_backward_uintptr(uintptr(ptr), align)) +align_backward_uintptr :: proc(ptr, align: uintptr) -> uintptr { + assert(is_power_of_two(align)) + return ptr & ~(align-1) } +/* +Align rawptr backwards. + +This procedure returns the previous address before `ptr`, that is located on the +alignment boundary specified by `align`. If `ptr` is already aligned to `align` +bytes, `ptr` is returned. + +The specified alignment must be a power of 2. +*/ @(require_results) -align_backward_uintptr :: proc(ptr, align: uintptr) -> uintptr { - return align_forward_uintptr(ptr - align + 1, align) +align_backward :: proc(ptr: rawptr, align: uintptr) -> rawptr { + return rawptr(align_backward_uintptr(uintptr(ptr), align)) } +/* +Align int backwards. + +This procedure returns the previous address before `ptr`, that is located on the +alignment boundary specified by `align`. If `ptr` is already aligned to `align` +bytes, `ptr` is returned. + +The specified alignment must be a power of 2. +*/ @(require_results) align_backward_int :: proc(ptr, align: int) -> int { return int(align_backward_uintptr(uintptr(ptr), uintptr(align))) } + +/* +Align uint backwards. + +This procedure returns the previous address before `ptr`, that is located on the +alignment boundary specified by `align`. If `ptr` is already aligned to `align` +bytes, `ptr` is returned. + +The specified alignment must be a power of 2. +*/ @(require_results) align_backward_uint :: proc(ptr, align: uint) -> uint { return uint(align_backward_uintptr(uintptr(ptr), uintptr(align))) } +/* +Create a context with a given allocator. + +This procedure returns a copy of the current context with the allocator replaced +by the allocator `a`. 
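+
+A minimal sketch (`my_allocator` is a placeholder for any `Allocator` value):
+
+Example:
+	{
+		context = context_from_allocator(my_allocator)
+		// allocations in this scope now use `my_allocator`
+	}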
+*/
 @(require_results)
 context_from_allocator :: proc(a: Allocator) -> type_of(context) {
 	context.allocator = a
 	return context
 }
 
+/*
+Copy the value behind a pointer into a new value.
+
+This procedure copies the object of type `T` pointed to by the pointer `ptr`
+into a new stack-allocated value and returns that value.
+*/
 @(require_results)
 reinterpret_copy :: proc "contextless" ($T: typeid, ptr: rawptr) -> (value: T) {
 	copy(&value, ptr, size_of(T))
 	return
 }
 
+/*
+Dynamic array with a fixed-capacity buffer.
+
+This type represents dynamic arrays with a fixed-size backing buffer. When an
+allocation would grow the array beyond the buffer's capacity, allocations from
+fixed byte buffers return `nil` and no error.
+*/
 Fixed_Byte_Buffer :: distinct [dynamic]byte
 
+/*
+Create a fixed byte buffer from a slice.
+*/
 @(require_results)
 make_fixed_byte_buffer :: proc "contextless" (backing: []byte) -> Fixed_Byte_Buffer {
 	s := transmute(Raw_Slice)backing
@@ -264,40 +631,60 @@ make_fixed_byte_buffer :: proc "contextless" (backing: []byte) -> Fixed_Byte_Buf
 	return transmute(Fixed_Byte_Buffer)d
 }
 
+/*
+General-purpose align formula.
-
+
+This procedure is equivalent to `align_forward`, but it does not require the
+alignment to be a power of two.
+*/
 @(require_results)
 align_formula :: proc "contextless" (size, align: int) -> int {
 	result := size + align-1
 	return result - result%align
 }
 
+/*
+Calculate the padding for a header preceding aligned data.
+
+This procedure returns the padding that follows the pointer `ptr` and is able
+to fit a header of size `header_size` immediately before the memory region
+aligned on the boundary specified by `align`. See the following diagram for a
+visual representation.
+
+	         header size
+	        |<------>|
+	+---+--------+------------- - - -
+	    | HEADER |  DATA...
+	+---+--------+------------- - - -
+	^            ^
+	|<---------->|
+	|   padding  |
+	ptr          aligned ptr
+
+The procedure takes in `ptr` and `header_size`, as well as the required
+alignment for `DATA`. The return value is the padding between `ptr` and the
+aligned pointer that is able to fit the header.
+*/
 @(require_results)
 calc_padding_with_header :: proc "contextless" (ptr: uintptr, align: uintptr, header_size: int) -> int {
 	p, a := ptr, align
 	modulo := p & (a-1)
-
 	padding := uintptr(0)
 	if modulo != 0 {
 		padding = a - modulo
 	}
-
 	needed_space := uintptr(header_size)
 	if padding < needed_space {
 		needed_space -= padding
-
 		if needed_space & (a-1) > 0 {
 			padding += align * (1+(needed_space/align))
 		} else {
 			padding += align * (needed_space/align)
 		}
 	}
-
 	return int(padding)
 }
 
-
 @(require_results, deprecated="prefer 'slice.clone'")
 clone_slice :: proc(slice: $T/[]$E, allocator := context.allocator, loc := #caller_location) -> (new_slice: T) {
 	new_slice, _ = make(T, len(slice), allocator, loc)
diff --git a/core/mem/mutex_allocator.odin b/core/mem/mutex_allocator.odin
index 6ede219ac..b8062bca1 100644
--- a/core/mem/mutex_allocator.odin
+++ b/core/mem/mutex_allocator.odin
@@ -3,17 +3,31 @@ package mem
 
 import "core:sync"
 
+/*
+The data for the mutex allocator.
+*/
 Mutex_Allocator :: struct {
 	backing: Allocator,
 	mutex:   sync.Mutex,
 }
 
+/*
+Initialize the mutex allocator.
+
+This procedure initializes the mutex allocator using `backing_allocator` as the
+allocator that all allocation requests will be passed through to.
+*/
 mutex_allocator_init :: proc(m: ^Mutex_Allocator, backing_allocator: Allocator) {
 	m.backing = backing_allocator
 	m.mutex = {}
 }
 
+/*
+Mutex allocator.
+The mutex allocator is a wrapper for allocators that is used to serialize all +allocator requests across multiple threads. +*/ @(require_results) mutex_allocator :: proc(m: ^Mutex_Allocator) -> Allocator { return Allocator{ @@ -22,11 +36,16 @@ mutex_allocator :: proc(m: ^Mutex_Allocator) -> Allocator { } } -mutex_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, loc := #caller_location) -> (result: []byte, err: Allocator_Error) { +mutex_allocator_proc :: proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size: int, + alignment: int, + old_memory: rawptr, + old_size: int, + loc := #caller_location, +) -> (result: []byte, err: Allocator_Error) { m := (^Mutex_Allocator)(allocator_data) - sync.mutex_guard(&m.mutex) return m.backing.procedure(m.backing.data, mode, size, alignment, old_memory, old_size, loc) } diff --git a/core/mem/raw.odin b/core/mem/raw.odin index f56206957..41c91555e 100644 --- a/core/mem/raw.odin +++ b/core/mem/raw.odin @@ -3,26 +3,100 @@ package mem import "base:builtin" import "base:runtime" -Raw_Any :: runtime.Raw_Any -Raw_String :: runtime.Raw_String -Raw_Cstring :: runtime.Raw_Cstring -Raw_Slice :: runtime.Raw_Slice +/* +Memory layout of the `any` type. +*/ +Raw_Any :: runtime.Raw_Any + +/* +Memory layout of the `string` type. +*/ +Raw_String :: runtime.Raw_String + +/* +Memory layout of the `cstring` type. +*/ +Raw_Cstring :: runtime.Raw_Cstring + +/* +Memory layout of `[]T` types. +*/ +Raw_Slice :: runtime.Raw_Slice + +/* +Memory layout of `[dynamic]T` types. +*/ Raw_Dynamic_Array :: runtime.Raw_Dynamic_Array -Raw_Map :: runtime.Raw_Map -Raw_Soa_Pointer :: runtime.Raw_Soa_Pointer -Raw_Complex32 :: runtime.Raw_Complex32 -Raw_Complex64 :: runtime.Raw_Complex64 -Raw_Complex128 :: runtime.Raw_Complex128 -Raw_Quaternion64 :: runtime.Raw_Quaternion64 +/* +Memory layout of `map[K]V` types. +*/ +Raw_Map :: runtime.Raw_Map + +/* +Memory layout of `#soa []T` types. +*/ +Raw_Soa_Pointer :: runtime.Raw_Soa_Pointer + +/* +Memory layout of the `complex32` type. +*/ +Raw_Complex32 :: runtime.Raw_Complex32 + +/* +Memory layout of the `complex64` type. +*/ +Raw_Complex64 :: runtime.Raw_Complex64 + +/* +Memory layout of the `complex128` type. +*/ +Raw_Complex128 :: runtime.Raw_Complex128 + +/* +Memory layout of the `quaternion64` type. +*/ +Raw_Quaternion64 :: runtime.Raw_Quaternion64 + +/* +Memory layout of the `quaternion128` type. +*/ Raw_Quaternion128 :: runtime.Raw_Quaternion128 + +/* +Memory layout of the `quaternion256` type. +*/ Raw_Quaternion256 :: runtime.Raw_Quaternion256 -Raw_Quaternion64_Vector_Scalar :: runtime.Raw_Quaternion64_Vector_Scalar + +/* +Memory layout of the `quaternion64` type. +*/ +Raw_Quaternion64_Vector_Scalar :: runtime.Raw_Quaternion64_Vector_Scalar + +/* +Memory layout of the `quaternion128` type. +*/ Raw_Quaternion128_Vector_Scalar :: runtime.Raw_Quaternion128_Vector_Scalar + +/* +Memory layout of the `quaternion256` type. +*/ Raw_Quaternion256_Vector_Scalar :: runtime.Raw_Quaternion256_Vector_Scalar +/* +Create a value of the any type. + +This procedure creates a value with type `any` that points to an object with +typeid `id` located at an address specified by `data`. +*/ make_any :: proc "contextless" (data: rawptr, id: typeid) -> any { return transmute(any)Raw_Any{data, id} } +/* +Obtain pointer to the data. + +This procedure returns the pointer to the data of a slice, string, or a dynamic +array. 
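+
+Example (illustrative):
+
+	s := []int{1, 2, 3}
+	p := raw_data(s) // multipointer to the first element of `s`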
+*/ raw_data :: builtin.raw_data diff --git a/core/mem/rollback_stack_allocator.odin b/core/mem/rollback_stack_allocator.odin index f5e428d87..43ef10fe9 100644 --- a/core/mem/rollback_stack_allocator.odin +++ b/core/mem/rollback_stack_allocator.odin @@ -1,52 +1,36 @@ package mem -// The Rollback Stack Allocator was designed for the test runner to be fast, -// able to grow, and respect the Tracking Allocator's requirement for -// individual frees. It is not overly concerned with fragmentation, however. -// -// It has support for expansion when configured with a block allocator and -// limited support for out-of-order frees. -// -// Allocation has constant-time best and usual case performance. -// At worst, it is linear according to the number of memory blocks. -// -// Allocation follows a first-fit strategy when there are multiple memory -// blocks. -// -// Freeing has constant-time best and usual case performance. -// At worst, it is linear according to the number of memory blocks and number -// of freed items preceding the last item in a block. -// -// Resizing has constant-time performance, if it's the last item in a block, or -// the new size is smaller. Naturally, this becomes linear-time if there are -// multiple blocks to search for the pointer's owning block. Otherwise, the -// allocator defaults to a combined alloc & free operation internally. -// -// Out-of-order freeing is accomplished by collapsing a run of freed items -// from the last allocation backwards. -// -// Each allocation has an overhead of 8 bytes and any extra bytes to satisfy -// the requested alignment. - import "base:runtime" +/* +Rollback stack default block size. +*/ ROLLBACK_STACK_DEFAULT_BLOCK_SIZE :: 4 * Megabyte -// This limitation is due to the size of `prev_ptr`, but it is only for the -// head block; any allocation in excess of the allocator's `block_size` is -// valid, so long as the block allocator can handle it. -// -// This is because allocations over the block size are not split up if the item -// within is freed; they are immediately returned to the block allocator. -ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE :: 2 * Gigabyte +/* +Rollback stack max head block size. +This limitation is due to the size of `prev_ptr`, but it is only for the +head block; any allocation in excess of the allocator's `block_size` is +valid, so long as the block allocator can handle it. + +This is because allocations over the block size are not split up if the item +within is freed; they are immediately returned to the block allocator. +*/ +ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE :: 2 * Gigabyte +/* +Allocation header of the rollback stack allocator. +*/ Rollback_Stack_Header :: bit_field u64 { prev_offset: uintptr | 32, is_free: bool | 1, prev_ptr: uintptr | 31, } +/* +Block header of the rollback stack allocator. +*/ Rollback_Stack_Block :: struct { next_block: ^Rollback_Stack_Block, last_alloc: rawptr, @@ -54,13 +38,15 @@ Rollback_Stack_Block :: struct { buffer: []byte, } +/* +Rollback stack allocator data. +*/ Rollback_Stack :: struct { head: ^Rollback_Stack_Block, block_size: int, block_allocator: Allocator, } - @(private="file", require_results) rb_ptr_in_bounds :: proc(block: ^Rollback_Stack_Block, ptr: rawptr) -> bool { start := raw_data(block.buffer) @@ -110,6 +96,9 @@ rb_rollback_block :: proc(block: ^Rollback_Stack_Block, header: ^Rollback_Stack_ } } +/* +Free memory to a rollback stack allocator. 
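+
+The allocation is marked as free; if it is the last allocation in its block,
+the block's offset is rolled back across any preceding run of freed items.
+Oversized singleton blocks are returned to the block allocator immediately.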
+*/ @(private="file", require_results) rb_free :: proc(stack: ^Rollback_Stack, ptr: rawptr) -> Allocator_Error { parent, block, header := rb_find_ptr(stack, ptr) or_return @@ -128,6 +117,9 @@ rb_free :: proc(stack: ^Rollback_Stack, ptr: rawptr) -> Allocator_Error { return nil } +/* +Free all memory owned by the rollback stack allocator. +*/ @(private="file") rb_free_all :: proc(stack: ^Rollback_Stack) { for block := stack.head.next_block; block != nil; /**/ { @@ -141,45 +133,75 @@ rb_free_all :: proc(stack: ^Rollback_Stack) { stack.head.offset = 0 } -@(private="file", require_results) -rb_resize :: proc(stack: ^Rollback_Stack, ptr: rawptr, old_size, size, alignment: int) -> (result: []byte, err: Allocator_Error) { - if ptr != nil { - if block, _, ok := rb_find_last_alloc(stack, ptr); ok { - // `block.offset` should never underflow because it is contingent - // on `old_size` in the first place, assuming sane arguments. - assert(block.offset >= cast(uintptr)old_size, "Rollback Stack Allocator received invalid `old_size`.") - - if block.offset + cast(uintptr)size - cast(uintptr)old_size < cast(uintptr)len(block.buffer) { - // Prevent singleton allocations from fragmenting by forbidding - // them to shrink, removing the possibility of overflow bugs. - if len(block.buffer) <= stack.block_size { - block.offset += cast(uintptr)size - cast(uintptr)old_size - } - #no_bounds_check return (cast([^]byte)ptr)[:size], nil - } - } +/* +Allocate memory using the rollback stack allocator. +*/ +@(require_results) +rb_alloc :: proc( + stack: ^Rollback_Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc) + if bytes != nil { + zero_slice(bytes) } + return raw_data(bytes), err +} - result = rb_alloc(stack, size, alignment) or_return - runtime.mem_copy_non_overlapping(raw_data(result), ptr, old_size) - err = rb_free(stack, ptr) +/* +Allocate memory using the rollback stack allocator. +*/ +@(require_results) +rb_alloc_bytes :: proc( + stack: ^Rollback_Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> ([]byte, Allocator_Error) { + bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc) + if bytes != nil { + zero_slice(bytes) + } + return bytes, err +} - return +/* +Allocate non-initialized memory using the rollback stack allocator. +*/ +@(require_results) +rb_alloc_non_zeroed :: proc( + stack: ^Rollback_Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := rb_alloc_bytes_non_zeroed(stack, size, alignment, loc) + return raw_data(bytes), err } -@(private="file", require_results) -rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byte, err: Allocator_Error) { +/* +Allocate non-initialized memory using the rollback stack allocator. 
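+
+A minimal sketch (`stack` is assumed to have been initialized with
+`rollback_stack_init`):
+
+Example:
+	bytes, err := rb_alloc_bytes_non_zeroed(&stack, 256)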
+*/ +@(require_results) +rb_alloc_bytes_non_zeroed :: proc( + stack: ^Rollback_Stack, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (result: []byte, err: Allocator_Error) { + assert(size >= 0, "Size must be positive or zero.", loc) + assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", loc) parent: ^Rollback_Stack_Block for block := stack.head; /**/; block = block.next_block { when !ODIN_DISABLE_ASSERT { allocated_new_block: bool } - if block == nil { if stack.block_allocator.procedure == nil { return nil, .Out_Of_Memory } - minimum_size_required := size_of(Rollback_Stack_Header) + size + alignment - 1 new_block_size := max(minimum_size_required, stack.block_size) block = rb_make_block(new_block_size, stack.block_allocator) or_return @@ -188,10 +210,8 @@ rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byt allocated_new_block = true } } - start := raw_data(block.buffer)[block.offset:] padding := cast(uintptr)calc_padding_with_header(cast(uintptr)start, cast(uintptr)alignment, size_of(Rollback_Stack_Header)) - if block.offset + padding + cast(uintptr)size > cast(uintptr)len(block.buffer) { when !ODIN_DISABLE_ASSERT { if allocated_new_block { @@ -201,54 +221,150 @@ rb_alloc :: proc(stack: ^Rollback_Stack, size, alignment: int) -> (result: []byt parent = block continue } - header := cast(^Rollback_Stack_Header)(start[padding - size_of(Rollback_Stack_Header):]) ptr := start[padding:] - header^ = { prev_offset = block.offset, prev_ptr = uintptr(0) if block.last_alloc == nil else cast(uintptr)block.last_alloc - cast(uintptr)raw_data(block.buffer), is_free = false, } - block.last_alloc = ptr block.offset += padding + cast(uintptr)size - if len(block.buffer) > stack.block_size { // This block exceeds the allocator's standard block size and is considered a singleton. // Prevent any further allocations on it. block.offset = cast(uintptr)len(block.buffer) } - #no_bounds_check return ptr[:size], nil } - return nil, .Out_Of_Memory } +/* +Resize an allocation owned by rollback stack allocator. +*/ +@(require_results) +rb_resize :: proc( + stack: ^Rollback_Stack, + old_ptr: rawptr, + old_size: int, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := rb_resize_bytes_non_zeroed(stack, byte_slice(old_ptr, old_size), size, alignment, loc) + if bytes != nil { + if old_ptr == nil { + zero_slice(bytes) + } else if size > old_size { + zero_slice(bytes[old_size:]) + } + } + return raw_data(bytes), err +} + +/* +Resize an allocation owned by rollback stack allocator. +*/ +@(require_results) +rb_resize_bytes :: proc( + stack: ^Rollback_Stack, + old_memory: []byte, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> ([]u8, Allocator_Error) { + bytes, err := rb_resize_bytes_non_zeroed(stack, old_memory, size, alignment, loc) + if bytes != nil { + if old_memory == nil { + zero_slice(bytes) + } else if size > len(old_memory) { + zero_slice(bytes[len(old_memory):]) + } + } + return bytes, err +} + +/* +Resize an allocation owned by rollback stack allocator without explicit +zero-initialization. 
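+
+If the pointer refers to the last allocation in its block and the new size
+fits, the allocation is resized in place; otherwise this procedure allocates a
+new region, copies the old contents over, and frees the old allocation.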
+*/ +@(require_results) +rb_resize_non_zeroed :: proc( + stack: ^Rollback_Stack, + old_ptr: rawptr, + old_size: int, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (rawptr, Allocator_Error) { + bytes, err := rb_resize_bytes_non_zeroed(stack, byte_slice(old_ptr, old_size), size, alignment, loc) + return raw_data(bytes), err +} + +/* +Resize an allocation owned by the rollback stack allocator without explicit +zero-initialization. +*/ +@(require_results) +rb_resize_bytes_non_zeroed :: proc( + stack: ^Rollback_Stack, + old_memory: []byte, + size: int, + alignment := DEFAULT_ALIGNMENT, + loc := #caller_location, +) -> (result: []byte, err: Allocator_Error) { + old_size := len(old_memory) + ptr := raw_data(old_memory) + assert(size >= 0, "Size must be positive or zero.", loc) + assert(old_size >= 0, "Old size must be positive or zero.", loc) + assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", loc) + if ptr != nil { + if block, _, ok := rb_find_last_alloc(stack, ptr); ok { + // `block.offset` should never underflow because it is contingent + // on `old_size` in the first place, assuming sane arguments. + assert(block.offset >= cast(uintptr)old_size, "Rollback Stack Allocator received invalid `old_size`.") + if block.offset + cast(uintptr)size - cast(uintptr)old_size < cast(uintptr)len(block.buffer) { + // Prevent singleton allocations from fragmenting by forbidding + // them to shrink, removing the possibility of overflow bugs. + if len(block.buffer) <= stack.block_size { + block.offset += cast(uintptr)size - cast(uintptr)old_size + } + #no_bounds_check return (ptr)[:size], nil + } + } + } + result = rb_alloc_bytes_non_zeroed(stack, size, alignment) or_return + runtime.mem_copy_non_overlapping(raw_data(result), ptr, old_size) + err = rb_free(stack, ptr) + return +} + @(private="file", require_results) rb_make_block :: proc(size: int, allocator: Allocator) -> (block: ^Rollback_Stack_Block, err: Allocator_Error) { buffer := runtime.mem_alloc(size_of(Rollback_Stack_Block) + size, align_of(Rollback_Stack_Block), allocator) or_return - block = cast(^Rollback_Stack_Block)raw_data(buffer) #no_bounds_check block.buffer = buffer[size_of(Rollback_Stack_Block):] return } - +/* +Initialize the rollback stack allocator using a fixed backing buffer. +*/ rollback_stack_init_buffered :: proc(stack: ^Rollback_Stack, buffer: []byte, location := #caller_location) { MIN_SIZE :: size_of(Rollback_Stack_Block) + size_of(Rollback_Stack_Header) + size_of(rawptr) assert(len(buffer) >= MIN_SIZE, "User-provided buffer to Rollback Stack Allocator is too small.", location) - block := cast(^Rollback_Stack_Block)raw_data(buffer) block^ = {} #no_bounds_check block.buffer = buffer[size_of(Rollback_Stack_Block):] - stack^ = {} stack.head = block stack.block_size = len(block.buffer) } +/* +Initialize the rollback stack allocator using a backing block allocator. +*/ rollback_stack_init_dynamic :: proc( stack: ^Rollback_Stack, block_size : int = ROLLBACK_STACK_DEFAULT_BLOCK_SIZE, @@ -261,22 +377,25 @@ rollback_stack_init_dynamic :: proc( // size is insufficient; check only on platforms with big enough ints. assert(block_size <= ROLLBACK_STACK_MAX_HEAD_BLOCK_SIZE, "Rollback Stack Allocators cannot support head blocks larger than 2 gigabytes.", location) } - block := rb_make_block(block_size, block_allocator) or_return - stack^ = {} stack.head = block stack.block_size = block_size stack.block_allocator = block_allocator - return nil } +/* +Initialize the rollback stack.
+*/ rollback_stack_init :: proc { rollback_stack_init_buffered, rollback_stack_init_dynamic, } +/* +Destroy a rollback stack. +*/ rollback_stack_destroy :: proc(stack: ^Rollback_Stack) { if stack.block_allocator.procedure != nil { rb_free_all(stack) @@ -285,6 +404,37 @@ rollback_stack_destroy :: proc(stack: ^Rollback_Stack) { stack^ = {} } +/* +Rollback stack allocator. + +The Rollback Stack Allocator was designed for the test runner to be fast, +able to grow, and respect the Tracking Allocator's requirement for +individual frees. It is not overly concerned with fragmentation, however. + +It has support for expansion when configured with a block allocator and +limited support for out-of-order frees. + +Allocation has constant-time best and usual case performance. +At worst, it is linear according to the number of memory blocks. + +Allocation follows a first-fit strategy when there are multiple memory +blocks. + +Freeing has constant-time best and usual case performance. +At worst, it is linear according to the number of memory blocks and number +of freed items preceding the last item in a block. + +Resizing has constant-time performance, if it's the last item in a block, or +the new size is smaller. Naturally, this becomes linear-time if there are +multiple blocks to search for the pointer's owning block. Otherwise, the +allocator defaults to a combined alloc & free operation internally. + +Out-of-order freeing is accomplished by collapsing a run of freed items +from the last allocation backwards. + +Each allocation has an overhead of 8 bytes and any extra bytes to satisfy +the requested alignment. +*/ @(require_results) rollback_stack_allocator :: proc(stack: ^Rollback_Stack) -> Allocator { return Allocator { @@ -294,48 +444,37 @@ rollback_stack_allocator :: proc(stack: ^Rollback_Stack) -> Allocator { } @(require_results) -rollback_stack_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, location := #caller_location, +rollback_stack_allocator_proc :: proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size, alignment: int, + old_memory: rawptr, + old_size: int, + loc := #caller_location, ) -> (result: []byte, err: Allocator_Error) { stack := cast(^Rollback_Stack)allocator_data - switch mode { - case .Alloc, .Alloc_Non_Zeroed: - assert(size >= 0, "Size must be positive or zero.", location) - assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", location) - result = rb_alloc(stack, size, alignment) or_return - - if mode == .Alloc { - zero_slice(result) - } - + case .Alloc: + return rb_alloc_bytes(stack, size, alignment, loc) + case .Alloc_Non_Zeroed: + return rb_alloc_bytes_non_zeroed(stack, size, alignment, loc) case .Free: - err = rb_free(stack, old_memory) - + return nil, rb_free(stack, old_memory) case .Free_All: rb_free_all(stack) - - case .Resize, .Resize_Non_Zeroed: - assert(size >= 0, "Size must be positive or zero.", location) - assert(old_size >= 0, "Old size must be positive or zero.", location) - assert(is_power_of_two(cast(uintptr)alignment), "Alignment must be a power of two.", location) - result = rb_resize(stack, old_memory, old_size, size, alignment) or_return - - #no_bounds_check if mode == .Resize && size > old_size { - zero_slice(result[old_size:]) - } - + return nil, nil + case .Resize: + return rb_resize_bytes(stack, byte_slice(old_memory, old_size), size, alignment, loc) + case .Resize_Non_Zeroed: + return rb_resize_bytes_non_zeroed(stack, 
byte_slice(old_memory, old_size), size, alignment, loc) case .Query_Features: set := (^Allocator_Mode_Set)(old_memory) if set != nil { set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Free_All, .Resize, .Resize_Non_Zeroed} } return nil, nil - case .Query_Info: return nil, .Mode_Not_Implemented } - return } diff --git a/core/mem/tracking_allocator.odin b/core/mem/tracking_allocator.odin index 7a95888dc..5da38e62f 100644 --- a/core/mem/tracking_allocator.odin +++ b/core/mem/tracking_allocator.odin @@ -4,50 +4,85 @@ package mem import "base:runtime" import "core:sync" +/* +Allocation entry for the tracking allocator. + +This structure stores the data related to an allocation. +*/ Tracking_Allocator_Entry :: struct { - memory: rawptr, - size: int, + // Pointer to an allocated region. + memory: rawptr, + // Size of the allocated memory region. + size: int, + // Requested alignment. alignment: int, - mode: Allocator_Mode, - err: Allocator_Error, + // Mode of the operation. + mode: Allocator_Mode, + // Error returned by the operation. + err: Allocator_Error, + // Location of the allocation. location: runtime.Source_Code_Location, } + +/* +Bad free entry for a tracking allocator. +*/ Tracking_Allocator_Bad_Free_Entry :: struct { - memory: rawptr, + // Pointer on which the free operation was called. + memory: rawptr, + // The source location where the operation was called. location: runtime.Source_Code_Location, } + +/* +Tracking allocator data. +*/ Tracking_Allocator :: struct { - backing: Allocator, - allocation_map: map[rawptr]Tracking_Allocator_Entry, - bad_free_array: [dynamic]Tracking_Allocator_Bad_Free_Entry, - mutex: sync.Mutex, + backing: Allocator, + allocation_map: map[rawptr]Tracking_Allocator_Entry, + bad_free_array: [dynamic]Tracking_Allocator_Bad_Free_Entry, + mutex: sync.Mutex, clear_on_free_all: bool, - - total_memory_allocated: i64, - total_allocation_count: i64, - total_memory_freed: i64, - total_free_count: i64, - peak_memory_allocated: i64, + total_memory_allocated: i64, + total_allocation_count: i64, + total_memory_freed: i64, + total_free_count: i64, + peak_memory_allocated: i64, current_memory_allocated: i64, } +/* +Initialize the tracking allocator. + +This procedure initializes the tracking allocator `t` with a backing allocator +specified by `backing_allocator`. The `internals_allocator` will be used to +allocate the tracking data structures. +*/ tracking_allocator_init :: proc(t: ^Tracking_Allocator, backing_allocator: Allocator, internals_allocator := context.allocator) { t.backing = backing_allocator t.allocation_map.allocator = internals_allocator t.bad_free_array.allocator = internals_allocator - if .Free_All in query_features(t.backing) { t.clear_on_free_all = true } } +/* +Destroy the tracking allocator. +*/ tracking_allocator_destroy :: proc(t: ^Tracking_Allocator) { delete(t.allocation_map) delete(t.bad_free_array) } +/* +Clear the tracking allocator. + +This procedure clears the tracked data from a tracking allocator. -// Clear only the current allocation data while keeping the totals intact. +**Note**: This procedure clears only the current allocation data while keeping +the totals intact. +*/ tracking_allocator_clear :: proc(t: ^Tracking_Allocator) { sync.mutex_lock(&t.mutex) clear(&t.allocation_map) @@ -56,7 +91,11 @@ tracking_allocator_clear :: proc(t: ^Tracking_Allocator) { sync.mutex_unlock(&t.mutex) } -// Reset all of a Tracking Allocator's allocation data back to zero. +/* +Reset the tracking allocator. + +Reset all of a Tracking Allocator's allocation data back to zero.
+*/ tracking_allocator_reset :: proc(t: ^Tracking_Allocator) { sync.mutex_lock(&t.mutex) clear(&t.allocation_map) @@ -70,6 +109,39 @@ tracking_allocator_reset :: proc(t: ^Tracking_Allocator) { sync.mutex_unlock(&t.mutex) } +/* +Tracking allocator. + +The tracking allocator is an allocator wrapper that tracks memory allocations. +This allocator stores all the allocations in a map. Whenever a pointer that is +not inside the map is freed, an entry is added to the `bad_free_array`. + +An example of how to use the `Tracking_Allocator` to track subsequent allocations +in your program and report leaks and bad frees: + +Example: + + package foo + + import "core:mem" + import "core:fmt" + + main :: proc() { + track: mem.Tracking_Allocator + mem.tracking_allocator_init(&track, context.allocator) + defer mem.tracking_allocator_destroy(&track) + context.allocator = mem.tracking_allocator(&track) + + do_stuff() + + for _, leak in track.allocation_map { + fmt.printf("%v leaked %m\n", leak.location, leak.size) + } + for bad_free in track.bad_free_array { + fmt.printf("%v allocation %p was freed badly\n", bad_free.location, bad_free.memory) + } + } +*/ @(require_results) tracking_allocator :: proc(data: ^Tracking_Allocator) -> Allocator { return Allocator{ @@ -78,9 +150,14 @@ tracking_allocator :: proc(data: ^Tracking_Allocator) -> Allocator { } } -tracking_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, - size, alignment: int, - old_memory: rawptr, old_size: int, loc := #caller_location) -> (result: []byte, err: Allocator_Error) { +tracking_allocator_proc :: proc( + allocator_data: rawptr, + mode: Allocator_Mode, + size, alignment: int, + old_memory: rawptr, + old_size: int, + loc := #caller_location, +) -> (result: []byte, err: Allocator_Error) { track_alloc :: proc(data: ^Tracking_Allocator, entry: ^Tracking_Allocator_Entry) { data.total_memory_allocated += i64(entry.size) data.total_allocation_count += 1 diff --git a/core/odin/parser/parser.odin b/core/odin/parser/parser.odin index 9dfca40b5..d42766bde 100644 --- a/core/odin/parser/parser.odin +++ b/core/odin/parser/parser.odin @@ -2302,6 +2302,16 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr { bd.name = name.text return bd + case "caller_expression": + bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name)) + bd.tok = tok + bd.name = name.text + + if peek_token_kind(p, .Open_Paren) { + return parse_call_expr(p, bd) + } + return bd + case "location", "exists", "load", "load_directory", "load_hash", "hash", "assert", "panic", "defined", "config": bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name)) bd.tok = tok diff --git a/core/os/os_freebsd.odin b/core/os/os_freebsd.odin index c05a06129..241f42c0b 100644 --- a/core/os/os_freebsd.odin +++ b/core/os/os_freebsd.odin @@ -920,7 +920,7 @@ get_page_size :: proc() -> int { _processor_core_count :: proc() -> int { count : int = 0 count_size := size_of(count) - if _sysctlbyname("hw.logicalcpu", &count, &count_size, nil, 0) == 0 { + if _sysctlbyname("hw.ncpu", &count, &count_size, nil, 0) == 0 { if count > 0 { return count } diff --git a/core/os/os_js.odin b/core/os/os_js.odin index 5a544c3f7..8bf1d988b 100644 --- a/core/os/os_js.odin +++ b/core/os/os_js.odin @@ -3,33 +3,38 @@ package os import "base:runtime" +foreign import "odin_env" + @(require_results) is_path_separator :: proc(c: byte) -> bool { return c == '/' || c == '\\' } +Handle :: distinct u32 + +stdout: Handle = 1 +stderr: Handle = 2 + @(require_results) open :: proc(path: string, mode: int = O_RDONLY, perm:
int = 0) -> (Handle, Error) { unimplemented("core:os procedure not supported on JS target") } close :: proc(fd: Handle) -> Error { - unimplemented("core:os procedure not supported on JS target") + return nil } flush :: proc(fd: Handle) -> (err: Error) { - unimplemented("core:os procedure not supported on JS target") + return nil } - - write :: proc(fd: Handle, data: []byte) -> (int, Error) { - unimplemented("core:os procedure not supported on JS target") -} - -@(private="file") -read_console :: proc(handle: Handle, b: []byte) -> (n: int, err: Error) { - unimplemented("core:os procedure not supported on JS target") + foreign odin_env { + @(link_name="write") + _write :: proc "contextless" (fd: Handle, p: []byte) --- + } + _write(fd, data) + return len(data), nil } read :: proc(fd: Handle, data: []byte) -> (int, Error) { @@ -45,19 +50,6 @@ file_size :: proc(fd: Handle) -> (i64, Error) { unimplemented("core:os procedure not supported on JS target") } - -@(private) -MAX_RW :: 1<<30 - -@(private) -pread :: proc(fd: Handle, data: []byte, offset: i64) -> (int, Error) { - unimplemented("core:os procedure not supported on JS target") -} -@(private) -pwrite :: proc(fd: Handle, data: []byte, offset: i64) -> (int, Error) { - unimplemented("core:os procedure not supported on JS target") -} - read_at :: proc(fd: Handle, data: []byte, offset: i64) -> (n: int, err: Error) { unimplemented("core:os procedure not supported on JS target") } @@ -65,16 +57,6 @@ write_at :: proc(fd: Handle, data: []byte, offset: i64) -> (n: int, err: Error) unimplemented("core:os procedure not supported on JS target") } -stdout: Handle = 1 -stderr: Handle = 2 - -@(require_results) -get_std_handle :: proc "contextless" (h: uint) -> Handle { - context = runtime.default_context() - unimplemented("core:os procedure not supported on JS target") -} - - @(require_results) exists :: proc(path: string) -> bool { unimplemented("core:os procedure not supported on JS target") @@ -90,9 +72,6 @@ is_dir :: proc(path: string) -> bool { unimplemented("core:os procedure not supported on JS target") } -// NOTE(tetra): GetCurrentDirectory is not thread safe with SetCurrentDirectory and GetFullPathName -//@private cwd_lock := win32.SRWLOCK{} // zero is initialized - @(require_results) get_current_directory :: proc(allocator := context.allocator) -> string { unimplemented("core:os procedure not supported on JS target") @@ -118,18 +97,6 @@ remove_directory :: proc(path: string) -> (err: Error) { } - -@(private, require_results) -is_abs :: proc(path: string) -> bool { - unimplemented("core:os procedure not supported on JS target") -} - -@(private, require_results) -fix_long_path :: proc(path: string) -> string { - unimplemented("core:os procedure not supported on JS target") -} - - link :: proc(old_name, new_name: string) -> (err: Error) { unimplemented("core:os procedure not supported on JS target") } @@ -169,7 +136,6 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F unimplemented("core:os procedure not supported on JS target") } -Handle :: distinct uintptr File_Time :: distinct u64 _Platform_Error :: enum i32 { @@ -254,12 +220,7 @@ WSAECONNRESET :: Platform_Error.WSAECONNRESET ERROR_FILE_IS_PIPE :: General_Error.File_Is_Pipe ERROR_FILE_IS_NOT_DIR :: General_Error.Not_Dir -// "Argv" arguments converted to Odin strings -args := _alloc_command_line_arguments() - - - - +args: []string @(require_results) last_write_time :: proc(fd: Handle) -> (File_Time, Error) { @@ -279,26 +240,14 @@ get_page_size :: proc() -> int { 
@(private, require_results) _processor_core_count :: proc() -> int { - unimplemented("core:os procedure not supported on JS target") + return 1 } exit :: proc "contextless" (code: int) -> ! { - context = runtime.default_context() - unimplemented("core:os procedure not supported on JS target") + unimplemented_contextless("core:os procedure not supported on JS target") } - - @(require_results) current_thread_id :: proc "contextless" () -> int { - context = runtime.default_context() - unimplemented("core:os procedure not supported on JS target") + return 0 } - - - -@(require_results) -_alloc_command_line_arguments :: proc() -> []string { - return nil -} - diff --git a/core/os/os_netbsd.odin b/core/os/os_netbsd.odin index a56c0b784..ba9b40fc3 100644 --- a/core/os/os_netbsd.odin +++ b/core/os/os_netbsd.odin @@ -978,7 +978,7 @@ get_page_size :: proc() -> int { _processor_core_count :: proc() -> int { count : int = 0 count_size := size_of(count) - if _sysctlbyname("hw.logicalcpu", &count, &count_size, nil, 0) == 0 { + if _sysctlbyname("hw.ncpu", &count, &count_size, nil, 0) == 0 { if count > 0 { return count } diff --git a/core/strings/strings.odin b/core/strings/strings.odin index b69c4a0e0..dbc84f8b7 100644 --- a/core/strings/strings.odin +++ b/core/strings/strings.odin @@ -93,7 +93,7 @@ Inputs: Returns: - res: A string created from the null-terminated byte pointer and length */ -string_from_null_terminated_ptr :: proc(ptr: [^]byte, len: int) -> (res: string) { +string_from_null_terminated_ptr :: proc "contextless" (ptr: [^]byte, len: int) -> (res: string) { s := string(ptr[:len]) s = truncate_to_byte(s, 0) return s @@ -139,7 +139,7 @@ NOTE: Failure to find the byte results in returning the entire string. Returns: - res: The truncated string */ -truncate_to_byte :: proc(str: string, b: byte) -> (res: string) { +truncate_to_byte :: proc "contextless" (str: string, b: byte) -> (res: string) { n := index_byte(str, b) if n < 0 { n = len(str) @@ -261,7 +261,7 @@ Inputs: Returns: - result: `-1` if `lhs` comes first, `1` if `rhs` comes first, or `0` if they are equal */ -compare :: proc(lhs, rhs: string) -> (result: int) { +compare :: proc "contextless" (lhs, rhs: string) -> (result: int) { return mem.compare(transmute([]byte)lhs, transmute([]byte)rhs) } /* @@ -1447,7 +1447,7 @@ Output: -1 */ -index_byte :: proc(s: string, c: byte) -> (res: int) { +index_byte :: proc "contextless" (s: string, c: byte) -> (res: int) { return #force_inline bytes.index_byte(transmute([]u8)s, c) } /* @@ -1482,7 +1482,7 @@ Output: -1 */ -last_index_byte :: proc(s: string, c: byte) -> (res: int) { +last_index_byte :: proc "contextless" (s: string, c: byte) -> (res: int) { return #force_inline bytes.last_index_byte(transmute([]u8)s, c) } /* @@ -1576,8 +1576,8 @@ Output: -1 */ -index :: proc(s, substr: string) -> (res: int) { - hash_str_rabin_karp :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) { +index :: proc "contextless" (s, substr: string) -> (res: int) { + hash_str_rabin_karp :: proc "contextless" (s: string) -> (hash: u32 = 0, pow: u32 = 1) { for i := 0; i < len(s); i += 1 { hash = hash*PRIME_RABIN_KARP + u32(s[i]) } diff --git a/core/sync/chan/chan.odin b/core/sync/chan/chan.odin index 53a3bff4b..c470d15f3 100644 --- a/core/sync/chan/chan.odin +++ b/core/sync/chan/chan.odin @@ -22,19 +22,17 @@ Raw_Chan :: struct { allocator: runtime.Allocator, allocation_size: int, msg_size: u16, - closed: b16, // atomic + closed: b16, // guarded by `mutex` mutex: sync.Mutex, r_cond: sync.Cond, w_cond: sync.Cond, - r_waiting: int, 
// atomic - w_waiting: int, // atomic + r_waiting: int, // guarded by `mutex` + w_waiting: int, // guarded by `mutex` // Buffered queue: ^Raw_Queue, // Unbuffered - r_mutex: sync.Mutex, - w_mutex: sync.Mutex, unbuffered_data: rawptr, } @@ -164,27 +162,30 @@ send_raw :: proc "contextless" (c: ^Raw_Chan, msg_in: rawptr) -> (ok: bool) { } if c.queue != nil { // buffered sync.guard(&c.mutex) - for c.queue.len == c.queue.cap { - sync.atomic_add(&c.w_waiting, 1) + for !c.closed && c.queue.len == c.queue.cap { + c.w_waiting += 1 sync.wait(&c.w_cond, &c.mutex) - sync.atomic_sub(&c.w_waiting, 1) + c.w_waiting -= 1 + } + + if c.closed { + return false } ok = raw_queue_push(c.queue, msg_in) - if sync.atomic_load(&c.r_waiting) > 0 { + if c.r_waiting > 0 { sync.signal(&c.r_cond) } } else if c.unbuffered_data != nil { // unbuffered - sync.guard(&c.w_mutex) sync.guard(&c.mutex) - if sync.atomic_load(&c.closed) { + if c.closed { return false } mem.copy(c.unbuffered_data, msg_in, int(c.msg_size)) - sync.atomic_add(&c.w_waiting, 1) - if sync.atomic_load(&c.r_waiting) > 0 { + c.w_waiting += 1 + if c.r_waiting > 0 { sync.signal(&c.r_cond) } sync.wait(&c.w_cond, &c.mutex) @@ -201,13 +202,13 @@ recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> (ok: bool) { if c.queue != nil { // buffered sync.guard(&c.mutex) for c.queue.len == 0 { - if sync.atomic_load(&c.closed) { + if c.closed { return } - sync.atomic_add(&c.r_waiting, 1) + c.r_waiting += 1 sync.wait(&c.r_cond, &c.mutex) - sync.atomic_sub(&c.r_waiting, 1) + c.r_waiting -= 1 } msg := raw_queue_pop(c.queue) @@ -215,27 +216,26 @@ recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> (ok: bool) { mem.copy(msg_out, msg, int(c.msg_size)) } - if sync.atomic_load(&c.w_waiting) > 0 { + if c.w_waiting > 0 { sync.signal(&c.w_cond) } ok = true } else if c.unbuffered_data != nil { // unbuffered - sync.guard(&c.r_mutex) sync.guard(&c.mutex) - for !sync.atomic_load(&c.closed) && - sync.atomic_load(&c.w_waiting) == 0 { - sync.atomic_add(&c.r_waiting, 1) + for !c.closed && + c.w_waiting == 0 { + c.r_waiting += 1 sync.wait(&c.r_cond, &c.mutex) - sync.atomic_sub(&c.r_waiting, 1) + c.r_waiting -= 1 } - if sync.atomic_load(&c.closed) { + if c.closed { return } mem.copy(msg_out, c.unbuffered_data, int(c.msg_size)) - sync.atomic_sub(&c.w_waiting, 1) + c.w_waiting -= 1 sync.signal(&c.w_cond) ok = true @@ -255,21 +255,24 @@ try_send_raw :: proc "contextless" (c: ^Raw_Chan, msg_in: rawptr) -> (ok: bool) return false } + if c.closed { + return false + } + ok = raw_queue_push(c.queue, msg_in) - if sync.atomic_load(&c.r_waiting) > 0 { + if c.r_waiting > 0 { sync.signal(&c.r_cond) } } else if c.unbuffered_data != nil { // unbuffered - sync.guard(&c.w_mutex) sync.guard(&c.mutex) - if sync.atomic_load(&c.closed) { + if c.closed { return false } mem.copy(c.unbuffered_data, msg_in, int(c.msg_size)) - sync.atomic_add(&c.w_waiting, 1) - if sync.atomic_load(&c.r_waiting) > 0 { + c.w_waiting += 1 + if c.r_waiting > 0 { sync.signal(&c.r_cond) } sync.wait(&c.w_cond, &c.mutex) @@ -294,21 +297,19 @@ try_recv_raw :: proc "contextless" (c: ^Raw_Chan, msg_out: rawptr) -> bool { mem.copy(msg_out, msg, int(c.msg_size)) } - if sync.atomic_load(&c.w_waiting) > 0 { + if c.w_waiting > 0 { sync.signal(&c.w_cond) } return true } else if c.unbuffered_data != nil { // unbuffered - sync.guard(&c.r_mutex) sync.guard(&c.mutex) - if sync.atomic_load(&c.closed) || - sync.atomic_load(&c.w_waiting) == 0 { + if c.closed || c.w_waiting == 0 { return false } mem.copy(msg_out, c.unbuffered_data, 
int(c.msg_size)) - sync.atomic_sub(&c.w_waiting, 1) + c.w_waiting -= 1 sync.signal(&c.w_cond) return true @@ -351,10 +352,10 @@ close :: proc "contextless" (c: ^Raw_Chan) -> bool { return false } sync.guard(&c.mutex) - if sync.atomic_load(&c.closed) { + if c.closed { return false } - sync.atomic_store(&c.closed, true) + c.closed = true sync.broadcast(&c.r_cond) sync.broadcast(&c.w_cond) return true @@ -366,7 +367,7 @@ is_closed :: proc "contextless" (c: ^Raw_Chan) -> bool { return true } sync.guard(&c.mutex) - return bool(sync.atomic_load(&c.closed)) + return bool(c.closed) } @@ -423,9 +424,9 @@ raw_queue_pop :: proc "contextless" (q: ^Raw_Queue) -> (data: rawptr) { can_recv :: proc "contextless" (c: ^Raw_Chan) -> bool { sync.guard(&c.mutex) if is_buffered(c) { - return len(c) > 0 + return c.queue.len > 0 } - return sync.atomic_load(&c.w_waiting) > 0 + return c.w_waiting > 0 } @@ -435,7 +436,7 @@ can_send :: proc "contextless" (c: ^Raw_Chan) -> bool { if is_buffered(c) { return c.queue.len < c.queue.cap } - return sync.atomic_load(&c.r_waiting) > 0 + return c.w_waiting == 0 } @@ -484,4 +485,4 @@ select_raw :: proc "odin" (recvs: []^Raw_Chan, sends: []^Raw_Chan, send_msgs: [] ok = send_raw(sends[sel.idx], send_msgs[sel.idx]) } return -}
\ No newline at end of file +} diff --git a/core/sync/extended.odin b/core/sync/extended.odin index b446fefa0..30b1b2770 100644 --- a/core/sync/extended.odin +++ b/core/sync/extended.odin @@ -8,7 +8,7 @@ _ :: vg Wait group. Wait group is a synchronization primitive used by the waiting thread to wait, -until a all working threads finish work. +until all working threads finish work. The waiting thread first sets the number of working threads it will expect to wait for using `wait_group_add` call, and start waiting using `wait_group_wait` @@ -35,7 +35,7 @@ Wait_Group :: struct #no_copy { /* Increment an internal counter of a wait group. -This procedure atomicaly increments a number to the specified wait group's +This procedure atomically increments a number to the specified wait group's internal counter by a specified amount. This operation can be done on any thread. */ @@ -48,12 +48,12 @@ wait_group_add :: proc "contextless" (wg: ^Wait_Group, delta: int) { atomic_add(&wg.counter, delta) if wg.counter < 0 { - _panic("sync.Wait_Group negative counter") + panic_contextless("sync.Wait_Group negative counter") } if wg.counter == 0 { cond_broadcast(&wg.cond) if wg.counter != 0 { - _panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait") + panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait") } } } @@ -81,7 +81,7 @@ wait_group_wait :: proc "contextless" (wg: ^Wait_Group) { if wg.counter != 0 { cond_wait(&wg.cond, &wg.mutex) if wg.counter != 0 { - _panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait") + panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait") } } } @@ -105,7 +105,7 @@ wait_group_wait_with_timeout :: proc "contextless" (wg: ^Wait_Group, duration: t return false } if wg.counter != 0 { - _panic("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait") + panic_contextless("sync.Wait_Group misuse: sync.wait_group_add called concurrently with sync.wait_group_wait") } } return true @@ -121,7 +121,7 @@ When `barrier_wait` procedure is called by any thread, that thread will block the execution, until all threads associated with the barrier reach the same point of execution and also call `barrier_wait`. -when barrier is initialized, a `thread_count` parameter is passed, signifying +When a barrier is initialized, a `thread_count` parameter is passed, signifying the amount of participant threads of the barrier. The barrier also keeps track of an internal atomic counter. When a thread calls `barrier_wait`, the internal counter is incremented. When the internal counter reaches `thread_count`, it is @@ -208,7 +208,7 @@ Represents a thread synchronization primitive that, when signalled, releases one single waiting thread and then resets automatically to a state where it can be signalled again. -When a thread calls `auto_reset_event_wait`, it's execution will be blocked, +When a thread calls `auto_reset_event_wait`, its execution will be blocked, until the event is signalled by another thread. The call to `auto_reset_event_signal` wakes up exactly one thread waiting for the event. */ @@ -228,15 +228,15 @@ thread. 
*/ auto_reset_event_signal :: proc "contextless" (e: ^Auto_Reset_Event) { old_status := atomic_load_explicit(&e.status, .Relaxed) + new_status := old_status + 1 if old_status < 1 else 1 for { - new_status := old_status + 1 if old_status < 1 else 1 if _, ok := atomic_compare_exchange_weak_explicit(&e.status, old_status, new_status, .Release, .Relaxed); ok { break } - - if old_status < 0 { - sema_post(&e.sema) - } + cpu_relax() + } + if old_status < 0 { + sema_post(&e.sema) } } @@ -297,7 +297,7 @@ waiting to acquire the lock, exactly one of those threads is unblocked and allowed into the critical section. */ ticket_mutex_unlock :: #force_inline proc "contextless" (m: ^Ticket_Mutex) { - atomic_add_explicit(&m.serving, 1, .Relaxed) + atomic_add_explicit(&m.serving, 1, .Release) } /* @@ -331,8 +331,8 @@ Benaphore. A benaphore is a combination of an atomic variable and a semaphore that can improve locking efficiency in a no-contention system. Acquiring a benaphore -lock doesn't call into an internal semaphore, if no other thread in a middle of -a critical section. +lock doesn't call into an internal semaphore, if no other thread is in the +middle of a critical section. Once a lock on a benaphore is acquired by a thread, no other thread is allowed into any critical sections, associted with the same benaphore, until the lock @@ -355,7 +355,7 @@ from entering any critical sections associated with the same benaphore, until until the lock is released. */ benaphore_lock :: proc "contextless" (b: ^Benaphore) { - if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 { + if atomic_add_explicit(&b.counter, 1, .Acquire) > 0 { sema_wait(&b.sema) } } @@ -381,10 +381,10 @@ Release a lock on a benaphore. This procedure releases a lock on the specified benaphore. If any of the threads are waiting on the lock, exactly one thread is allowed into a critical section -associated with the same banaphore. +associated with the same benaphore. */ benaphore_unlock :: proc "contextless" (b: ^Benaphore) { - if atomic_sub_explicit(&b.counter, 1, .Release) > 0 { + if atomic_sub_explicit(&b.counter, 1, .Release) > 1 { sema_post(&b.sema) } } @@ -418,8 +418,8 @@ benaphore_guard :: proc "contextless" (m: ^Benaphore) -> bool { /* Recursive benaphore. -Recurisve benaphore is just like a plain benaphore, except it allows reentrancy -into the critical section. +A recursive benaphore is just like a plain benaphore, except it allows +reentrancy into the critical section. When a lock is acquired on a benaphore, all other threads attempting to acquire a lock on the same benaphore will be blocked from any critical sections, @@ -449,13 +449,15 @@ recursive benaphore, until the lock is released. */ recursive_benaphore_lock :: proc "contextless" (b: ^Recursive_Benaphore) { tid := current_thread_id() - if atomic_add_explicit(&b.counter, 1, .Acquire) > 1 { - if tid != b.owner { - sema_wait(&b.sema) + check_owner: if tid != atomic_load_explicit(&b.owner, .Acquire) { + atomic_add_explicit(&b.counter, 1, .Relaxed) + if _, ok := atomic_compare_exchange_strong_explicit(&b.owner, 0, tid, .Release, .Relaxed); ok { + break check_owner } + sema_wait(&b.sema) + atomic_store_explicit(&b.owner, tid, .Release) } // inside the lock - b.owner = tid b.recursion += 1 } @@ -472,15 +474,14 @@ benaphore, until the lock is released. 
*/ recursive_benaphore_try_lock :: proc "contextless" (b: ^Recursive_Benaphore) -> bool { tid := current_thread_id() - if b.owner == tid { - atomic_add_explicit(&b.counter, 1, .Acquire) - } - - if v, _ := atomic_compare_exchange_strong_explicit(&b.counter, 0, 1, .Acquire, .Acquire); v != 0 { + check_owner: if tid != atomic_load_explicit(&b.owner, .Acquire) { + if _, ok := atomic_compare_exchange_strong_explicit(&b.owner, 0, tid, .Release, .Relaxed); ok { + atomic_add_explicit(&b.counter, 1, .Relaxed) + break check_owner + } return false } // inside the lock - b.owner = tid b.recursion += 1 return true } @@ -494,14 +495,14 @@ for other threads for entering. */ recursive_benaphore_unlock :: proc "contextless" (b: ^Recursive_Benaphore) { tid := current_thread_id() - _assert(tid == b.owner, "tid != b.owner") + assert_contextless(tid == atomic_load_explicit(&b.owner, .Relaxed), "tid != b.owner") b.recursion -= 1 recursion := b.recursion + if recursion == 0 { - b.owner = 0 - } - if atomic_sub_explicit(&b.counter, 1, .Release) > 0 { - if recursion == 0 { + if atomic_sub_explicit(&b.counter, 1, .Relaxed) == 1 { + atomic_store_explicit(&b.owner, 0, .Release) + } else { sema_post(&b.sema) } } @@ -740,4 +741,4 @@ Make event available. one_shot_event_signal :: proc "contextless" (e: ^One_Shot_Event) { atomic_store_explicit(&e.state, 1, .Release) futex_broadcast(&e.state) -}
\ No newline at end of file +} diff --git a/core/sync/futex_darwin.odin b/core/sync/futex_darwin.odin index 5b5f53c20..10ff7bfbb 100644 --- a/core/sync/futex_darwin.odin +++ b/core/sync/futex_darwin.odin @@ -12,6 +12,8 @@ foreign System { // __ulock_wait is not available on 10.15 // See https://github.com/odin-lang/Odin/issues/1959 __ulock_wait :: proc "c" (operation: u32, addr: rawptr, value: u64, timeout_us: u32) -> c.int --- + // >= MacOS 11. + __ulock_wait2 :: proc "c" (operation: u32, addr: rawptr, value: u64, timeout_ns: u64, value2: u64) -> c.int --- __ulock_wake :: proc "c" (operation: u32, addr: rawptr, wake_value: u64) -> c.int --- } @@ -48,22 +50,29 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati case -ETIMEDOUT: return false case: - _panic("darwin.os_sync_wait_on_address_with_timeout failure") + panic_contextless("darwin.os_sync_wait_on_address_with_timeout failure") } } else { - timeout_ns := u32(duration) - s := __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_ns) + when darwin.ULOCK_WAIT_2_AVAILABLE { + timeout_ns := u64(duration) + s := __ulock_wait2(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_ns, 0) + } else { + timeout_us := u32(duration / time.Microsecond) + s := __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, f, u64(expected), timeout_us) + } + if s >= 0 { return true } + switch s { case EINTR, EFAULT: return true case ETIMEDOUT: return false case: - _panic("futex_wait failure") + panic_contextless("futex_wait failure") } return true @@ -83,7 +92,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) { case -ENOENT: return case: - _panic("darwin.os_sync_wake_by_address_any failure") + panic_contextless("darwin.os_sync_wake_by_address_any failure") } } } else { @@ -99,7 +108,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) { case ENOENT: return case: - _panic("futex_wake_single failure") + panic_contextless("futex_wake_single failure") } } @@ -119,7 +128,7 @@ _futex_broadcast :: proc "contextless" (f: ^Futex) { case -ENOENT: return case: - _panic("darwin.os_sync_wake_by_address_all failure") + panic_contextless("darwin.os_sync_wake_by_address_all failure") } } } else { @@ -135,7 +144,7 @@ _futex_broadcast :: proc "contextless" (f: ^Futex) { case ENOENT: return case: - _panic("futex_wake_all failure") + panic_contextless("futex_wake_all failure") } } diff --git a/core/sync/futex_freebsd.odin b/core/sync/futex_freebsd.odin index 4fbb33ee0..e3f95b146 100644 --- a/core/sync/futex_freebsd.odin +++ b/core/sync/futex_freebsd.odin @@ -21,7 +21,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool { continue } - _panic("_futex_wait failure") + panic_contextless("_futex_wait failure") } unreachable() @@ -44,14 +44,14 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati return false } - _panic("_futex_wait_with_timeout failure") + panic_contextless("_futex_wait_with_timeout failure") } _futex_signal :: proc "contextless" (f: ^Futex) { errno := freebsd._umtx_op(f, .WAKE, 1, nil, nil) if errno != nil { - _panic("_futex_signal failure") + panic_contextless("_futex_signal failure") } } @@ -59,6 +59,6 @@ _futex_broadcast :: proc "contextless" (f: ^Futex) { errno := freebsd._umtx_op(f, .WAKE, cast(c.ulong)max(i32), nil, nil) if errno != nil { - _panic("_futex_broadcast failure") + panic_contextless("_futex_broadcast failure") } } diff --git a/core/sync/futex_linux.odin b/core/sync/futex_linux.odin index 846a82486..52143880b 100644 --- 
a/core/sync/futex_linux.odin +++ b/core/sync/futex_linux.odin @@ -15,7 +15,7 @@ _futex_wait :: proc "contextless" (futex: ^Futex, expected: u32) -> bool { return true case: // TODO(flysand): More descriptive panic messages based on the vlaue of `errno` - _panic("futex_wait failure") + panic_contextless("futex_wait failure") } } @@ -34,7 +34,7 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du case .NONE, .EINTR, .EAGAIN: return true case: - _panic("futex_wait_with_timeout failure") + panic_contextless("futex_wait_with_timeout failure") } } @@ -44,7 +44,7 @@ _futex_signal :: proc "contextless" (futex: ^Futex) { case .NONE: return case: - _panic("futex_wake_single failure") + panic_contextless("futex_wake_single failure") } } @@ -57,6 +57,6 @@ _futex_broadcast :: proc "contextless" (futex: ^Futex) { case .NONE: return case: - _panic("_futex_wake_all failure") + panic_contextless("_futex_wake_all failure") } } diff --git a/core/sync/futex_netbsd.odin b/core/sync/futex_netbsd.odin index b98346434..e49b25b02 100644 --- a/core/sync/futex_netbsd.odin +++ b/core/sync/futex_netbsd.odin @@ -35,7 +35,7 @@ _futex_wait :: proc "contextless" (futex: ^Futex, expected: u32) -> bool { case EINTR, EAGAIN: return true case: - _panic("futex_wait failure") + panic_contextless("futex_wait failure") } } return true @@ -55,7 +55,7 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du case ETIMEDOUT: return false case: - _panic("futex_wait_with_timeout failure") + panic_contextless("futex_wait_with_timeout failure") } } return true @@ -63,12 +63,12 @@ _futex_wait_with_timeout :: proc "contextless" (futex: ^Futex, expected: u32, du _futex_signal :: proc "contextless" (futex: ^Futex) { if _, ok := intrinsics.syscall_bsd(unix.SYS___futex, uintptr(futex), FUTEX_WAKE_PRIVATE, 1, 0, 0, 0); !ok { - _panic("futex_wake_single failure") + panic_contextless("futex_wake_single failure") } } _futex_broadcast :: proc "contextless" (futex: ^Futex) { if _, ok := intrinsics.syscall_bsd(unix.SYS___futex, uintptr(futex), FUTEX_WAKE_PRIVATE, uintptr(max(i32)), 0, 0, 0); !ok { - _panic("_futex_wake_all failure") + panic_contextless("_futex_wake_all failure") } } diff --git a/core/sync/futex_openbsd.odin b/core/sync/futex_openbsd.odin index 83055b950..7d3cc8578 100644 --- a/core/sync/futex_openbsd.odin +++ b/core/sync/futex_openbsd.odin @@ -36,7 +36,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool { return false } - _panic("futex_wait failure") + panic_contextless("futex_wait failure") } _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool { @@ -62,14 +62,14 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati return false } - _panic("futex_wait_with_timeout failure") + panic_contextless("futex_wait_with_timeout failure") } _futex_signal :: proc "contextless" (f: ^Futex) { res := _unix_futex(f, FUTEX_WAKE_PRIVATE, 1, nil) if res == -1 { - _panic("futex_wake_single failure") + panic_contextless("futex_wake_single failure") } } @@ -77,6 +77,6 @@ _futex_broadcast :: proc "contextless" (f: ^Futex) { res := _unix_futex(f, FUTEX_WAKE_PRIVATE, u32(max(i32)), nil) if res == -1 { - _panic("_futex_wake_all failure") + panic_contextless("_futex_wake_all failure") } } diff --git a/core/sync/futex_wasm.odin b/core/sync/futex_wasm.odin index 013c425e8..0f9659a02 100644 --- a/core/sync/futex_wasm.odin +++ b/core/sync/futex_wasm.odin @@ -10,7 +10,7 @@ import "core:time" _futex_wait 
:: proc "contextless" (f: ^Futex, expected: u32) -> bool { when !intrinsics.has_target_feature("atomics") { - _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") } else { s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1) return s != 0 @@ -19,7 +19,7 @@ _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool { _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool { when !intrinsics.has_target_feature("atomics") { - _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") } else { s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration)) return s != 0 @@ -28,7 +28,7 @@ _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, durati _futex_signal :: proc "contextless" (f: ^Futex) { when !intrinsics.has_target_feature("atomics") { - _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") } else { loop: for { s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1) @@ -41,7 +41,7 @@ _futex_signal :: proc "contextless" (f: ^Futex) { _futex_broadcast :: proc "contextless" (f: ^Futex) { when !intrinsics.has_target_feature("atomics") { - _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + panic_contextless("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") } else { loop: for { s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0)) diff --git a/core/sync/primitives.odin b/core/sync/primitives.odin index a22824481..f091de045 100644 --- a/core/sync/primitives.odin +++ b/core/sync/primitives.odin @@ -1,6 +1,5 @@ package sync -import "base:runtime" import "core:time" /* @@ -390,7 +389,7 @@ recursive_mutex_guard :: proc "contextless" (m: ^Recursive_Mutex) -> bool { A condition variable. `Cond` implements a condition variable, a rendezvous point for threads waiting -for signalling the occurence of an event. Condition variables are used on +for signalling the occurence of an event. Condition variables are used in conjuction with mutexes to provide a shared access to one or more shared variable. @@ -560,7 +559,7 @@ futex_wait :: proc "contextless" (f: ^Futex, expected: u32) { return } ok := _futex_wait(f, expected) - _assert(ok, "futex_wait failure") + assert_contextless(ok, "futex_wait failure") } /* @@ -597,18 +596,3 @@ Wake up multiple threads waiting on a futex. futex_broadcast :: proc "contextless" (f: ^Futex) { _futex_broadcast(f) } - - -@(private) -_assert :: proc "contextless" (cond: bool, msg: string) { - if !cond { - _panic(msg) - } -} - -@(private) -_panic :: proc "contextless" (msg: string) -> ! 
{ - runtime.print_string(msg) - runtime.print_byte('\n') - runtime.trap() -} diff --git a/core/sync/primitives_atomic.odin b/core/sync/primitives_atomic.odin index 1d8e423db..3c4324eb7 100644 --- a/core/sync/primitives_atomic.odin +++ b/core/sync/primitives_atomic.odin @@ -240,7 +240,7 @@ atomic_recursive_mutex_lock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) { atomic_recursive_mutex_unlock :: proc "contextless" (m: ^Atomic_Recursive_Mutex) { tid := current_thread_id() - _assert(tid == m.owner, "tid != m.owner") + assert_contextless(tid == m.owner, "tid != m.owner") m.recursion -= 1 recursion := m.recursion if recursion == 0 { @@ -361,7 +361,7 @@ atomic_sema_wait_with_timeout :: proc "contextless" (s: ^Atomic_Sema, duration: if !futex_wait_with_timeout(&s.count, u32(original_count), remaining) { return false } - original_count = s.count + original_count = atomic_load_explicit(&s.count, .Relaxed) } if original_count == atomic_compare_exchange_strong_explicit(&s.count, original_count, original_count-1, .Acquire, .Acquire) { return true diff --git a/core/sys/darwin/sync.odin b/core/sys/darwin/sync.odin index 121d3edef..58fc7c9e4 100644 --- a/core/sys/darwin/sync.odin +++ b/core/sys/darwin/sync.odin @@ -5,6 +5,7 @@ foreign import system "system:System.framework" // #define OS_WAIT_ON_ADDR_AVAILABILITY \ // __API_AVAILABLE(macos(14.4), ios(17.4), tvos(17.4), watchos(10.4)) when ODIN_OS == .Darwin { + when ODIN_PLATFORM_SUBTARGET == .iOS && ODIN_MINIMUM_OS_VERSION >= 17_04_00 { WAIT_ON_ADDRESS_AVAILABLE :: true } else when ODIN_MINIMUM_OS_VERSION >= 14_04_00 { @@ -12,8 +13,18 @@ when ODIN_OS == .Darwin { } else { WAIT_ON_ADDRESS_AVAILABLE :: false } + + when ODIN_PLATFORM_SUBTARGET == .iOS && ODIN_MINIMUM_OS_VERSION >= 14_00_00 { + ULOCK_WAIT_2_AVAILABLE :: true + } else when ODIN_MINIMUM_OS_VERSION >= 11_00_00 { + ULOCK_WAIT_2_AVAILABLE :: true + } else { + ULOCK_WAIT_2_AVAILABLE :: false + } + } else { WAIT_ON_ADDRESS_AVAILABLE :: false + ULOCK_WAIT_2_AVAILABLE :: false } os_sync_wait_on_address_flag :: enum u32 { diff --git a/core/sys/info/platform_darwin.odin b/core/sys/info/platform_darwin.odin index 3fd857bfe..493f038f0 100644 --- a/core/sys/info/platform_darwin.odin +++ b/core/sys/info/platform_darwin.odin @@ -530,6 +530,10 @@ macos_release_map: map[string]Darwin_To_Release = { "23F79" = {{23, 5, 0}, "macOS", {"Sonoma", {14, 5, 0}}}, "23G80" = {{23, 6, 0}, "macOS", {"Sonoma", {14, 6, 0}}}, "23G93" = {{23, 6, 0}, "macOS", {"Sonoma", {14, 6, 1}}}, + "23H124" = {{23, 6, 0}, "macOS", {"Sonoma", {14, 7, 0}}}, + + // MacOS Sequoia + "24A335" = {{24, 0, 0}, "macOS", {"Sequoia", {15, 0, 0}}}, } @(private) diff --git a/core/testing/runner.odin b/core/testing/runner.odin index 683db9d15..6b9d610ed 100644 --- a/core/testing/runner.odin +++ b/core/testing/runner.odin @@ -204,6 +204,10 @@ runner :: proc(internal_tests: []Internal_Test) -> bool { } } + when ODIN_OS == .Windows { + console_ansi_init() + } + stdout := io.to_writer(os.stream_from_handle(os.stdout)) stderr := io.to_writer(os.stream_from_handle(os.stderr)) diff --git a/core/testing/runner_windows.odin b/core/testing/runner_windows.odin new file mode 100644 index 000000000..fa233ff84 --- /dev/null +++ b/core/testing/runner_windows.odin @@ -0,0 +1,22 @@ +//+private +package testing + +import win32 "core:sys/windows" + +console_ansi_init :: proc() { + stdout := win32.GetStdHandle(win32.STD_OUTPUT_HANDLE) + if stdout != win32.INVALID_HANDLE && stdout != nil { + old_console_mode: u32 + if win32.GetConsoleMode(stdout, &old_console_mode) { 
+ win32.SetConsoleMode(stdout, old_console_mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING) + } + } + + stderr := win32.GetStdHandle(win32.STD_ERROR_HANDLE) + if stderr != win32.INVALID_HANDLE && stderr != nil { + old_console_mode: u32 + if win32.GetConsoleMode(stderr, &old_console_mode) { + win32.SetConsoleMode(stderr, old_console_mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING) + } + } +} diff --git a/core/testing/signal_handler_libc.odin b/core/testing/signal_handler_libc.odin index b665b047c..7442c100c 100644 --- a/core/testing/signal_handler_libc.odin +++ b/core/testing/signal_handler_libc.odin @@ -26,6 +26,8 @@ import "core:os" @(private="file", thread_local) local_test_index: libc.sig_atomic_t +@(private="file", thread_local) +local_test_index_set: bool // Windows does not appear to have a SIGTRAP, so this is defined here, instead // of in the libc package, just so there's no confusion about it being @@ -45,6 +47,13 @@ stop_runner_callback :: proc "c" (sig: libc.int) { @(private="file") stop_test_callback :: proc "c" (sig: libc.int) { + if !local_test_index_set { + // We're a thread created by a test thread. + // + // There's nothing we can do to inform the test runner about who + // signalled, so hopefully the test will handle its own sub-threads. + return + } if local_test_index == -1 { // We're the test runner, and we ourselves have caught a signal from // which there is no recovery. @@ -114,6 +123,7 @@ This is a dire bug and should be reported to the Odin developers. _setup_signal_handler :: proc() { local_test_index = -1 + local_test_index_set = true // Catch user interrupt / CTRL-C. libc.signal(libc.SIGINT, stop_runner_callback) @@ -135,6 +145,7 @@ _setup_signal_handler :: proc() { _setup_task_signal_handler :: proc(test_index: int) { local_test_index = cast(libc.sig_atomic_t)test_index + local_test_index_set = true } _should_stop_runner :: proc() -> bool { diff --git a/core/testing/testing.odin b/core/testing/testing.odin index d5e7c6830..09bf6dc0e 100644 --- a/core/testing/testing.odin +++ b/core/testing/testing.odin @@ -105,9 +105,13 @@ cleanup :: proc(t: ^T, procedure: proc(rawptr), user_data: rawptr) { append(&t.cleanups, Internal_Cleanup{procedure, user_data, context}) } -expect :: proc(t: ^T, ok: bool, msg: string = "", loc := #caller_location) -> bool { +expect :: proc(t: ^T, ok: bool, msg := "", expr := #caller_expression(ok), loc := #caller_location) -> bool { if !ok { - log.error(msg, location=loc) + if msg == "" { + log.errorf("expected %v to be true", expr, location=loc) + } else { + log.error(msg, location=loc) + } } return ok } @@ -119,10 +123,10 @@ expectf :: proc(t: ^T, ok: bool, format: string, args: ..any, loc := #caller_loc return ok } -expect_value :: proc(t: ^T, value, expected: $T, loc := #caller_location) -> bool where intrinsics.type_is_comparable(T) { +expect_value :: proc(t: ^T, value, expected: $T, loc := #caller_location, value_expr := #caller_expression(value)) -> bool where intrinsics.type_is_comparable(T) { ok := value == expected || reflect.is_nil(value) && reflect.is_nil(expected) if !ok { - log.errorf("expected %v, got %v", expected, value, location=loc) + log.errorf("expected %v to be %v, got %v", value_expr, expected, value, location=loc) } return ok } diff --git a/core/thread/thread.odin b/core/thread/thread.odin index 17ba1a0a2..c1cbceb42 100644 --- a/core/thread/thread.odin +++ b/core/thread/thread.odin @@ -272,7 +272,7 @@ create_and_start :: proc(fn: proc(), init_context: Maybe(runtime.Context) = nil, t := create(thread_proc,
priority) t.data = rawptr(fn) if self_cleanup { - t.flags += {.Self_Cleanup} + intrinsics.atomic_or(&t.flags, {.Self_Cleanup}) } t.init_context = init_context start(t) @@ -307,7 +307,7 @@ create_and_start_with_data :: proc(data: rawptr, fn: proc(data: rawptr), init_co t.user_index = 1 t.user_args[0] = data if self_cleanup { - t.flags += {.Self_Cleanup} + intrinsics.atomic_or(&t.flags, {.Self_Cleanup}) } t.init_context = init_context start(t) @@ -347,7 +347,7 @@ create_and_start_with_poly_data :: proc(data: $T, fn: proc(data: T), init_contex mem.copy(&t.user_args[0], &data, size_of(T)) if self_cleanup { - t.flags += {.Self_Cleanup} + intrinsics.atomic_or(&t.flags, {.Self_Cleanup}) } t.init_context = init_context @@ -394,7 +394,7 @@ create_and_start_with_poly_data2 :: proc(arg1: $T1, arg2: $T2, fn: proc(T1, T2), _ = copy(user_args[n:], mem.ptr_to_bytes(&arg2)) if self_cleanup { - t.flags += {.Self_Cleanup} + intrinsics.atomic_or(&t.flags, {.Self_Cleanup}) } t.init_context = init_context @@ -443,7 +443,7 @@ create_and_start_with_poly_data3 :: proc(arg1: $T1, arg2: $T2, arg3: $T3, fn: pr _ = copy(user_args[n:], mem.ptr_to_bytes(&arg3)) if self_cleanup { - t.flags += {.Self_Cleanup} + intrinsics.atomic_or(&t.flags, {.Self_Cleanup}) } t.init_context = init_context @@ -494,7 +494,7 @@ create_and_start_with_poly_data4 :: proc(arg1: $T1, arg2: $T2, arg3: $T3, arg4: _ = copy(user_args[n:], mem.ptr_to_bytes(&arg4)) if self_cleanup { - t.flags += {.Self_Cleanup} + intrinsics.atomic_or(&t.flags, {.Self_Cleanup}) } t.init_context = init_context diff --git a/core/thread/thread_pool.odin b/core/thread/thread_pool.odin index 9bcc42968..d9166b450 100644 --- a/core/thread/thread_pool.odin +++ b/core/thread/thread_pool.odin @@ -60,6 +60,7 @@ pool_thread_runner :: proc(t: ^Thread) { if task, ok := pool_pop_waiting(pool); ok { data.task = task pool_do_work(pool, task) + sync.guard(&pool.mutex) data.task = {} } } diff --git a/core/thread/thread_unix.odin b/core/thread/thread_unix.odin index 2e196c3e6..9576a3040 100644 --- a/core/thread/thread_unix.odin +++ b/core/thread/thread_unix.odin @@ -5,18 +5,14 @@ package thread import "base:runtime" import "core:sync" import "core:sys/unix" -import "core:time" _IS_SUPPORTED :: true -CAS :: sync.atomic_compare_exchange_strong - // NOTE(tetra): Aligned here because of core/unix/pthread_linux.odin/pthread_t. // Also see core/sys/darwin/mach_darwin.odin/semaphore_t. Thread_Os_Specific :: struct #align(16) { unix_thread: unix.pthread_t, // NOTE: very large on Darwin, small on Linux. - cond: sync.Cond, - mutex: sync.Mutex, + start_ok: sync.Sema, } // // Creates a thread which will run the given procedure. @@ -29,14 +25,10 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread { // We need to give the thread a moment to start up before we enable cancellation. can_set_thread_cancel_state := unix.pthread_setcancelstate(unix.PTHREAD_CANCEL_ENABLE, nil) == 0 - sync.lock(&t.mutex) - t.id = sync.current_thread_id() - for (.Started not_in sync.atomic_load(&t.flags)) { - // HACK: use a timeout so in the event that the condition is signalled at THIS comment's exact point - // (after checking flags, before starting the wait) it gets itself out of that deadlock after a ms. 
- sync.wait_with_timeout(&t.cond, &t.mutex, time.Millisecond) + if .Started not_in sync.atomic_load(&t.flags) { + sync.wait(&t.start_ok) } if .Joined in sync.atomic_load(&t.flags) { @@ -66,8 +58,6 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread { sync.atomic_or(&t.flags, { .Done }) - sync.unlock(&t.mutex) - if .Self_Cleanup in sync.atomic_load(&t.flags) { res := unix.pthread_detach(t.unix_thread) assert_contextless(res == 0) @@ -132,7 +122,7 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread { _start :: proc(t: ^Thread) { sync.atomic_or(&t.flags, { .Started }) - sync.signal(&t.cond) + sync.post(&t.start_ok) } _is_done :: proc(t: ^Thread) -> bool { @@ -140,24 +130,18 @@ _is_done :: proc(t: ^Thread) -> bool { } _join :: proc(t: ^Thread) { - // sync.guard(&t.mutex) - if unix.pthread_equal(unix.pthread_self(), t.unix_thread) { return } - // Preserve other flags besides `.Joined`, like `.Started`. - unjoined := sync.atomic_load(&t.flags) - {.Joined} - joined := unjoined + {.Joined} - - // Try to set `t.flags` from unjoined to joined. If it returns joined, - // it means the previous value had that flag set and we can return. - if res, ok := CAS(&t.flags, unjoined, joined); res == joined && !ok { + // If the previous value was already `Joined`, then we can return. + if .Joined in sync.atomic_or(&t.flags, {.Joined}) { return } + // Prevent non-started threads from blocking main thread with initial wait // condition. - if .Started not_in unjoined { + if .Started not_in sync.atomic_load(&t.flags) { _start(t) } unix.pthread_join(t.unix_thread, nil) diff --git a/core/thread/thread_windows.odin b/core/thread/thread_windows.odin index 300673191..cc73a2d6a 100644 --- a/core/thread/thread_windows.odin +++ b/core/thread/thread_windows.odin @@ -27,7 +27,7 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread { __windows_thread_entry_proc :: proc "system" (t_: rawptr) -> win32.DWORD { t := (^Thread)(t_) - if .Joined in t.flags { + if .Joined in sync.atomic_load(&t.flags) { return 0 } @@ -48,9 +48,9 @@ _create :: proc(procedure: Thread_Proc, priority: Thread_Priority) -> ^Thread { t.procedure(t) } - intrinsics.atomic_store(&t.flags, t.flags + {.Done}) + intrinsics.atomic_or(&t.flags, {.Done}) - if .Self_Cleanup in t.flags { + if .Self_Cleanup in sync.atomic_load(&t.flags) { win32.CloseHandle(t.win32_thread) t.win32_thread = win32.INVALID_HANDLE // NOTE(ftphikari): It doesn't matter which context 'free' received, right? 
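The `core:thread` hunks above consistently replace the plain set update `t.flags += {.Self_Cleanup}` with `intrinsics.atomic_or`, and read the flags back through `sync.atomic_load`. A minimal sketch of the race this closes, using a hypothetical `Flags` bit_set rather than the real `Thread` state:

	package example

	import "base:intrinsics"

	Flag  :: enum u8 { Started, Joined, Done, Self_Cleanup }
	Flags :: bit_set[Flag; u8]

	flags: Flags

	// Racy: `+=` on a bit_set is a separate load, or, and store. A flag
	// set by another thread between the load and the store is silently
	// dropped.
	set_self_cleanup_racy :: proc() {
		flags += {.Self_Cleanup}
	}

	// Safe: the whole read-modify-write is a single atomic operation,
	// matching the pattern introduced in the diff.
	set_self_cleanup_atomic :: proc() {
		intrinsics.atomic_or(&flags, Flags{.Self_Cleanup})
	}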
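The `core:testing` changes above are built on the new `#caller_expression` directive, whose parsing and checking are added in the compiler hunks below. A short sketch of how such a default argument behaves, with a hypothetical `check` helper rather than the real testing API:

	package example

	import "core:fmt"

	// `expr` defaults to the source text of the argument the caller
	// passed for `cond`, captured at the call site.
	check :: proc(cond: bool, expr := #caller_expression(cond)) {
		if !cond {
			fmt.printf("check failed: %s\n", expr)
		}
	}

	main :: proc() {
		x := 3
		check(x > 5) // reports: check failed: x > 5
	}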
diff --git a/src/bug_report.cpp b/src/bug_report.cpp index 5f768f57f..fa7e156ce 100644 --- a/src/bug_report.cpp +++ b/src/bug_report.cpp @@ -919,6 +919,8 @@ gb_internal void report_os_info() { {"23F79", {23, 5, 0}, "macOS", {"Sonoma", {14, 5, 0}}}, {"23G80", {23, 6, 0}, "macOS", {"Sonoma", {14, 6, 0}}}, {"23G93", {23, 6, 0}, "macOS", {"Sonoma", {14, 6, 1}}}, + {"23H124", {23, 6, 0}, "macOS", {"Sonoma", {14, 7, 0}}}, + {"24A335", {24, 0, 0}, "macOS", {"Sequoia", {15, 0, 0}}}, }; diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 1aca5d424..7aff8e650 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -285,6 +285,7 @@ enum VetFlags : u64 { VetFlag_Deprecated = 1u<<7, VetFlag_Cast = 1u<<8, VetFlag_Tabs = 1u<<9, + VetFlag_UnusedProcedures = 1u<<10, VetFlag_Unused = VetFlag_UnusedVariables|VetFlag_UnusedImports, @@ -316,6 +317,8 @@ u64 get_vet_flag_from_name(String const &name) { return VetFlag_Cast; } else if (name == "tabs") { return VetFlag_Tabs; + } else if (name == "unused-procedures") { + return VetFlag_UnusedProcedures; } return VetFlag_NONE; } @@ -383,6 +386,7 @@ struct BuildContext { u64 vet_flags; u32 sanitizer_flags; + StringSet vet_packages; bool has_resource; String link_flags; @@ -1462,8 +1466,6 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta bc->thread_count = gb_max(bc->affinity.thread_count, 1); } - string_set_init(&bc->custom_attributes); - bc->ODIN_VENDOR = str_lit("odin"); bc->ODIN_VERSION = ODIN_VERSION; bc->ODIN_ROOT = odin_root_dir(); diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 888aa074d..909eb668e 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1632,6 +1632,22 @@ gb_internal bool check_builtin_procedure_directive(CheckerContext *c, Operand *o operand->type = t_source_code_location; operand->mode = Addressing_Value; + } else if (name == "caller_expression") { + if (ce->args.count > 1) { + error(ce->args[0], "'#caller_expression' expects either 0 or 1 arguments, got %td", ce->args.count); + } + if (ce->args.count > 0) { + Ast *arg = ce->args[0]; + Operand o = {}; + Entity *e = check_ident(c, &o, arg, nullptr, nullptr, true); + if (e == nullptr || (e->flags & EntityFlag_Param) == 0) { + error(ce->args[0], "'#caller_expression' expected a valid earlier parameter name"); + } + arg->Ident.entity = e; + } + + operand->type = t_string; + operand->mode = Addressing_Value; } else if (name == "exists") { if (ce->args.count != 1) { error(ce->close, "'#exists' expects 1 argument, got %td", ce->args.count); diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 7f82fb58a..6776094bf 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -7807,7 +7807,8 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c name == "load" || name == "load_directory" || name == "load_hash" || - name == "hash" + name == "hash" || + name == "caller_expression" ) { operand->mode = Addressing_Builtin; operand->builtin_id = BuiltinProc_DIRECTIVE; @@ -8725,6 +8726,10 @@ gb_internal ExprKind check_basic_directive_expr(CheckerContext *c, Operand *o, A error(node, "#caller_location may only be used as a default argument parameter"); o->type = t_source_code_location; o->mode = Addressing_Value; + } else if (name == "caller_expression") { + error(node, "#caller_expression may only be used as a default argument parameter"); + o->type = t_string; + o->mode = Addressing_Value; } else { if (name == "location") { init_core_source_code_location(c->checker); diff --git 
a/src/check_stmt.cpp b/src/check_stmt.cpp index c8717ba98..74a9e8825 100644 --- a/src/check_stmt.cpp +++ b/src/check_stmt.cpp @@ -1641,6 +1641,8 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) Ast *expr = unparen_expr(rs->expr); + Operand rhs_operand = {}; + bool is_range = false; bool is_possibly_addressable = true; isize max_val_count = 2; @@ -1698,7 +1700,7 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) } } } - bool is_ptr = is_type_pointer(type_deref(operand.type)); + bool is_ptr = is_type_pointer(operand.type); Type *t = base_type(type_deref(operand.type)); switch (t->kind) { @@ -1750,16 +1752,19 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) break; case Type_DynamicArray: + is_possibly_addressable = true; array_add(&vals, t->DynamicArray.elem); array_add(&vals, t_int); break; case Type_Slice: + is_possibly_addressable = true; array_add(&vals, t->Slice.elem); array_add(&vals, t_int); break; case Type_Map: + is_possibly_addressable = true; is_map = true; array_add(&vals, t->Map.key); array_add(&vals, t->Map.value); @@ -1781,6 +1786,8 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) case Type_Tuple: { + is_possibly_addressable = false; + isize count = t->Tuple.variables.count; if (count < 1) { ERROR_BLOCK(); @@ -1810,8 +1817,6 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) array_add(&vals, e->type); } - is_possibly_addressable = false; - bool do_break = false; for (isize i = rs->vals.count-1; i >= 0; i--) { if (rs->vals[i] != nullptr && count < i+2) { @@ -1831,6 +1836,11 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) case Type_Struct: if (t->Struct.soa_kind != StructSoa_None) { + if (t->Struct.soa_kind == StructSoa_Fixed) { + is_possibly_addressable = operand.mode == Addressing_Variable || is_ptr; + } else { + is_possibly_addressable = true; + } is_soa = true; array_add(&vals, t->Struct.soa_elem); array_add(&vals, t_int); @@ -1907,7 +1917,7 @@ gb_internal void check_range_stmt(CheckerContext *ctx, Ast *node, u32 mod_flags) if (is_possibly_addressable && i == addressable_index) { entity->flags &= ~EntityFlag_Value; } else { - char const *idx_name = is_map ? "key" : is_bit_set ? "element" : "index"; + char const *idx_name = is_map ? "key" : (is_bit_set || i == 0) ? 
"element" : "index"; error(token, "The %s variable '%.*s' cannot be made addressable", idx_name, LIT(str)); } } diff --git a/src/check_type.cpp b/src/check_type.cpp index 3767f7666..f0e0acb9b 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -1605,6 +1605,25 @@ gb_internal bool is_expr_from_a_parameter(CheckerContext *ctx, Ast *expr) { return false; } +gb_internal bool is_caller_expression(Ast *expr) { + if (expr->kind == Ast_BasicDirective && expr->BasicDirective.name.string == "caller_expression") { + return true; + } + + Ast *call = unparen_expr(expr); + if (call->kind != Ast_CallExpr) { + return false; + } + + ast_node(ce, CallExpr, call); + if (ce->proc->kind != Ast_BasicDirective) { + return false; + } + + ast_node(bd, BasicDirective, ce->proc); + String name = bd->name.string; + return name == "caller_expression"; +} gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_type, Type **out_type_, Ast *expr, bool allow_caller_location) { ParameterValue param_value = {}; @@ -1626,7 +1645,19 @@ gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_ if (in_type) { check_assignment(ctx, &o, in_type, str_lit("parameter value")); } + } else if (is_caller_expression(expr)) { + if (expr->kind != Ast_BasicDirective) { + check_builtin_procedure_directive(ctx, &o, expr, t_string); + } + + param_value.kind = ParameterValue_Expression; + o.type = t_string; + o.mode = Addressing_Value; + o.expr = expr; + if (in_type) { + check_assignment(ctx, &o, in_type, str_lit("parameter value")); + } } else { if (in_type) { check_expr_with_type_hint(ctx, &o, expr, in_type); @@ -1858,6 +1889,7 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para case ParameterValue_Nil: break; case ParameterValue_Location: + case ParameterValue_Expression: case ParameterValue_Value: gbString str = type_to_string(type); error(params[i], "A default value for a parameter must not be a polymorphic constant type, got %s", str); diff --git a/src/checker.cpp b/src/checker.cpp index 64c66c8a6..af1e0e675 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -533,18 +533,13 @@ gb_internal u64 check_vet_flags(CheckerContext *c) { c->curr_proc_decl->proc_lit) { file = c->curr_proc_decl->proc_lit->file(); } - if (file && file->vet_flags_set) { - return file->vet_flags; - } - return build_context.vet_flags; + + return ast_file_vet_flags(file); } gb_internal u64 check_vet_flags(Ast *node) { AstFile *file = node->file(); - if (file && file->vet_flags_set) { - return file->vet_flags; - } - return build_context.vet_flags; + return ast_file_vet_flags(file); } enum VettedEntityKind { @@ -681,20 +676,45 @@ gb_internal bool check_vet_unused(Checker *c, Entity *e, VettedEntity *ve) { return false; } -gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) { - bool vet_unused = (vet_flags & VetFlag_Unused) != 0; - bool vet_shadowing = (vet_flags & (VetFlag_Shadowing|VetFlag_Using)) != 0; - +gb_internal void check_scope_usage_internal(Checker *c, Scope *scope, u64 vet_flags, bool per_entity) { + u64 original_vet_flags = vet_flags; Array<VettedEntity> vetted_entities = {}; array_init(&vetted_entities, heap_allocator()); + defer (array_free(&vetted_entities)); rw_mutex_shared_lock(&scope->mutex); for (auto const &entry : scope->elements) { Entity *e = entry.value; if (e == nullptr) continue; + + vet_flags = original_vet_flags; + if (per_entity) { + vet_flags = ast_file_vet_flags(e->file); + } + + bool vet_unused = (vet_flags & VetFlag_Unused) != 
0; + bool vet_shadowing = (vet_flags & (VetFlag_Shadowing|VetFlag_Using)) != 0; + bool vet_unused_procedures = (vet_flags & VetFlag_UnusedProcedures) != 0; + VettedEntity ve_unused = {}; VettedEntity ve_shadowed = {}; - bool is_unused = vet_unused && check_vet_unused(c, e, &ve_unused); + bool is_unused = false; + if (vet_unused && check_vet_unused(c, e, &ve_unused)) { + is_unused = true; + } else if (vet_unused_procedures && + e->kind == Entity_Procedure) { + if (e->flags&EntityFlag_Used) { + is_unused = false; + } else if (e->flags & EntityFlag_Require) { + is_unused = false; + } else if (e->pkg && e->pkg->kind == Package_Init && e->token.string == "main") { + is_unused = false; + } else { + is_unused = true; + ve_unused.kind = VettedEntity_Unused; + ve_unused.entity = e; + } + } bool is_shadowed = vet_shadowing && check_vet_shadowing(c, e, &ve_shadowed); if (is_unused && is_shadowed) { VettedEntity ve_both = ve_shadowed; @@ -717,13 +737,18 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) { } rw_mutex_shared_unlock(&scope->mutex); - gb_sort(vetted_entities.data, vetted_entities.count, gb_size_of(VettedEntity), vetted_entity_variable_pos_cmp); + array_sort(vetted_entities, vetted_entity_variable_pos_cmp); for (auto const &ve : vetted_entities) { Entity *e = ve.entity; Entity *other = ve.other; String name = e->token.string; + vet_flags = original_vet_flags; + if (per_entity) { + vet_flags = ast_file_vet_flags(e->file); + } + if (ve.kind == VettedEntity_Shadowed_And_Unused) { error(e->token, "'%.*s' declared but not used, possibly shadows declaration at line %d", LIT(name), other->token.pos.line); } else if (vet_flags) { @@ -732,6 +757,9 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) { if (e->kind == Entity_Variable && (vet_flags & VetFlag_UnusedVariables) != 0) { error(e->token, "'%.*s' declared but not used", LIT(name)); } + if (e->kind == Entity_Procedure && (vet_flags & VetFlag_UnusedProcedures) != 0) { + error(e->token, "'%.*s' declared but not used", LIT(name)); + } if ((e->kind == Entity_ImportName || e->kind == Entity_LibraryName) && (vet_flags & VetFlag_UnusedImports) != 0) { error(e->token, "'%.*s' declared but not used", LIT(name)); } @@ -749,7 +777,11 @@ gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) { } } - array_free(&vetted_entities); +} + + +gb_internal void check_scope_usage(Checker *c, Scope *scope, u64 vet_flags) { + check_scope_usage_internal(c, scope, vet_flags, false); for (Scope *child = scope->head_child; child != nullptr; child = child->next) { if (child->flags & (ScopeFlag_Proc|ScopeFlag_Type|ScopeFlag_File)) { @@ -6497,12 +6529,13 @@ gb_internal void check_parsed_files(Checker *c) { TIME_SECTION("check scope usage"); for (auto const &entry : c->info.files) { AstFile *f = entry.value; - u64 vet_flags = build_context.vet_flags; - if (f->vet_flags_set) { - vet_flags = f->vet_flags; - } + u64 vet_flags = ast_file_vet_flags(f); check_scope_usage(c, f->scope, vet_flags); } + for (auto const &entry : c->info.packages) { + AstPackage *pkg = entry.value; + check_scope_usage_internal(c, pkg->scope, 0, true); + } TIME_SECTION("add basic type information"); // Add "Basic" type information diff --git a/src/entity.cpp b/src/entity.cpp index db6ffdd52..0c4a20df4 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -104,6 +104,7 @@ enum ParameterValueKind { ParameterValue_Constant, ParameterValue_Nil, ParameterValue_Location, + ParameterValue_Expression, ParameterValue_Value, }; diff --git 
a/src/gb/gb.h b/src/gb/gb.h index 0e65696e2..1fef4b4f5 100644 --- a/src/gb/gb.h +++ b/src/gb/gb.h @@ -3195,11 +3195,11 @@ void gb_affinity_init(gbAffinity *a) { a->core_count = 1; a->threads_per_core = 1; - if (sysctlbyname("hw.logicalcpu", &count, &count_size, NULL, 0) == 0) { + if (sysctlbyname("kern.smp.cpus", &count, &count_size, NULL, 0) == 0) { if (count > 0) { a->thread_count = count; // Get # of physical cores - if (sysctlbyname("hw.physicalcpu", &count, &count_size, NULL, 0) == 0) { + if (sysctlbyname("kern.smp.cores", &count, &count_size, NULL, 0) == 0) { if (count > 0) { a->core_count = count; a->threads_per_core = a->thread_count / count; @@ -3210,6 +3210,14 @@ } } } + } else if (sysctlbyname("hw.ncpu", &count, &count_size, NULL, 0) == 0) { + // SMP disabled or unavailable. + if (count > 0) { + a->is_accurate = true; + a->thread_count = count; + a->core_count = count; + a->threads_per_core = 1; + } } } diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 29d2ccfe6..68f95cb03 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -528,7 +528,7 @@ gb_internal lbAddr lb_store_range_stmt_val(lbProcedure *p, Ast *stmt_val, lbValu gb_internal lbValue lb_emit_source_code_location_const(lbProcedure *p, String const &procedure, TokenPos const &pos); gb_internal lbValue lb_const_source_code_location_const(lbModule *m, String const &procedure, TokenPos const &pos); -gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TokenPos const &pos); +gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TypeProc *procedure_type, Ast *call_expression); gb_internal lbValue lb_equal_proc_for_type(lbModule *m, Type *type); gb_internal lbValue lb_hasher_proc_for_type(lbModule *m, Type *type); diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 68e1efc1c..5cc79dcc8 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -552,6 +552,48 @@ gb_internal LLVMMetadataRef lb_debug_bitset(lbModule *m, Type *type, String name return final_decl; } +gb_internal LLVMMetadataRef lb_debug_bitfield(lbModule *m, Type *type, String name, LLVMMetadataRef scope, LLVMMetadataRef file, unsigned line) { + Type *bt = base_type(type); + GB_ASSERT(bt->kind == Type_BitField); + + lb_debug_file_line(m, bt->BitField.node, &file, &line); + + u64 size_in_bits = 8*type_size_of(bt); + u32 align_in_bits = 8*cast(u32)type_align_of(bt); + + unsigned element_count = cast(unsigned)bt->BitField.fields.count; + LLVMMetadataRef *elements = gb_alloc_array(permanent_allocator(), LLVMMetadataRef, element_count); + + u64 offset_in_bits = 0; + for (unsigned i = 0; i < element_count; i++) { + Entity *f = bt->BitField.fields[i]; + u8 bit_size = bt->BitField.bit_sizes[i]; + GB_ASSERT(f->kind == Entity_Variable); + String name = f->token.string; + elements[i] = LLVMDIBuilderCreateBitFieldMemberType(m->debug_builder, scope, cast(char const *)name.text, name.len, file, line, + bit_size, offset_in_bits, 0, + LLVMDIFlagZero, lb_debug_type(m, f->type) + ); + + offset_in_bits += bit_size; + } + + LLVMMetadataRef final_decl = LLVMDIBuilderCreateStructType( + m->debug_builder, scope, + cast(char const *)name.text, cast(size_t)name.len, + file, line, + size_in_bits, align_in_bits, + LLVMDIFlagZero, + nullptr, + elements, element_count, + 0, + nullptr, + "", 0 + ); + lb_set_llvm_metadata(m, type, final_decl); + return final_decl; +} + gb_internal 
LLVMMetadataRef lb_debug_enum(lbModule *m, Type *type, String name, LLVMMetadataRef scope, LLVMMetadataRef file, unsigned line) { Type *bt = base_type(type); GB_ASSERT(bt->kind == Type_Enum); @@ -816,6 +858,7 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) { case Type_Union: return lb_debug_union( m, type, make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0); case Type_BitSet: return lb_debug_bitset( m, type, make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0); case Type_Enum: return lb_debug_enum( m, type, make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0); + case Type_BitField: return lb_debug_bitfield( m, type, make_string_c(type_to_string(type, temporary_allocator())), nullptr, nullptr, 0); case Type_Tuple: if (type->Tuple.variables.count == 1) { @@ -901,42 +944,6 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) { lb_debug_type(m, type->Matrix.elem), subscripts, gb_count_of(subscripts)); } - - case Type_BitField: { - LLVMMetadataRef parent_scope = nullptr; - LLVMMetadataRef scope = nullptr; - LLVMMetadataRef file = nullptr; - unsigned line = 0; - u64 size_in_bits = 8*cast(u64)type_size_of(type); - u32 align_in_bits = 8*cast(u32)type_align_of(type); - LLVMDIFlags flags = LLVMDIFlagZero; - - unsigned element_count = cast(unsigned)type->BitField.fields.count; - LLVMMetadataRef *elements = gb_alloc_array(permanent_allocator(), LLVMMetadataRef, element_count); - - u64 offset_in_bits = 0; - for (unsigned i = 0; i < element_count; i++) { - Entity *f = type->BitField.fields[i]; - u8 bit_size = type->BitField.bit_sizes[i]; - GB_ASSERT(f->kind == Entity_Variable); - String name = f->token.string; - unsigned field_line = 0; - LLVMDIFlags field_flags = LLVMDIFlagZero; - elements[i] = LLVMDIBuilderCreateBitFieldMemberType(m->debug_builder, scope, cast(char const *)name.text, name.len, file, field_line, - bit_size, offset_in_bits, offset_in_bits, - field_flags, lb_debug_type(m, f->type) - ); - - offset_in_bits += bit_size; - } - - - return LLVMDIBuilderCreateStructType(m->debug_builder, parent_scope, "", 0, file, line, - size_in_bits, align_in_bits, flags, - nullptr, elements, element_count, 0, nullptr, - "", 0 - ); - } } GB_PANIC("Invalid type %s", type_to_string(type)); @@ -1022,6 +1029,7 @@ gb_internal LLVMMetadataRef lb_debug_type(lbModule *m, Type *type) { case Type_Union: return lb_debug_union(m, type, name, scope, file, line); case Type_BitSet: return lb_debug_bitset(m, type, name, scope, file, line); case Type_Enum: return lb_debug_enum(m, type, name, scope, file, line); + case Type_BitField: return lb_debug_bitfield(m, type, name, scope, file, line); } } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index e850d3364..d84599eb0 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -699,7 +699,9 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { } if (e->Variable.param_value.kind != ParameterValue_Invalid) { - lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, e->token.pos); + GB_ASSERT(e->Variable.param_value.kind != ParameterValue_Location); + GB_ASSERT(e->Variable.param_value.kind != ParameterValue_Expression); + lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, nullptr, nullptr); lb_addr_store(p, res, c); } @@ -3420,7 +3422,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } -gb_internal lbValue 
lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TokenPos const &pos) { +gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const &param_value, TypeProc *procedure_type, Ast* call_expression) { switch (param_value.kind) { case ParameterValue_Constant: if (is_type_constant_type(parameter_type)) { @@ -3446,8 +3448,60 @@ gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, if (p->entity != nullptr) { proc_name = p->entity->token.string; } + + ast_node(ce, CallExpr, call_expression); + TokenPos pos = ast_token(ce->proc).pos; + return lb_emit_source_code_location_as_global(p, proc_name, pos); } + case ParameterValue_Expression: + { + Ast *orig = param_value.original_ast_expr; + if (orig->kind == Ast_BasicDirective) { + gbString expr = expr_to_string(call_expression, temporary_allocator()); + return lb_const_string(p->module, make_string_c(expr)); + } + + isize param_idx = -1; + String param_str = {0}; + { + Ast *call = unparen_expr(orig); + GB_ASSERT(call->kind == Ast_CallExpr); + ast_node(ce, CallExpr, call); + GB_ASSERT(ce->proc->kind == Ast_BasicDirective); + GB_ASSERT(ce->args.count == 1); + Ast *target = ce->args[0]; + GB_ASSERT(target->kind == Ast_Ident); + String target_str = target->Ident.token.string; + + param_idx = lookup_procedure_parameter(procedure_type, target_str); + param_str = target_str; + } + GB_ASSERT(param_idx >= 0); + + + Ast *target_expr = nullptr; + ast_node(ce, CallExpr, call_expression); + + if (ce->split_args->positional.count > param_idx) { + target_expr = ce->split_args->positional[param_idx]; + } + + for_array(i, ce->split_args->named) { + Ast *arg = ce->split_args->named[i]; + ast_node(fv, FieldValue, arg); + GB_ASSERT(fv->field->kind == Ast_Ident); + String name = fv->field->Ident.token.string; + if (name == param_str) { + target_expr = fv->value; + break; + } + } + + gbString expr = expr_to_string(target_expr, temporary_allocator()); + return lb_const_string(p->module, make_string_c(expr)); + } + case ParameterValue_Value: return lb_build_expr(p, param_value.ast_value); } @@ -3739,8 +3793,6 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { } } - TokenPos pos = ast_token(ce->proc).pos; - if (pt->params != nullptr) { isize min_count = pt->params->Tuple.variables.count; @@ -3764,7 +3816,7 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { args[arg_index] = lb_const_nil(p->module, e->type); break; case Entity_Variable: - args[arg_index] = lb_handle_param_value(p, e->type, e->Variable.param_value, pos); + args[arg_index] = lb_handle_param_value(p, e->type, e->Variable.param_value, pt, expr); break; case Entity_Constant: diff --git a/src/main.cpp b/src/main.cpp index 06c200442..a969e32a9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -340,12 +340,14 @@ enum BuildFlagKind { BuildFlag_VetUnused, BuildFlag_VetUnusedImports, BuildFlag_VetUnusedVariables, + BuildFlag_VetUnusedProcedures, BuildFlag_VetUsingStmt, BuildFlag_VetUsingParam, BuildFlag_VetStyle, BuildFlag_VetSemicolon, BuildFlag_VetCast, BuildFlag_VetTabs, + BuildFlag_VetPackages, BuildFlag_CustomAttribute, BuildFlag_IgnoreUnknownAttributes, @@ -547,6 +549,7 @@ gb_internal bool parse_build_flags(Array<String> args) { add_flag(&build_flags, BuildFlag_Vet, str_lit("vet"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_VetUnused, str_lit("vet-unused"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, 
BuildFlag_VetUnusedVariables, str_lit("vet-unused-variables"), BuildFlagParam_None, Command__does_check); + add_flag(&build_flags, BuildFlag_VetUnusedProcedures, str_lit("vet-unused-procedures"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_VetUnusedImports, str_lit("vet-unused-imports"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_VetShadowing, str_lit("vet-shadowing"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_VetUsingStmt, str_lit("vet-using-stmt"), BuildFlagParam_None, Command__does_check); @@ -555,6 +558,7 @@ add_flag(&build_flags, BuildFlag_VetSemicolon, str_lit("vet-semicolon"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_VetCast, str_lit("vet-cast"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_VetTabs, str_lit("vet-tabs"), BuildFlagParam_None, Command__does_check); + add_flag(&build_flags, BuildFlag_VetPackages, str_lit("vet-packages"), BuildFlagParam_String, Command__does_check); add_flag(&build_flags, BuildFlag_CustomAttribute, str_lit("custom-attribute"), BuildFlagParam_String, Command__does_check, true); add_flag(&build_flags, BuildFlag_IgnoreUnknownAttributes, str_lit("ignore-unknown-attributes"), BuildFlagParam_None, Command__does_check); @@ -1220,6 +1224,36 @@ gb_internal bool parse_build_flags(Array<String> args) { case BuildFlag_VetSemicolon: build_context.vet_flags |= VetFlag_Semicolon; break; case BuildFlag_VetCast: build_context.vet_flags |= VetFlag_Cast; break; case BuildFlag_VetTabs: build_context.vet_flags |= VetFlag_Tabs; break; + case BuildFlag_VetUnusedProcedures: + build_context.vet_flags |= VetFlag_UnusedProcedures; + if (!set_flags[BuildFlag_VetPackages]) { + gb_printf_err("-%.*s must be used with -vet-packages\n", LIT(name)); + bad_flags = true; + } + break; + + case BuildFlag_VetPackages: + { + GB_ASSERT(value.kind == ExactValue_String); + String val = value.value_string; + String_Iterator it = {val, 0}; + for (;;) { + String pkg = string_split_iterator(&it, ','); + if (pkg.len == 0) { + break; + } + + pkg = string_trim_whitespace(pkg); + if (!string_is_valid_identifier(pkg)) { + gb_printf_err("-%.*s '%.*s' must be a valid identifier\n", LIT(name), LIT(pkg)); + bad_flags = true; + continue; + } + + string_set_add(&build_context.vet_packages, pkg); + } + } + break; case BuildFlag_CustomAttribute: { @@ -1234,7 +1268,7 @@ gb_internal bool parse_build_flags(Array<String> args) { attr = string_trim_whitespace(attr); if (!string_is_valid_identifier(attr)) { - gb_printf_err("-custom-attribute '%.*s' must be a valid identifier\n", LIT(attr)); + gb_printf_err("-%.*s '%.*s' must be a valid identifier\n", LIT(name), LIT(attr)); bad_flags = true; continue; } @@ -2364,7 +2398,7 @@ gb_internal void print_show_help(String const arg0, String const &command) { print_usage_line(0, ""); print_usage_line(1, "-vet-unused"); - print_usage_line(2, "Checks for unused declarations."); + print_usage_line(2, "Checks for unused declarations (variables and imports)."); print_usage_line(0, ""); print_usage_line(1, "-vet-unused-variables"); @@ -2406,6 +2440,16 @@ print_usage_line(1, "-vet-tabs"); print_usage_line(2, "Errs when tabs have not been used for indentation."); print_usage_line(0, ""); + + print_usage_line(1, "-vet-packages:<comma-separated-strings>"); + print_usage_line(2, "Sets 
which packages by name will be vetted."); + print_usage_line(2, "Files with specific +vet tags will still be vetted even if they are not in the packages set."); + print_usage_line(0, ""); + + print_usage_line(1, "-vet-unused-procedures"); + print_usage_line(2, "Checks for unused procedures."); + print_usage_line(2, "Must be used with -vet-packages, or specified per file with +vet tags."); + print_usage_line(0, ""); } if (check) { @@ -3150,6 +3194,9 @@ int main(int arg_count, char const **arg_ptr) { build_context.command = command; + string_set_init(&build_context.custom_attributes); + string_set_init(&build_context.vet_packages); + if (!parse_build_flags(args)) { return 1; } diff --git a/src/parser.cpp b/src/parser.cpp index 22d92e87b..242671af0 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,10 +1,28 @@ #include "parser_pos.cpp" +gb_internal bool in_vet_packages(AstFile *file) { + if (file == nullptr) { + return true; + } + if (file->pkg == nullptr) { + return true; + } + if (build_context.vet_packages.entries.count == 0) { + return true; + } + return string_set_exists(&build_context.vet_packages, file->pkg->name); +} + gb_internal u64 ast_file_vet_flags(AstFile *f) { if (f != nullptr && f->vet_flags_set) { return f->vet_flags; } - return build_context.vet_flags; + + bool found = in_vet_packages(f); + if (found) { + return build_context.vet_flags; + } + return 0; } gb_internal bool ast_file_vet_style(AstFile *f) { @@ -5378,18 +5396,9 @@ gb_internal Ast *parse_stmt(AstFile *f) { } - -gb_internal u64 check_vet_flags(AstFile *file) { - if (file && file->vet_flags_set) { - return file->vet_flags; - } - return build_context.vet_flags; -} - - gb_internal void parse_enforce_tabs(AstFile *f) { // Checks to see if tabs have been used for indentation - if ((check_vet_flags(f) & VetFlag_Tabs) == 0) { + if ((ast_file_vet_flags(f) & VetFlag_Tabs) == 0) { return; } diff --git a/tests/core/flags/test_core_flags.odin b/tests/core/flags/test_core_flags.odin index f8305f2ea..8fcd6a8a7 100644 --- a/tests/core/flags/test_core_flags.odin +++ b/tests/core/flags/test_core_flags.odin @@ -12,6 +12,26 @@ import "core:strings" import "core:testing" import "core:time/datetime" +Custom_Data :: struct { + a: int, +} + +@(init) +init_custom_type_setter :: proc() { + // NOTE: This is done here so it can be out of the flow of the + // multi-threaded test runner, to prevent any data races that could be + // reported by using `-sanitize:thread`. + // + // Bear in mind that this means every test here acknowledges the `Custom_Data` type. + flags.register_type_setter(proc (data: rawptr, data_type: typeid, _, _: string) -> (string, bool, runtime.Allocator_Error) { + if data_type == Custom_Data { + (cast(^Custom_Data)data).a = 32 + return "", true, nil + } + return "", false, nil + }) +} + @(test) test_no_args :: proc(t: ^testing.T) { S :: struct { @@ -1230,9 +1250,6 @@ test_net :: proc(t: ^testing.T) { @(test) test_custom_type_setter :: proc(t: ^testing.T) { Custom_Bool :: distinct bool - Custom_Data :: struct { - a: int, - } S :: struct { a: Custom_Data, b: Custom_Bool, } s: S - // NOTE: Mind that this setter is global state, and the test runner is multi-threaded. - // It should be fine so long as all type setter tests are in this one test proc. 
- flags.register_type_setter(proc (data: rawptr, data_type: typeid, _, _: string) -> (string, bool, runtime.Allocator_Error) { - if data_type == Custom_Data { - (cast(^Custom_Data)data).a = 32 - return "", true, nil - } - return "", false, nil - }) - defer flags.register_type_setter(nil) args := [?]string { "-a:hellope", "-b:true" } result := flags.parse(&s, args[:]) testing.expect_value(t, result, nil) diff --git a/tests/core/mem/test_mem_dynamic_pool.odin b/tests/core/mem/test_mem_dynamic_pool.odin index d1086cfe6..fa204d3b1 100644 --- a/tests/core/mem/test_mem_dynamic_pool.odin +++ b/tests/core/mem/test_mem_dynamic_pool.odin @@ -6,7 +6,7 @@ import "core:mem" expect_pool_allocation :: proc(t: ^testing.T, expected_used_bytes, num_bytes, alignment: int) { pool: mem.Dynamic_Pool - mem.dynamic_pool_init(pool = &pool, alignment = alignment) + mem.dynamic_pool_init(&pool, alignment = alignment) pool_allocator := mem.dynamic_pool_allocator(&pool) element, err := mem.alloc(num_bytes, alignment, pool_allocator) @@ -48,7 +48,7 @@ expect_pool_allocation_out_of_band :: proc(t: ^testing.T, num_bytes, out_band_si testing.expect(t, num_bytes >= out_band_size, "Sanity check failed, your test call is flawed! Make sure that num_bytes >= out_band_size!") pool: mem.Dynamic_Pool - mem.dynamic_pool_init(pool = &pool, out_band_size = out_band_size) + mem.dynamic_pool_init(&pool, out_band_size = out_band_size) pool_allocator := mem.dynamic_pool_allocator(&pool) element, err := mem.alloc(num_bytes, allocator = pool_allocator) diff --git a/tests/core/normal.odin b/tests/core/normal.odin index 6174f2d5c..0670842ac 100644 --- a/tests/core/normal.odin +++ b/tests/core/normal.odin @@ -39,6 +39,8 @@ download_assets :: proc() { @(require) import "slice" @(require) import "strconv" @(require) import "strings" +@(require) import "sync" +@(require) import "sync/chan" @(require) import "sys/posix" @(require) import "sys/windows" @(require) import "text/i18n" diff --git a/tests/core/sync/chan/test_core_sync_chan.odin b/tests/core/sync/chan/test_core_sync_chan.odin new file mode 100644 index 000000000..9b8d9b354 --- /dev/null +++ b/tests/core/sync/chan/test_core_sync_chan.odin @@ -0,0 +1,274 @@ +package test_core_sync_chan + +import "base:runtime" +import "base:intrinsics" +import "core:log" +import "core:math/rand" +import "core:sync/chan" +import "core:testing" +import "core:thread" +import "core:time" + + +Message_Type :: enum i32 { + Result, + Add, + Multiply, + Subtract, + Divide, + End, +} + +Message :: struct { + type: Message_Type, + i: i64, +} + +Comm :: struct { + host: chan.Chan(Message), + client: chan.Chan(Message), + manual_buffering: bool, +} + +BUFFER_SIZE :: 8 +MAX_RAND :: 32 +FAIL_TIME :: 1 * time.Second +SLEEP_TIME :: 1 * time.Millisecond + +comm_client :: proc(th: ^thread.Thread) { + data := cast(^Comm)th.data + manual_buffering := data.manual_buffering + + n: i64 + + for manual_buffering && !chan.can_recv(data.host) { + thread.yield() + } + + recv_loop: for msg in chan.recv(data.host) { + #partial switch msg.type { + case .Add: n += msg.i + case .Multiply: n *= msg.i + case .Subtract: n -= msg.i + case .Divide: n /= msg.i + case .End: + break recv_loop + case: + panic("Unknown message type for client.") + } + + for manual_buffering && !chan.can_recv(data.host) { + thread.yield() + } + } + + for manual_buffering && !chan.can_send(data.host) { + thread.yield() + } + + chan.send(data.client, Message{.Result, n}) + chan.close(data.client) +} + +send_messages :: proc(t: ^testing.T, host: chan.Chan(Message), 
manual_buffering: bool = false) -> (expected: i64) { + expected = 1 + for manual_buffering && !chan.can_send(host) { + thread.yield() + } + chan.send(host, Message{.Add, 1}) + log.debug(Message{.Add, 1}) + + for _ in 0..<1+2*BUFFER_SIZE { + msg: Message + msg.i = 1 + rand.int63_max(MAX_RAND) + switch rand.int_max(4) { + case 0: + msg.type = .Add + expected += msg.i + case 1: + msg.type = .Multiply + expected *= msg.i + case 2: + msg.type = .Subtract + expected -= msg.i + case 3: + msg.type = .Divide + expected /= msg.i + } + + for manual_buffering && !chan.can_send(host) { + thread.yield() + } + if manual_buffering { + testing.expect(t, chan.len(host) == 0) + } + + chan.send(host, msg) + log.debug(msg) + } + + for manual_buffering && !chan.can_send(host) { + thread.yield() + } + chan.send(host, Message{.End, 0}) + log.debug(Message{.End, 0}) + chan.close(host) + + return +} + +@test +test_chan_buffered :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + comm: Comm + alloc_err: runtime.Allocator_Error + comm.host, alloc_err = chan.create_buffered(chan.Chan(Message), BUFFER_SIZE, context.allocator) + assert(alloc_err == nil, "allocation failed") + comm.client, alloc_err = chan.create_buffered(chan.Chan(Message), BUFFER_SIZE, context.allocator) + assert(alloc_err == nil, "allocation failed") + defer { + chan.destroy(comm.host) + chan.destroy(comm.client) + } + + testing.expect(t, chan.is_buffered(comm.host)) + testing.expect(t, chan.is_buffered(comm.client)) + testing.expect(t, !chan.is_unbuffered(comm.host)) + testing.expect(t, !chan.is_unbuffered(comm.client)) + testing.expect_value(t, chan.len(comm.host), 0) + testing.expect_value(t, chan.len(comm.client), 0) + testing.expect_value(t, chan.cap(comm.host), BUFFER_SIZE) + testing.expect_value(t, chan.cap(comm.client), BUFFER_SIZE) + + reckoner := thread.create(comm_client) + defer thread.destroy(reckoner) + reckoner.data = &comm + thread.start(reckoner) + + expected := send_messages(t, comm.host, manual_buffering = false) + + // Sleep so we can give the other thread enough time to buffer its message. + time.sleep(SLEEP_TIME) + + testing.expect_value(t, chan.len(comm.client), 1) + result, ok := chan.try_recv(comm.client) + + // One more sleep to ensure it has enough time to close. + time.sleep(SLEEP_TIME) + + testing.expect_value(t, chan.is_closed(comm.client), true) + testing.expect_value(t, ok, true) + testing.expect_value(t, result.i, expected) + log.debug(result, expected) + + // Make sure sending to closed channels fails. 
+ testing.expect_value(t, chan.send(comm.host, Message{.End, 0}), false) + testing.expect_value(t, chan.send(comm.client, Message{.End, 0}), false) + testing.expect_value(t, chan.try_send(comm.host, Message{.End, 0}), false) + testing.expect_value(t, chan.try_send(comm.client, Message{.End, 0}), false) + _, ok = chan.recv(comm.host); testing.expect_value(t, ok, false) + _, ok = chan.recv(comm.client); testing.expect_value(t, ok, false) + _, ok = chan.try_recv(comm.host); testing.expect_value(t, ok, false) + _, ok = chan.try_recv(comm.client); testing.expect_value(t, ok, false) +} + +@test +test_chan_unbuffered :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + comm: Comm + comm.manual_buffering = true + alloc_err: runtime.Allocator_Error + comm.host, alloc_err = chan.create_unbuffered(chan.Chan(Message), context.allocator) + assert(alloc_err == nil, "allocation failed") + comm.client, alloc_err = chan.create_unbuffered(chan.Chan(Message), context.allocator) + assert(alloc_err == nil, "allocation failed") + defer { + chan.destroy(comm.host) + chan.destroy(comm.client) + } + + testing.expect(t, !chan.is_buffered(comm.host)) + testing.expect(t, !chan.is_buffered(comm.client)) + testing.expect(t, chan.is_unbuffered(comm.host)) + testing.expect(t, chan.is_unbuffered(comm.client)) + testing.expect_value(t, chan.len(comm.host), 0) + testing.expect_value(t, chan.len(comm.client), 0) + testing.expect_value(t, chan.cap(comm.host), 0) + testing.expect_value(t, chan.cap(comm.client), 0) + + reckoner := thread.create(comm_client) + defer thread.destroy(reckoner) + reckoner.data = &comm + thread.start(reckoner) + + for !chan.can_send(comm.client) { + thread.yield() + } + + expected := send_messages(t, comm.host) + testing.expect_value(t, chan.is_closed(comm.host), true) + + for !chan.can_recv(comm.client) { + thread.yield() + } + + result, ok := chan.try_recv(comm.client) + testing.expect_value(t, ok, true) + testing.expect_value(t, result.i, expected) + log.debug(result, expected) + + // Sleep so we can give the other thread enough time to close its side + // after we've received its message. + time.sleep(SLEEP_TIME) + + testing.expect_value(t, chan.is_closed(comm.client), true) + + // Make sure sending and receiving on closed channels fails. + testing.expect_value(t, chan.send(comm.host, Message{.End, 0}), false) + testing.expect_value(t, chan.send(comm.client, Message{.End, 0}), false) + testing.expect_value(t, chan.try_send(comm.host, Message{.End, 0}), false) + testing.expect_value(t, chan.try_send(comm.client, Message{.End, 0}), false) + _, ok = chan.recv(comm.host); testing.expect_value(t, ok, false) + _, ok = chan.recv(comm.client); testing.expect_value(t, ok, false) + _, ok = chan.try_recv(comm.host); testing.expect_value(t, ok, false) + _, ok = chan.try_recv(comm.client); testing.expect_value(t, ok, false) +} + +@test +test_full_buffered_closed_chan_deadlock :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + ch, alloc_err := chan.create_buffered(chan.Chan(int), 1, context.allocator) + assert(alloc_err == nil, "allocation failed") + defer chan.destroy(ch) + + testing.expect(t, chan.can_send(ch)) + testing.expect(t, chan.send(ch, 32)) + testing.expect(t, chan.close(ch)) + testing.expect(t, !chan.send(ch, 32)) +} + +// This test guarantees a buffered channel's messages can still be received +// even after closing. This is currently how the API works. If that changes, +// this test will need to change. 
+@test +test_accept_message_from_closed_buffered_chan :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + ch, alloc_err := chan.create_buffered(chan.Chan(int), 2, context.allocator) + assert(alloc_err == nil, "allocation failed") + defer chan.destroy(ch) + + testing.expect(t, chan.can_send(ch)) + testing.expect(t, chan.send(ch, 32)) + testing.expect(t, chan.send(ch, 64)) + testing.expect(t, chan.close(ch)) + result, ok := chan.recv(ch) + testing.expect_value(t, result, 32) + testing.expect(t, ok) + result, ok = chan.try_recv(ch) + testing.expect_value(t, result, 64) + testing.expect(t, ok) +} diff --git a/tests/core/sync/test_core_sync.odin b/tests/core/sync/test_core_sync.odin new file mode 100644 index 000000000..d6a7a9517 --- /dev/null +++ b/tests/core/sync/test_core_sync.odin @@ -0,0 +1,714 @@ +// NOTE(Feoramund): These tests should be run a few hundred times, with and +// without `-sanitize:thread` enabled, to ensure maximum safety. +// +// Keep in mind that running with the debug logs uncommented can result in +// failures disappearing due to the delay of sending the log message causing +// different synchronization patterns. + +package test_core_sync + +import "base:intrinsics" +// import "core:log" +import "core:sync" +import "core:testing" +import "core:thread" +import "core:time" + +FAIL_TIME :: 1 * time.Second +SLEEP_TIME :: 1 * time.Millisecond +SMALL_SLEEP_TIME :: 10 * time.Microsecond + +// This needs to be high enough to cause a data race if any of the +// synchronization primitives fail. +THREADS :: 8 + +// Manually wait on all threads to finish. +// +// This reduces a dependency on a `Wait_Group` or similar primitives. +// +// It's also important that we wait for every thread to finish, as it's +// possible for a thread to finish after the test if we don't check, despite +// joining it to the test thread. 
+wait_for :: proc(threads: []^thread.Thread) { + wait_loop: for { + count := len(threads) + for v in threads { + if thread.is_done(v) { + count -= 1 + } + } + if count == 0 { + break wait_loop + } + thread.yield() + } + for t in threads { + thread.join(t) + thread.destroy(t) + } +} + +// +// core:sync/primitives.odin +// + +@test +test_mutex :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + m: sync.Mutex, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + // log.debugf("MUTEX-%v> locking", th.id) + sync.mutex_lock(&data.m) + data.number += 1 + // log.debugf("MUTEX-%v> unlocking", th.id) + sync.mutex_unlock(&data.m) + // log.debugf("MUTEX-%v> leaving", th.id) + } + + data: Data + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + wait_for(threads[:]) + + testing.expect_value(t, data.number, THREADS) +} + +@test +test_rw_mutex :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + m1: sync.RW_Mutex, + m2: sync.RW_Mutex, + number1: int, + number2: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + sync.rw_mutex_shared_lock(&data.m1) + n := data.number1 + sync.rw_mutex_shared_unlock(&data.m1) + + sync.rw_mutex_lock(&data.m2) + data.number2 += n + sync.rw_mutex_unlock(&data.m2) + } + + data: Data + threads: [THREADS]^thread.Thread + + sync.rw_mutex_lock(&data.m1) + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + data.number1 = 1 + sync.rw_mutex_unlock(&data.m1) + + wait_for(threads[:]) + + testing.expect_value(t, data.number2, THREADS) +} + +@test +test_recursive_mutex :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + m: sync.Recursive_Mutex, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + // log.debugf("REC_MUTEX-%v> locking", th.id) + tried1 := sync.recursive_mutex_try_lock(&data.m) + for _ in 0..<3 { + sync.recursive_mutex_lock(&data.m) + } + tried2 := sync.recursive_mutex_try_lock(&data.m) + // log.debugf("REC_MUTEX-%v> locked", th.id) + data.number += 1 + // log.debugf("REC_MUTEX-%v> unlocking", th.id) + for _ in 0..<3 { + sync.recursive_mutex_unlock(&data.m) + } + if tried1 { sync.recursive_mutex_unlock(&data.m) } + if tried2 { sync.recursive_mutex_unlock(&data.m) } + // log.debugf("REC_MUTEX-%v> leaving", th.id) + } + + data: Data + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + wait_for(threads[:]) + + testing.expect_value(t, data.number, THREADS) +} + +@test +test_cond :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + c: sync.Cond, + m: sync.Mutex, + i: int, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + sync.mutex_lock(&data.m) + + for intrinsics.atomic_load(&data.i) != 1 { + sync.cond_wait(&data.c, &data.m) + } + + data.number += intrinsics.atomic_load(&data.i) + + sync.mutex_unlock(&data.m) + } + + data: Data + threads: [THREADS]^thread.Thread + data.i = -1 + + sync.mutex_lock(&data.m) + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + time.sleep(SLEEP_TIME) + data.i = 1 + sync.mutex_unlock(&data.m) + sync.cond_broadcast(&data.c) + + wait_for(threads[:]) + + 
testing.expect_value(t, data.number, THREADS) +} + +@test +test_cond_with_timeout :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + c: sync.Cond + m: sync.Mutex + sync.mutex_lock(&m) + sync.cond_wait_with_timeout(&c, &m, SLEEP_TIME) +} + +@test +test_semaphore :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + s: sync.Sema, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + // log.debugf("SEM-%v> waiting", th.id) + sync.sema_wait(&data.s) + data.number += 1 + // log.debugf("SEM-%v> posting", th.id) + sync.sema_post(&data.s) + // log.debugf("SEM-%v> leaving", th.id) + } + + data: Data + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + sync.sema_post(&data.s) + + wait_for(threads[:]) + + testing.expect_value(t, data.number, THREADS) +} + +@test +test_semaphore_with_timeout :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + s: sync.Sema + sync.sema_wait_with_timeout(&s, SLEEP_TIME) +} + +@test +test_futex :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + f: sync.Futex, + i: int, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + // log.debugf("FUTEX-%v> waiting", th.id) + sync.futex_wait(&data.f, 3) + // log.debugf("FUTEX-%v> done", th.id) + + n := data.i + intrinsics.atomic_add(&data.number, n) + } + + data: Data + data.i = -1 + data.f = 3 + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + data.i = 1 + // Change the futex variable to keep late-starters from stalling. + data.f = 0 + sync.futex_broadcast(&data.f) + + wait_for(threads[:]) + + testing.expect_value(t, data.number, THREADS) +} + +@test +test_futex_with_timeout :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + f: sync.Futex = 1 + sync.futex_wait_with_timeout(&f, 1, SLEEP_TIME) +} + +// +// core:sync/extended.odin +// + +@test +test_wait_group :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + step1: sync.Wait_Group, + step2: sync.Wait_Group, + i: int, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + sync.wait_group_wait(&data.step1) + + n := data.i + intrinsics.atomic_add(&data.number, n) + + sync.wait_group_done(&data.step2) + } + + data: Data + data.i = -1 + threads: [THREADS]^thread.Thread + + sync.wait_group_add(&data.step1, 1) + sync.wait_group_add(&data.step2, THREADS) + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + time.sleep(SMALL_SLEEP_TIME) + data.i = 1 + sync.wait_group_done(&data.step1) + + sync.wait_group_wait(&data.step2) + + wait_for(threads[:]) + + testing.expect_value(t, data.step1.counter, 0) + testing.expect_value(t, data.step2.counter, 0) + testing.expect_value(t, data.number, THREADS) +} + +@test +test_wait_group_with_timeout :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + wg: sync.Wait_Group + sync.wait_group_wait_with_timeout(&wg, SLEEP_TIME) +} + +@test +test_barrier :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + b: sync.Barrier, + i: int, + number: int, + + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + sync.barrier_wait(&data.b) + + intrinsics.atomic_add(&data.number, 
data.i) + } + + data: Data + data.i = -1 + threads: [THREADS]^thread.Thread + + sync.barrier_init(&data.b, THREADS + 1) // +1 for this thread, of course. + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + time.sleep(SMALL_SLEEP_TIME) + data.i = 1 + sync.barrier_wait(&data.b) + + wait_for(threads[:]) + + testing.expect_value(t, data.b.index, 0) + testing.expect_value(t, data.b.generation_id, 1) + testing.expect_value(t, data.b.thread_count, THREADS + 1) + testing.expect_value(t, data.number, THREADS) +} + +@test +test_auto_reset :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + a: sync.Auto_Reset_Event, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + // log.debugf("AUR-%v> entering", th.id) + sync.auto_reset_event_wait(&data.a) + // log.debugf("AUR-%v> adding", th.id) + data.number += 1 + // log.debugf("AUR-%v> signalling", th.id) + sync.auto_reset_event_signal(&data.a) + // log.debugf("AUR-%v> leaving", th.id) + } + + data: Data + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + // There is a chance that this test can stall if a signal is sent before + // all threads are queued, because it's possible for some number of threads + // to get to the waiting state, the signal to fire, all of the waited + // threads to pass successfully, then the other threads come in with no-one + // to run a signal. + // + // So we'll just test a fully-waited queue of cascading threads. + for { + status := intrinsics.atomic_load(&data.a.status) + if status == -THREADS { + // log.debug("All Auto_Reset_Event threads have queued.") + break + } + intrinsics.cpu_relax() + } + + sync.auto_reset_event_signal(&data.a) + + wait_for(threads[:]) + + // The last thread should leave this primitive in a signalled state. 
+ testing.expect_value(t, data.a.status, 1) + testing.expect_value(t, data.number, THREADS) +} + +@test +test_auto_reset_already_signalled :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + a: sync.Auto_Reset_Event + sync.auto_reset_event_signal(&a) + sync.auto_reset_event_wait(&a) + testing.expect_value(t, a.status, 0) +} + +@test +test_ticket_mutex :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + m: sync.Ticket_Mutex, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + // log.debugf("TIC-%i> entering", th.id) + // intrinsics.debug_trap() + sync.ticket_mutex_lock(&data.m) + // log.debugf("TIC-%i> locked", th.id) + data.number += 1 + // log.debugf("TIC-%i> unlocking", th.id) + sync.ticket_mutex_unlock(&data.m) + // log.debugf("TIC-%i> leaving", th.id) + } + + data: Data + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + wait_for(threads[:]) + + testing.expect_value(t, data.m.ticket, THREADS) + testing.expect_value(t, data.m.serving, THREADS) + testing.expect_value(t, data.number, THREADS) +} + +@test +test_benaphore :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + b: sync.Benaphore, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + sync.benaphore_lock(&data.b) + data.number += 1 + sync.benaphore_unlock(&data.b) + } + + data: Data + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + wait_for(threads[:]) + + testing.expect_value(t, data.b.counter, 0) + testing.expect_value(t, data.number, THREADS) +} + +@test +test_recursive_benaphore :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + b: sync.Recursive_Benaphore, + number: int, + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + + // log.debugf("REC_BEP-%i> entering", th.id) + tried1 := sync.recursive_benaphore_try_lock(&data.b) + for _ in 0..<3 { + sync.recursive_benaphore_lock(&data.b) + } + tried2 := sync.recursive_benaphore_try_lock(&data.b) + // log.debugf("REC_BEP-%i> locked", th.id) + data.number += 1 + for _ in 0..<3 { + sync.recursive_benaphore_unlock(&data.b) + } + if tried1 { sync.recursive_benaphore_unlock(&data.b) } + if tried2 { sync.recursive_benaphore_unlock(&data.b) } + // log.debugf("REC_BEP-%i> leaving", th.id) + } + + data: Data + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + wait_for(threads[:]) + + // The benaphore should be unowned at the end. 
+ testing.expect_value(t, data.b.counter, 0) + testing.expect_value(t, data.b.owner, 0) + testing.expect_value(t, data.b.recursion, 0) + testing.expect_value(t, data.number, THREADS) +} + +@test +test_once :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + once: sync.Once, + number: int, + } + + write :: proc "contextless" (data: rawptr) { + data := cast(^Data)data + data.number += 1 + } + + p :: proc(th: ^thread.Thread) { + data := cast(^Data)th.data + // log.debugf("ONCE-%v> entering", th.id) + sync.once_do_with_data_contextless(&data.once, write, data) + // log.debugf("ONCE-%v> leaving", th.id) + } + + data: Data + threads: [THREADS]^thread.Thread + + for &v in threads { + v = thread.create(p) + v.data = &data + v.init_context = context + thread.start(v) + } + + wait_for(threads[:]) + + testing.expect_value(t, data.once.done, true) + testing.expect_value(t, data.number, 1) +} + +@test +test_park :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + car: sync.Parker, + number: int, + } + + data: Data + + th := thread.create_and_start_with_data(&data, proc(data: rawptr) { + data := cast(^Data)data + time.sleep(SLEEP_TIME) + sync.unpark(&data.car) + data.number += 1 + }) + + sync.park(&data.car) + + wait_for([]^thread.Thread{ th }) + + PARKER_EMPTY :: 0 + testing.expect_value(t, data.car.state, PARKER_EMPTY) + testing.expect_value(t, data.number, 1) +} + +@test +test_park_with_timeout :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + car: sync.Parker + sync.park_with_timeout(&car, SLEEP_TIME) +} + +@test +test_one_shot_event :: proc(t: ^testing.T) { + testing.set_fail_timeout(t, FAIL_TIME) + + Data :: struct { + event: sync.One_Shot_Event, + number: int, + } + + data: Data + + th := thread.create_and_start_with_data(&data, proc(data: rawptr) { + data := cast(^Data)data + time.sleep(SLEEP_TIME) + sync.one_shot_event_signal(&data.event) + data.number += 1 + }) + + sync.one_shot_event_wait(&data.event) + + wait_for([]^thread.Thread{ th }) + + testing.expect_value(t, data.event.state, 1) + testing.expect_value(t, data.number, 1) +} diff --git a/tests/core/sys/posix/structs.odin b/tests/core/sys/posix/structs.odin index 0234e41c0..52716b545 100644 --- a/tests/core/sys/posix/structs.odin +++ b/tests/core/sys/posix/structs.odin @@ -63,6 +63,9 @@ execute_struct_checks :: proc(t: ^testing.T) { waiting: for { status: i32 wpid := posix.waitpid(pid, &status, {}) + if status == posix.EINTR { + continue + } if !testing.expectf(t, wpid != -1, "waitpid() failure: %v", posix.strerror()) { return false } diff --git a/vendor/box2d/box2d.odin b/vendor/box2d/box2d.odin index 081e0861b..c1d789273 100644 --- a/vendor/box2d/box2d.odin +++ b/vendor/box2d/box2d.odin @@ -3,7 +3,11 @@ package vendor_box2d import "base:intrinsics" import "core:c" -@(private) VECTOR_EXT :: "avx2" when #config(VENDOR_BOX2D_ENABLE_AVX2, intrinsics.has_target_feature("avx2")) else "sse2" +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + @(private) VECTOR_EXT :: "_simd" when #config(VENDOR_BOX2D_ENABLE_SIMD128, intrinsics.has_target_feature("simd128")) else "" +} else { + @(private) VECTOR_EXT :: "avx2" when #config(VENDOR_BOX2D_ENABLE_AVX2, intrinsics.has_target_feature("avx2")) else "sse2" +} when ODIN_OS == .Windows { @(private) LIB_PATH :: "lib/box2d_windows_amd64_" + VECTOR_EXT + ".lib" @@ -13,6 +17,8 @@ when ODIN_OS == .Windows { @(private) LIB_PATH :: "lib/box2d_darwin_amd64_" + VECTOR_EXT + ".a" } else when ODIN_ARCH == .amd64 { 
@(private) LIB_PATH :: "lib/box2d_other_amd64_" + VECTOR_EXT + ".a" +} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + @(private) LIB_PATH :: "lib/box2d_wasm" + VECTOR_EXT + ".o" } else { @(private) LIB_PATH :: "lib/box2d_other.a" } @@ -21,8 +27,16 @@ when !#exists(LIB_PATH) { #panic("Could not find the compiled box2d libraries at \"" + LIB_PATH + "\", they can be compiled by running the `build.sh` script at `" + ODIN_ROOT + "vendor/box2d/build_box2d.sh\"`") } -foreign import lib { - LIB_PATH, +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + when VECTOR_EXT == "_simd" { + foreign import lib "lib/box2d_wasm_simd.o" + } else { + foreign import lib "lib/box2d_wasm.o" + } +} else { + foreign import lib { + LIB_PATH, + } } @@ -1520,4 +1534,4 @@ IsValid :: proc{ Joint_IsValid, IsValidRay, -}
\ No newline at end of file +} diff --git a/vendor/box2d/box2d_wasm.odin b/vendor/box2d/box2d_wasm.odin new file mode 100644 index 000000000..eab369a0d --- /dev/null +++ b/vendor/box2d/box2d_wasm.odin @@ -0,0 +1,4 @@ +//+build wasm32, wasm64p32 +package vendor_box2d + +@(require) import _ "vendor:libc" diff --git a/vendor/box2d/build_box2d.sh b/vendor/box2d/build_box2d.sh index 3e74c0218..0c5ff999d 100755 --- a/vendor/box2d/build_box2d.sh +++ b/vendor/box2d/build_box2d.sh @@ -68,5 +68,7 @@ esac cd .. +make -f wasm.Makefile + rm -rf v3.0.0.tar.gz rm -rf box2d-3.0.0 diff --git a/vendor/box2d/lib/box2d_wasm.o b/vendor/box2d/lib/box2d_wasm.o Binary files differnew file mode 100755 index 000000000..15d71c5a1 --- /dev/null +++ b/vendor/box2d/lib/box2d_wasm.o diff --git a/vendor/box2d/lib/box2d_wasm_simd.o b/vendor/box2d/lib/box2d_wasm_simd.o Binary files differnew file mode 100755 index 000000000..568900bd9 --- /dev/null +++ b/vendor/box2d/lib/box2d_wasm_simd.o diff --git a/vendor/box2d/wasm.Makefile b/vendor/box2d/wasm.Makefile new file mode 100644 index 000000000..929b61aea --- /dev/null +++ b/vendor/box2d/wasm.Makefile @@ -0,0 +1,32 @@ +# Custom Makefile to build box2d for Odin's WASM targets. +# I tried to make a cmake toolchain file for this / use cmake but this is far easier. +# NOTE: We are pretending to be emscripten to box2d so it takes WASM code paths, but we don't actually use emscripten. + +# CC = $(shell brew --prefix llvm)/bin/clang +# LD = $(shell brew --prefix llvm)/bin/wasm-ld + +VERSION = 3.0.0 +SRCS = $(wildcard box2d-$(VERSION)/src/*.c) +OBJS_SIMD = $(SRCS:.c=_simd.o) +OBJS = $(SRCS:.c=.o) +SYSROOT = $(shell odin root)/vendor/libc +CFLAGS = -Ibox2d-$(VERSION)/include -Ibox2d-$(VERSION)/Extern/simde --target=wasm32 -D__EMSCRIPTEN__ -DNDEBUG -O3 --sysroot=$(SYSROOT) + +all: lib/box2d_wasm.o lib/box2d_wasm_simd.o clean + +%.o: %.c + $(CC) -c $(CFLAGS) $< -o $@ + +%_simd.o: %.c + $(CC) -c $(CFLAGS) -msimd128 $< -o $@ + +lib/box2d_wasm.o: $(OBJS) + $(LD) -r -o lib/box2d_wasm.o $(OBJS) + +lib/box2d_wasm_simd.o: $(OBJS_SIMD) + $(LD) -r -o lib/box2d_wasm_simd.o $(OBJS_SIMD) + +clean: + rm -rf $(OBJS) $(OBJS_SIMD) + +.PHONY: clean diff --git a/vendor/cgltf/cgltf.odin b/vendor/cgltf/cgltf.odin index f4518360d..a24c36d64 100644 --- a/vendor/cgltf/cgltf.odin +++ b/vendor/cgltf/cgltf.odin @@ -5,6 +5,7 @@ LIB :: ( "lib/cgltf.lib" when ODIN_OS == .Windows else "lib/cgltf.a" when ODIN_OS == .Linux else "lib/darwin/cgltf.a" when ODIN_OS == .Darwin + else "lib/cgltf_wasm.o" when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 else "" ) @@ -13,7 +14,11 @@ when LIB != "" { // Windows library is shipped with the compiler, so a Windows specific message should not be needed. 
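// (For the WASM targets, lib/cgltf_wasm.o is instead produced by the new `wasm` rule added to src/Makefile further down in this commit.)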
#panic("Could not find the compiled cgltf library, it can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/cgltf/src\"`") } +} +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + foreign import lib "lib/cgltf_wasm.o" +} else when LIB != "" { foreign import lib { LIB } } else { foreign import lib "system:cgltf" diff --git a/vendor/cgltf/cgltf_wasm.odin b/vendor/cgltf/cgltf_wasm.odin new file mode 100644 index 000000000..f2da86a2c --- /dev/null +++ b/vendor/cgltf/cgltf_wasm.odin @@ -0,0 +1,4 @@ +//+build wasm32, wasm64p32 +package cgltf + +@(require) import _ "vendor:libc" diff --git a/vendor/cgltf/lib/cgltf_wasm.o b/vendor/cgltf/lib/cgltf_wasm.o Binary files differnew file mode 100644 index 000000000..54346d176 --- /dev/null +++ b/vendor/cgltf/lib/cgltf_wasm.o diff --git a/vendor/cgltf/src/Makefile b/vendor/cgltf/src/Makefile index ede3d158e..0dd450ce0 100644 --- a/vendor/cgltf/src/Makefile +++ b/vendor/cgltf/src/Makefile @@ -6,6 +6,10 @@ else all: unix endif +wasm: + mkdir -p ../lib + $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc cgltf.c -o ../lib/cgltf_wasm.o + unix: mkdir -p ../lib $(CC) -c -O2 -Os -fPIC cgltf.c diff --git a/vendor/libc/README.md b/vendor/libc/README.md new file mode 100644 index 000000000..1dd50c5a9 --- /dev/null +++ b/vendor/libc/README.md @@ -0,0 +1,12 @@ +# vendor:libc + +A (very small) subset of a libc implementation over Odin libraries. +This is mainly intended for use in Odin WASM builds to allow using libraries like box2d, cgltf etc. without emscripten hacks. + +You can use this with clang by doing `clang -c --target=wasm32 --sysroot=$(odin root)/vendor/libc` (+ all other flags and inputs). +This will (if all the libc usage of the library is implemented) spit out a `.o` file you can use with the foreign import system. +If you then also make sure this package is included in the Odin side of the project (`@(require) import "vendor:libc"`) you will be able +compile to WASM like Odin expects. + +This is currently used by `vendor:box2d`, `vendor:stb/image`, `vendor:stb/truetype`, `vendor:stb/rect_pack`, and `vendor:cgltf`. +You can see how building works by looking at those. diff --git a/vendor/libc/assert.odin b/vendor/libc/assert.odin new file mode 100644 index 000000000..c34bd1f2d --- /dev/null +++ b/vendor/libc/assert.odin @@ -0,0 +1,15 @@ +package odin_libc + +import "base:runtime" + +@(require, linkage="strong", link_name="__odin_libc_assert_fail") +__odin_libc_assert_fail :: proc "c" (func: cstring, file: cstring, line: i32, expr: cstring) -> ! { + context = g_ctx + loc := runtime.Source_Code_Location{ + file_path = string(file), + line = line, + column = 0, + procedure = string(func), + } + context.assertion_failure_proc("runtime assertion", string(expr), loc) +} diff --git a/vendor/libc/include/assert.h b/vendor/libc/include/assert.h new file mode 100644 index 000000000..a6fb6c696 --- /dev/null +++ b/vendor/libc/include/assert.h @@ -0,0 +1,16 @@ +#ifdef NDEBUG +#define assert(e) ((void)0) +#else + +#ifdef __FILE_NAME__ +#define __ASSERT_FILE_NAME __FILE_NAME__ +#else /* __FILE_NAME__ */ +#define __ASSERT_FILE_NAME __FILE__ +#endif /* __FILE_NAME__ */ + +void __odin_libc_assert_fail(const char *, const char *, int, const char *); + +#define assert(e) \ + (__builtin_expect(!(e), 0) ? 
__odin_libc_assert_fail(__func__, __ASSERT_FILE_NAME, __LINE__, #e) : (void)0) + +#endif /* NDEBUG */ diff --git a/vendor/libc/include/math.h b/vendor/libc/include/math.h new file mode 100644 index 000000000..3f60d698f --- /dev/null +++ b/vendor/libc/include/math.h @@ -0,0 +1,21 @@ +#include <stdbool.h> + +float sqrtf(float); +float cosf(float); +float sinf(float); +float atan2f(float, float); +bool isnan(float); +bool isinf(float); +double floor(double x); +double ceil(double x); +double sqrt(double x); +double pow(double x, double y); +double fmod(double x, double y); +double cos(double x); +double acos(double x); +double fabs(double x); +int abs(int); +double ldexp(double, int); +double exp(double); +float log(float); +float sin(float); diff --git a/vendor/libc/include/stdio.h b/vendor/libc/include/stdio.h new file mode 100644 index 000000000..807437f3c --- /dev/null +++ b/vendor/libc/include/stdio.h @@ -0,0 +1,47 @@ +#include <stddef.h> +#include <stdarg.h> + +#pragma once + +typedef struct {} FILE; + +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +#define stdout ((FILE *)2) +#define stderr ((FILE *)3) + +FILE *fopen(const char *, char *); +int fclose(FILE *); +int fseek(FILE *, long, int); +long ftell(FILE *); +size_t fread(void *, size_t, size_t, FILE *); +size_t fwrite(const void *, size_t, size_t, FILE *); + +int vfprintf(FILE *, const char *, va_list); +int vsnprintf(char *, size_t, const char *, va_list); + +static inline int snprintf(char *buf, size_t size, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + int result = vsnprintf(buf, size, fmt, args); + va_end(args); + return result; +} + +static inline int fprintf(FILE *f, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + int result = vfprintf(f, fmt, args); + va_end(args); + return result; +} + +static inline int printf(const char *fmt, ...) 
{ + va_list args; + va_start(args, fmt); + int result = vfprintf(stdout, fmt, args); + va_end(args); + return result; +} diff --git a/vendor/libc/include/stdlib.h b/vendor/libc/include/stdlib.h new file mode 100644 index 000000000..22cfc528b --- /dev/null +++ b/vendor/libc/include/stdlib.h @@ -0,0 +1,19 @@ +#include <stddef.h> + +void *malloc(size_t size); + +void *aligned_alloc(size_t alignment, size_t size); + +void free(void *); + +void *realloc(void *, size_t); + +void qsort(void* base, size_t num, size_t size, int (*compare)(const void*, const void*)); + +int atoi(const char *); +long atol(const char *); +long long atoll(const char *); + +double atof(const char *); + +long strtol(const char *, char **, int); diff --git a/vendor/libc/include/string.h b/vendor/libc/include/string.h new file mode 100644 index 000000000..4571f9454 --- /dev/null +++ b/vendor/libc/include/string.h @@ -0,0 +1,21 @@ +#include <stddef.h> + +void *memcpy(void *, const void *, size_t); +void *memset(void *, int, size_t); +void *memmove(void *, void *, size_t); +int memcmp(const void *, const void *, size_t); + +unsigned long strlen(const char *str); + +char *strchr(const char *, int); +char *strrchr(const char *, int); + +char *strncpy(char *, const char *, size_t); +char *strcpy(char *, const char *); + +size_t strcspn(const char *, const char *); + +int strcmp(const char *, const char *); +int strncmp(const char *, const char *, size_t); + +char *strstr(const char *, const char *); diff --git a/vendor/libc/libc.odin b/vendor/libc/libc.odin new file mode 100644 index 000000000..00d687109 --- /dev/null +++ b/vendor/libc/libc.odin @@ -0,0 +1,25 @@ +package odin_libc + +import "base:runtime" + +import "core:mem" + +@(private) +g_ctx: runtime.Context +@(private) +g_allocator: mem.Compat_Allocator + +@(init) +init_context :: proc() { + g_ctx = context + + // Wrapping the allocator with the mem.Compat_Allocator so we can + // mimic the realloc semantics. + mem.compat_allocator_init(&g_allocator, g_ctx.allocator) + g_ctx.allocator = mem.compat_allocator(&g_allocator) +} + +// NOTE: the allocator must respect an `old_size` of `-1` on resizes! 
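+// (mem.Compat_Allocator, wrapped in init_context above, exists to satisfy exactly that requirement for ordinary allocators.) +// +// A sketch of re-capturing the context after swapping allocators at startup, +// assuming the package is imported as `import libc "vendor:libc"` and +// my_tracking_allocator is some hypothetical user allocator: +// +//	main :: proc() { +//		context.allocator = my_tracking_allocator() +//		libc.set_context() +//	}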
+set_context :: proc(ctx := context) { + g_ctx = ctx +} diff --git a/vendor/libc/math.odin b/vendor/libc/math.odin new file mode 100644 index 000000000..59f42dd67 --- /dev/null +++ b/vendor/libc/math.odin @@ -0,0 +1,100 @@ +package odin_libc + +import "base:builtin" + +import "core:math" + +@(require, linkage="strong", link_name="sqrtf") +sqrtf :: proc "c" (v: f32) -> f32 { + return math.sqrt(v) +} + +@(require, linkage="strong", link_name="cosf") +cosf :: proc "c" (v: f32) -> f32 { + return math.cos(v) +} + +@(require, linkage="strong", link_name="sinf") +sinf :: proc "c" (v: f32) -> f32 { + return math.sin(v) +} + +@(require, linkage="strong", link_name="atan2f") +atan2f :: proc "c" (v: f32, v2: f32) -> f32 { + return math.atan2(v, v2) +} + +@(require, linkage="strong", link_name="isnan") +isnan :: proc "c" (v: f32) -> bool { + return math.is_nan(v) +} + +@(require, linkage="strong", link_name="isinf") +isinf :: proc "c" (v: f32) -> bool { + return math.is_inf(v) +} + +@(require, linkage="strong", link_name="sqrt") +sqrt :: proc "c" (x: f64) -> f64 { + return math.sqrt(x) +} + +@(require, linkage="strong", link_name="floor") +floor :: proc "c" (x: f64) -> f64 { + return math.floor(x) +} + +@(require, linkage="strong", link_name="ceil") +ceil :: proc "c" (x: f64) -> f64 { + return math.ceil(x) +} + +@(require, linkage="strong", link_name="pow") +pow :: proc "c" (x, y: f64) -> f64 { + return math.pow(x, y) +} + +@(require, linkage="strong", link_name="fmod") +fmod :: proc "c" (x, y: f64) -> f64 { + return math.mod(x, y) +} + +@(require, linkage="strong", link_name="cos") +cos :: proc "c" (x: f64) -> f64 { + return math.cos(x) +} + +@(require, linkage="strong", link_name="acos") +acos :: proc "c" (x: f64) -> f64 { + return math.acos(x) +} + +@(require, linkage="strong", link_name="fabs") +fabs :: proc "c" (x: f64) -> f64 { + return math.abs(x) +} + +@(require, linkage="strong", link_name="abs") +abs :: proc "c" (x: i32) -> i32 { + return builtin.abs(x) +} + +@(require, linkage="strong", link_name="ldexp") +ldexp :: proc "c" (x: f64, y: i32) -> f64{ + return math.ldexp(x, int(y)) +} + +@(require, linkage="strong", link_name="exp") +exp :: proc "c" (x: f64) -> f64 { + return math.exp(x) +} + +@(require, linkage="strong", link_name="log") +log :: proc "c" (x: f32) -> f32 { + return math.ln(x) +} + +@(require, linkage="strong", link_name="sin") +sin :: proc "c" (x: f32) -> f32 { + return math.sin(x) +} diff --git a/vendor/libc/stdio.odin b/vendor/libc/stdio.odin new file mode 100644 index 000000000..10b95b96b --- /dev/null +++ b/vendor/libc/stdio.odin @@ -0,0 +1,106 @@ +package odin_libc + +import "core:c" +import "core:io" +import "core:os" + +import stb "vendor:stb/sprintf" + +FILE :: uintptr + +@(require, linkage="strong", link_name="fopen") +fopen :: proc "c" (path: cstring, mode: cstring) -> FILE { + context = g_ctx + unimplemented("odin_libc.fopen") +} + +@(require, linkage="strong", link_name="fseek") +fseek :: proc "c" (file: FILE, offset: c.long, whence: i32) -> i32 { + context = g_ctx + handle := os.Handle(file-1) + _, err := os.seek(handle, i64(offset), int(whence)) + if err != nil { + return -1 + } + return 0 +} + +@(require, linkage="strong", link_name="ftell") +ftell :: proc "c" (file: FILE) -> c.long { + context = g_ctx + handle := os.Handle(file-1) + off, err := os.seek(handle, 0, os.SEEK_CUR) + if err != nil { + return -1 + } + return c.long(off) +} + +@(require, linkage="strong", link_name="fclose") +fclose :: proc "c" (file: FILE) -> i32 { + context = g_ctx + handle := 
os.Handle(file-1) +	if os.close(handle) != nil { +		return -1 +	} +	return 0 +} + +@(require, linkage="strong", link_name="fread") +fread :: proc "c" (buffer: [^]byte, size: uint, count: uint, file: FILE) -> uint { +	context = g_ctx +	handle := os.Handle(file-1) +	// read size*count bytes; C's fread returns the number of whole items transferred +	n, _ := os.read(handle, buffer[:size*count]) +	return uint(max(0, n)) / max(size, 1) +} + +@(require, linkage="strong", link_name="fwrite") +fwrite :: proc "c" (buffer: [^]byte, size: uint, count: uint, file: FILE) -> uint { +	context = g_ctx +	handle := os.Handle(file-1) +	// write size*count bytes; C's fwrite returns the number of whole items transferred +	n, _ := os.write(handle, buffer[:size*count]) +	return uint(max(0, n)) / max(size, 1) +} + +@(require, linkage="strong", link_name="vsnprintf") +vsnprintf :: proc "c" (buf: [^]byte, count: uint, fmt: cstring, args: ^c.va_list) -> i32 { +	i32_count := i32(count) +	assert_contextless(i32_count >= 0) +	return stb.vsnprintf(buf, i32_count, fmt, args) +} + +@(require, linkage="strong", link_name="vfprintf") +vfprintf :: proc "c" (file: FILE, fmt: cstring, args: ^c.va_list) -> i32 { +	context = g_ctx + +	handle := os.Handle(file-1) + +	MAX_STACK :: 4096 + +	buf: []byte +	stack_buf: [MAX_STACK]byte = --- +	{ +		n := stb.vsnprintf(&stack_buf[0], MAX_STACK, fmt, args) +		if n <= 0 { +			return n +		} + +		if n >= MAX_STACK { +			buf = make([]byte, n) +			n2 := stb.vsnprintf(raw_data(buf), i32(len(buf)), fmt, args) +			assert(n == n2) +		} else { +			buf = stack_buf[:n] +		} +	} +	defer if len(buf) >= MAX_STACK { +		delete(buf) +	} + +	_, err := io.write_full(os.stream_from_handle(handle), buf) +	if err != nil { +		return -1 +	} + +	return i32(len(buf)) +} diff --git a/vendor/libc/stdlib.odin b/vendor/libc/stdlib.odin new file mode 100644 index 000000000..f898de619 --- /dev/null +++ b/vendor/libc/stdlib.odin @@ -0,0 +1,119 @@ +package odin_libc + +import "base:runtime" + +import "core:c" +import "core:slice" +import "core:sort" +import "core:strconv" +import "core:strings" + +@(require, linkage="strong", link_name="malloc") +malloc :: proc "c" (size: uint) -> rawptr { +	context = g_ctx +	ptr, err := runtime.mem_alloc_non_zeroed(int(size)) +	assert(err == nil, "allocation failure") +	return raw_data(ptr) +} + +@(require, linkage="strong", link_name="aligned_alloc") +aligned_alloc :: proc "c" (alignment: uint, size: uint) -> rawptr { +	context = g_ctx +	ptr, err := runtime.mem_alloc_non_zeroed(int(size), int(alignment)) +	assert(err == nil, "allocation failure") +	return raw_data(ptr) +} + +@(require, linkage="strong", link_name="free") +free :: proc "c" (ptr: rawptr) { +	context = g_ctx +	runtime.mem_free(ptr) +} + +@(require, linkage="strong", link_name="realloc") +realloc :: proc "c" (ptr: rawptr, new_size: uint) -> rawptr { +	context = g_ctx +	// -1 for the old_size, assumed to be wrapped with the mem.Compat_Allocator to get the right size. +	// Note that realloc does not actually care about alignment and is allowed to just align it to something +	// else than the original allocation.
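+	// (malloc and aligned_alloc above allocate through the same wrapped allocator, so pointers from either are safe to pass here.)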
+	ptr, err := runtime.non_zero_mem_resize(ptr, -1, int(new_size)) +	assert(err == nil, "realloc failure") +	return raw_data(ptr) +} + +@(require, linkage="strong", link_name="qsort") +qsort :: proc "c" (base: rawptr, num: uint, size: uint, cmp: proc "c" (a, b: rawptr) -> i32) { +	context = g_ctx + +	Inputs :: struct { +		base: rawptr, +		num: uint, +		size: uint, +		cmp: proc "c" (a, b: rawptr) -> i32, +	} + +	sort.sort({ +		collection = &Inputs{base, num, size, cmp}, +		len = proc(it: sort.Interface) -> int { +			inputs := (^Inputs)(it.collection) +			return int(inputs.num) +		}, +		less = proc(it: sort.Interface, i, j: int) -> bool { +			inputs := (^Inputs)(it.collection) +			a := rawptr(uintptr(inputs.base) + (uintptr(i) * uintptr(inputs.size))) +			b := rawptr(uintptr(inputs.base) + (uintptr(j) * uintptr(inputs.size))) +			return inputs.cmp(a, b) < 0 +		}, +		swap = proc(it: sort.Interface, i, j: int) { +			inputs := (^Inputs)(it.collection) + +			a := rawptr(uintptr(inputs.base) + (uintptr(i) * uintptr(inputs.size))) +			b := rawptr(uintptr(inputs.base) + (uintptr(j) * uintptr(inputs.size))) + +			slice.ptr_swap_non_overlapping(a, b, int(inputs.size)) +		}, +	}) +} + +@(require, linkage="strong", link_name="atoi") +atoi :: proc "c" (str: cstring) -> i32 { +	return i32(atoll(str)) +} + +@(require, linkage="strong", link_name="atol") +atol :: proc "c" (str: cstring) -> c.long { +	return c.long(atoll(str)) +} + +@(require, linkage="strong", link_name="atoll") +atoll :: proc "c" (str: cstring) -> c.longlong { +	context = g_ctx + +	sstr := string(str) +	sstr = strings.trim_left_space(sstr) +	i, _ := strconv.parse_i64_of_base(sstr, 10) +	return c.longlong(i) +} + +@(require, linkage="strong", link_name="atof") +atof :: proc "c" (str: cstring) -> f64 { +	context = g_ctx + +	sstr := string(str) +	sstr = strings.trim_left_space(sstr) +	f, _ := strconv.parse_f64(sstr) +	return f +} + +@(require, linkage="strong", link_name="strtol") +strtol :: proc "c" (str: cstring, str_end: ^cstring, base: i32) -> c.long { +	context = g_ctx + +	sstr := string(str) +	sstr = strings.trim_left_space(sstr) + +	n: int +	i, _ := strconv.parse_i64_of_base(sstr, int(base), &n) +	if str_end != nil { +		str_end^ = cstring(raw_data(sstr)[n:]) +	} +	return c.long(clamp(i, i64(min(c.long)), i64(max(c.long)))) +} diff --git a/vendor/libc/string.odin b/vendor/libc/string.odin new file mode 100644 index 000000000..1ab0803da --- /dev/null +++ b/vendor/libc/string.odin @@ -0,0 +1,111 @@ +package odin_libc + +import "base:intrinsics" + +import "core:c" +import "core:strings" +import "core:mem" + +// NOTE: already defined by Odin.
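+// (Odin's runtime already exports these symbols on targets without a CRT, so defining them here again would collide:)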
+// void *memcpy(void *, const void *, size_t); +// void *memset(void *, int, size_t); + +@(require, linkage="strong", link_name="memcmp") +memcmp :: proc "c" (lhs: [^]byte, rhs: [^]byte, count: uint) -> i32 { +	icount := int(count) +	assert_contextless(icount >= 0) +	return i32(mem.compare(lhs[:icount], rhs[:icount])) +} + +@(require, linkage="strong", link_name="strlen") +strlen :: proc "c" (str: cstring) -> c.ulong { +	return c.ulong(len(str)) +} + +@(require, linkage="strong", link_name="strchr") +strchr :: proc "c" (str: cstring, ch: i32) -> cstring { +	bch := u8(ch) +	sstr := string(str) +	if bch == 0 { +		return cstring(raw_data(sstr)[len(sstr):]) +	} + +	idx := strings.index_byte(sstr, bch) +	if idx < 0 { +		return nil +	} + +	return cstring(raw_data(sstr)[idx:]) +} + +@(require, linkage="strong", link_name="strrchr") +strrchr :: proc "c" (str: cstring, ch: i32) -> cstring { +	bch := u8(ch) +	sstr := string(str) +	if bch == 0 { +		return cstring(raw_data(sstr)[len(sstr):]) +	} + +	idx := strings.last_index_byte(sstr, bch) +	if idx < 0 { +		return nil +	} + +	return cstring(raw_data(sstr)[idx:]) +} + +@(require, linkage="strong", link_name="strncpy") +strncpy :: proc "c" (dst: [^]byte, src: cstring, count: uint) -> cstring { +	icount := int(count) +	assert_contextless(icount >= 0) +	cnt := min(len(src), icount) +	intrinsics.mem_copy_non_overlapping(dst, rawptr(src), cnt) +	intrinsics.mem_zero(dst[cnt:], icount-cnt) +	return cstring(dst) +} + +@(require, linkage="strong", link_name="strcpy") +strcpy :: proc "c" (dst: [^]byte, src: cstring) -> cstring { +	intrinsics.mem_copy_non_overlapping(dst, rawptr(src), len(src)+1) +	return cstring(dst) +} + +@(require, linkage="strong", link_name="strcspn") +strcspn :: proc "c" (dst: cstring, src: cstring) -> uint { +	context = g_ctx +	sdst := string(dst) +	idx := strings.index_any(sdst, string(src)) +	if idx == -1 { +		return uint(len(sdst)) +	} +	return uint(idx) +} + +@(require, linkage="strong", link_name="strncmp") +strncmp :: proc "c" (lhs: cstring, rhs: cstring, count: uint) -> i32 { +	icount := int(count) +	assert_contextless(icount >= 0) +	lhss := strings.string_from_null_terminated_ptr(([^]byte)(lhs), icount) +	rhss := strings.string_from_null_terminated_ptr(([^]byte)(rhs), icount) +	return i32(strings.compare(lhss, rhss)) +} + +@(require, linkage="strong", link_name="strcmp") +strcmp :: proc "c" (lhs: cstring, rhs: cstring) -> i32 { +	return i32(strings.compare(string(lhs), string(rhs))) +} + +@(require, linkage="strong", link_name="strstr") +strstr :: proc "c" (str: cstring, substr: cstring) -> cstring { +	if substr == "" { +		return str +	} + +	idx := strings.index(string(str), string(substr)) +	if idx < 0 { +		return nil +	} + +	return cstring(([^]byte)(str)[idx:]) +} + diff --git a/vendor/stb/image/stb_image.odin b/vendor/stb/image/stb_image.odin index 828a1c2bd..85d612354 100644 --- a/vendor/stb/image/stb_image.odin +++ b/vendor/stb/image/stb_image.odin @@ -7,6 +7,7 @@ LIB :: ( "../lib/stb_image.lib" when ODIN_OS == .Windows else "../lib/stb_image.a" when ODIN_OS == .Linux else "../lib/darwin/stb_image.a" when ODIN_OS == .Darwin +	else "../lib/stb_image_wasm.o" when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 else "" ) @@ -15,12 +16,19 @@ when LIB != "" { // The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed.
#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`") } +} +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + foreign import stbi "../lib/stb_image_wasm.o" + foreign import stbi { LIB } +} else when LIB != "" { foreign import stbi { LIB } } else { foreign import stbi "system:stb_image" } +NO_STDIO :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 + #assert(size_of(c.int) == size_of(b32)) #assert(size_of(b32) == size_of(c.int)) @@ -33,14 +41,48 @@ Io_Callbacks :: struct { eof: proc "c" (user: rawptr) -> c.int, // returns nonzero if we are at end of file/data } +when !NO_STDIO { + @(default_calling_convention="c", link_prefix="stbi_") + foreign stbi { + //////////////////////////////////// + // + // 8-bits-per-channel interface + // + load :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte --- + load_from_file :: proc(f: ^c.FILE, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte --- + + //////////////////////////////////// + // + // 16-bits-per-channel interface + // + load_16 :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 --- + load_16_from_file :: proc(f: ^c.FILE, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 --- + + //////////////////////////////////// + // + // float-per-channel interface + // + loadf :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 --- + loadf_from_file :: proc(f: ^c.FILE, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 --- + + is_hdr :: proc(filename: cstring) -> c.int --- + is_hdr_from_file :: proc(f: ^c.FILE) -> c.int --- + + // get image dimensions & components without fully decoding + info :: proc(filename: cstring, x, y, comp: ^c.int) -> c.int --- + info_from_file :: proc(f: ^c.FILE, x, y, comp: ^c.int) -> c.int --- + + is_16_bit :: proc(filename: cstring) -> b32 --- + is_16_bit_from_file :: proc(f: ^c.FILE) -> b32 --- + } +} + @(default_calling_convention="c", link_prefix="stbi_") foreign stbi { //////////////////////////////////// // // 8-bits-per-channel interface // - load :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte --- - load_from_file :: proc(f: ^c.FILE, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte --- load_from_memory :: proc(buffer: [^]byte, len: c.int, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte --- load_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]byte --- @@ -50,8 +92,6 @@ foreign stbi { // // 16-bits-per-channel interface // - load_16 :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 --- - load_16_from_file :: proc(f: ^c.FILE, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 --- load_16_from_memory :: proc(buffer: [^]byte, len: c.int, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 --- load_16_from_callbacks :: proc(clbk: ^Io_Callbacks, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]u16 --- @@ -59,8 +99,6 @@ foreign stbi { // // float-per-channel interface // - loadf :: proc(filename: cstring, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 --- - loadf_from_file :: proc(f: ^c.FILE, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 --- loadf_from_memory :: proc(buffer: [^]byte, 
len: c.int, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 --- loadf_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, channels_in_file: ^c.int, desired_channels: c.int) -> [^]f32 --- @@ -73,9 +111,6 @@ foreign stbi { is_hdr_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr) -> c.int --- is_hdr_from_memory :: proc(buffer: [^]byte, len: c.int) -> c.int --- - is_hdr :: proc(filename: cstring) -> c.int --- - is_hdr_from_file :: proc(f: ^c.FILE) -> c.int --- - // get a VERY brief reason for failure // NOT THREADSAFE failure_reason :: proc() -> cstring --- @@ -84,13 +119,9 @@ foreign stbi { image_free :: proc(retval_from_load: rawptr) --- // get image dimensions & components without fully decoding - info :: proc(filename: cstring, x, y, comp: ^c.int) -> c.int --- - info_from_file :: proc(f: ^c.FILE, x, y, comp: ^c.int) -> c.int --- info_from_memory :: proc(buffer: [^]byte, len: c.int, x, y, comp: ^c.int) -> c.int --- info_from_callbacks :: proc(clbk: ^Io_Callbacks, user: rawptr, x, y, comp: ^c.int) -> c.int --- - is_16_bit :: proc(filename: cstring) -> b32 --- - is_16_bit_from_file :: proc(f: ^c.FILE) -> b32 --- is_16_bit_from_memory :: proc(buffer: [^]byte, len: c.int) -> c.int --- // for image formats that explicitly notate that they have premultiplied alpha, diff --git a/vendor/stb/image/stb_image_resize.odin b/vendor/stb/image/stb_image_resize.odin index e22b587b2..a37c2e243 100644 --- a/vendor/stb/image/stb_image_resize.odin +++ b/vendor/stb/image/stb_image_resize.odin @@ -7,6 +7,7 @@ RESIZE_LIB :: ( "../lib/stb_image_resize.lib" when ODIN_OS == .Windows else "../lib/stb_image_resize.a" when ODIN_OS == .Linux else "../lib/darwin/stb_image_resize.a" when ODIN_OS == .Darwin + else "../lib/stb_image_resize_wasm.o" when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 else "" ) @@ -15,7 +16,11 @@ when RESIZE_LIB != "" { // The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed. #panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`") } +} +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + foreign import lib "../lib/stb_image_resize_wasm.o" +} else when RESIZE_LIB != "" { foreign import lib { RESIZE_LIB } } else { foreign import lib "system:stb_image_resize" diff --git a/vendor/stb/image/stb_image_wasm.odin b/vendor/stb/image/stb_image_wasm.odin new file mode 100644 index 000000000..77bb44f02 --- /dev/null +++ b/vendor/stb/image/stb_image_wasm.odin @@ -0,0 +1,4 @@ +//+build wasm32, wasm64p32 +package stb_image + +@(require) import _ "vendor:libc" diff --git a/vendor/stb/image/stb_image_write.odin b/vendor/stb/image/stb_image_write.odin index f030f1e28..a0c0b57a0 100644 --- a/vendor/stb/image/stb_image_write.odin +++ b/vendor/stb/image/stb_image_write.odin @@ -7,6 +7,7 @@ WRITE_LIB :: ( "../lib/stb_image_write.lib" when ODIN_OS == .Windows else "../lib/stb_image_write.a" when ODIN_OS == .Linux else "../lib/darwin/stb_image_write.a" when ODIN_OS == .Darwin + else "../lib/stb_image_write_wasm.o" when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 else "" ) @@ -15,7 +16,11 @@ when WRITE_LIB != "" { // The STB libraries are shipped with the compiler on Windows so a Windows specific message should not be needed. 
#panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`") } +} +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + foreign import stbiw "../lib/stb_image_write_wasm.o" +} else when WRITE_LIB != "" { foreign import stbiw { WRITE_LIB } } else { foreign import stbiw "system:stb_image_write" @@ -25,12 +30,6 @@ write_func :: proc "c" (ctx: rawptr, data: rawptr, size: c.int) @(default_calling_convention="c", link_prefix="stbi_") foreign stbiw { - write_png :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int) -> c.int --- - write_bmp :: proc(filename: cstring, w, h, comp: c.int, data: rawptr) -> c.int --- - write_tga :: proc(filename: cstring, w, h, comp: c.int, data: rawptr) -> c.int --- - write_hdr :: proc(filename: cstring, w, h, comp: c.int, data: [^]f32) -> c.int --- - write_jpg :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, quality: c.int /*0..=100*/) -> c.int --- - write_png_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int) -> c.int --- write_bmp_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr) -> c.int --- write_tga_to_func :: proc(func: write_func, ctx: rawptr, w, h, comp: c.int, data: rawptr) -> c.int --- @@ -39,3 +38,14 @@ foreign stbiw { flip_vertically_on_write :: proc(flip_boolean: b32) --- } + +when !NO_STDIO { + @(default_calling_convention="c", link_prefix="stbi_") + foreign stbiw { + write_png :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, stride_in_bytes: c.int) -> c.int --- + write_bmp :: proc(filename: cstring, w, h, comp: c.int, data: rawptr) -> c.int --- + write_tga :: proc(filename: cstring, w, h, comp: c.int, data: rawptr) -> c.int --- + write_hdr :: proc(filename: cstring, w, h, comp: c.int, data: [^]f32) -> c.int --- + write_jpg :: proc(filename: cstring, w, h, comp: c.int, data: rawptr, quality: c.int /*0..=100*/) -> c.int --- + } +} diff --git a/vendor/stb/lib/stb_image_resize_wasm.o b/vendor/stb/lib/stb_image_resize_wasm.o Binary files differnew file mode 100644 index 000000000..1bf91266b --- /dev/null +++ b/vendor/stb/lib/stb_image_resize_wasm.o diff --git a/vendor/stb/lib/stb_image_wasm.o b/vendor/stb/lib/stb_image_wasm.o Binary files differnew file mode 100644 index 000000000..cd7fa31ce --- /dev/null +++ b/vendor/stb/lib/stb_image_wasm.o diff --git a/vendor/stb/lib/stb_image_write_wasm.o b/vendor/stb/lib/stb_image_write_wasm.o Binary files differnew file mode 100644 index 000000000..279803330 --- /dev/null +++ b/vendor/stb/lib/stb_image_write_wasm.o diff --git a/vendor/stb/lib/stb_rect_pack_wasm.o b/vendor/stb/lib/stb_rect_pack_wasm.o Binary files differnew file mode 100644 index 000000000..fdd6d95d6 --- /dev/null +++ b/vendor/stb/lib/stb_rect_pack_wasm.o diff --git a/vendor/stb/lib/stb_sprintf_wasm.o b/vendor/stb/lib/stb_sprintf_wasm.o Binary files differnew file mode 100644 index 000000000..4c8e140e5 --- /dev/null +++ b/vendor/stb/lib/stb_sprintf_wasm.o diff --git a/vendor/stb/lib/stb_truetype_wasm.o b/vendor/stb/lib/stb_truetype_wasm.o Binary files differindex d3380e8a2..e545ef8e7 100644 --- a/vendor/stb/lib/stb_truetype_wasm.o +++ b/vendor/stb/lib/stb_truetype_wasm.o diff --git a/vendor/stb/rect_pack/stb_rect_pack.odin b/vendor/stb/rect_pack/stb_rect_pack.odin index 6c0b56378..696b9c8c0 100644 --- a/vendor/stb/rect_pack/stb_rect_pack.odin +++ b/vendor/stb/rect_pack/stb_rect_pack.odin @@ -9,6 +9,7 @@ LIB :: ( "../lib/stb_rect_pack.lib" when 
ODIN_OS == .Windows else "../lib/stb_rect_pack.a" when ODIN_OS == .Linux else "../lib/darwin/stb_rect_pack.a" when ODIN_OS == .Darwin + else "../lib/stb_rect_pack_wasm.o" when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 else "" ) @@ -16,7 +17,11 @@ when LIB != "" { when !#exists(LIB) { #panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`") } +} +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + foreign import lib "../lib/stb_rect_pack_wasm.o" +} else when LIB != "" { foreign import lib { LIB } } else { foreign import lib "system:stb_rect_pack" diff --git a/vendor/stb/rect_pack/stb_rect_pack_wasm.odin b/vendor/stb/rect_pack/stb_rect_pack_wasm.odin new file mode 100644 index 000000000..fb75552ec --- /dev/null +++ b/vendor/stb/rect_pack/stb_rect_pack_wasm.odin @@ -0,0 +1,4 @@ +//+build wasm32, wasm64p32 +package stb_rect_pack + +@(require) import _ "vendor:libc" diff --git a/vendor/stb/sprintf/stb_sprintf.odin b/vendor/stb/sprintf/stb_sprintf.odin new file mode 100644 index 000000000..ec5036e45 --- /dev/null +++ b/vendor/stb/sprintf/stb_sprintf.odin @@ -0,0 +1,37 @@ +package stb_sprintf + +import "core:c" + +@(private) +LIB :: ( + "../lib/stb_sprintf.lib" when ODIN_OS == .Windows + else "../lib/stb_sprintf.a" when ODIN_OS == .Linux + else "../lib/darwin/stb_sprintf.a" when ODIN_OS == .Darwin + else "../lib/stb_sprintf_wasm.o" when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 + else "" +) + +when LIB != "" { + when !#exists(LIB) { + #panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`") + } +} + +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { + foreign import stbpf "../lib/stb_sprintf_wasm.o" +} else when LIB != "" { + foreign import stbpf { LIB } +} else { + foreign import stbpf "system:stb_sprintf" +} + +@(link_prefix="stbsp_", default_calling_convention="c") +foreign stbpf { + sprintf :: proc(buf: [^]byte, fmt: cstring, #c_vararg args: ..any) -> i32 --- + snprintf :: proc(buf: [^]byte, count: i32, fmt: cstring, #c_vararg args: ..any) -> i32 --- + vsprintf :: proc(buf: [^]byte, fmt: cstring, va: c.va_list) -> i32 --- + vsnprintf :: proc(buf: [^]byte, count: i32, fmt: cstring, va: ^c.va_list) -> i32 --- + vsprintfcb :: proc(callback: SPRINTFCB, user: rawptr, buf: [^]byte, fmt: cstring, va: ^c.va_list) -> i32 --- +} + +SPRINTFCB :: #type proc "c" (buf: [^]byte, user: rawptr, len: i32) -> cstring diff --git a/vendor/stb/src/Makefile b/vendor/stb/src/Makefile index b7217d528..194ea5e75 100644 --- a/vendor/stb/src/Makefile +++ b/vendor/stb/src/Makefile @@ -8,17 +8,24 @@ endif wasm: mkdir -p ../lib - $(CC) -c -Os --target=wasm32 -nostdlib stb_truetype_wasm.c -o ../lib/stb_truetype_wasm.o + $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image.c -o ../lib/stb_image_wasm.o -DSTBI_NO_STDIO + $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image_write.c -o ../lib/stb_image_write_wasm.o -DSTBI_WRITE_NO_STDIO + $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_image_resize.c -o ../lib/stb_image_resize_wasm.o + $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_truetype.c -o ../lib/stb_truetype_wasm.o + # $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_vorbis.c -o ../lib/stb_vorbis_wasm.o -DSTB_VORBIS_NO_STDIO + $(CC) -c -Os --target=wasm32 --sysroot=$(shell odin root)/vendor/libc stb_rect_pack.c -o 
../lib/stb_rect_pack_wasm.o + $(CC) -c -Os --target=wasm32 stb_sprintf.c -o ../lib/stb_sprintf_wasm.o unix: mkdir -p ../lib - $(CC) -c -O2 -Os -fPIC stb_image.c stb_image_write.c stb_image_resize.c stb_truetype.c stb_rect_pack.c stb_vorbis.c + $(CC) -c -O2 -Os -fPIC stb_image.c stb_image_write.c stb_image_resize.c stb_truetype.c stb_rect_pack.c stb_vorbis.c stb_sprintf.c $(AR) rcs ../lib/stb_image.a stb_image.o $(AR) rcs ../lib/stb_image_write.a stb_image_write.o $(AR) rcs ../lib/stb_image_resize.a stb_image_resize.o $(AR) rcs ../lib/stb_truetype.a stb_truetype.o $(AR) rcs ../lib/stb_rect_pack.a stb_rect_pack.o $(AR) rcs ../lib/stb_vorbis.a stb_vorbis.o + $(AR) rcs ../lib/stb_sprintf.a stb_sprintf.o #$(CC) -fPIC -shared -Wl,-soname=stb_image.so -o ../lib/stb_image.so stb_image.o #$(CC) -fPIC -shared -Wl,-soname=stb_image_write.so -o ../lib/stb_image_write.so stb_image_write.o #$(CC) -fPIC -shared -Wl,-soname=stb_image_resize.so -o ../lib/stb_image_resize.so stb_image_resize.o @@ -47,4 +54,7 @@ darwin: $(CC) -arch x86_64 -c -O2 -Os -fPIC stb_vorbis.c -o stb_vorbis-x86_64.o -mmacosx-version-min=10.12 $(CC) -arch arm64 -c -O2 -Os -fPIC stb_vorbis.c -o stb_vorbis-arm64.o -mmacosx-version-min=10.12 lipo -create stb_vorbis-x86_64.o stb_vorbis-arm64.o -output ../lib/darwin/stb_vorbis.a + $(CC) -arch x86_64 -c -O2 -Os -fPIC stb_sprintf.c -o stb_sprintf-x86_64.o -mmacosx-version-min=10.12 + $(CC) -arch arm64 -c -O2 -Os -fPIC stb_sprintf.c -o stb_sprintf-arm64.o -mmacosx-version-min=10.12 + lipo -create stb_sprintf-x86_64.o stb_sprintf-arm64.o -output ../lib/darwin/stb_sprintf.a rm *.o diff --git a/vendor/stb/src/stb_sprintf.c b/vendor/stb/src/stb_sprintf.c new file mode 100644 index 000000000..d60a91bae --- /dev/null +++ b/vendor/stb/src/stb_sprintf.c @@ -0,0 +1,2 @@ +#define STB_SPRINTF_IMPLEMENTATION +#include "stb_sprintf.h" diff --git a/vendor/stb/src/stb_sprintf.h b/vendor/stb/src/stb_sprintf.h new file mode 100644 index 000000000..ca432a6bc --- /dev/null +++ b/vendor/stb/src/stb_sprintf.h @@ -0,0 +1,1906 @@ +// stb_sprintf - v1.10 - public domain snprintf() implementation +// originally by Jeff Roberts / RAD Game Tools, 2015/10/20 +// http://github.com/nothings/stb +// +// allowed types: sc uidBboXx p AaGgEef n +// lengths : hh h ll j z t I64 I32 I +// +// Contributors: +// Fabian "ryg" Giesen (reformatting) +// github:aganm (attribute format) +// +// Contributors (bugfixes): +// github:d26435 +// github:trex78 +// github:account-login +// Jari Komppa (SI suffixes) +// Rohit Nirmal +// Marcin Wojdyr +// Leonard Ritter +// Stefano Zanotti +// Adam Allison +// Arvid Gerstmann +// Markus Kolb +// +// LICENSE: +// +// See end of file for license information. + +#ifndef STB_SPRINTF_H_INCLUDE +#define STB_SPRINTF_H_INCLUDE + +/* +Single file sprintf replacement. + +Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20. +Hereby placed in public domain. + +This is a full sprintf replacement that supports everything that +the C runtime sprintfs support, including float/double, 64-bit integers, +hex floats, field parameters (%*.*d stuff), length reads backs, etc. + +Why would you need this if sprintf already exists? Well, first off, +it's *much* faster (see below). It's also much smaller than the CRT +versions code-space-wise. We've also added some simple improvements +that are super handy (commas in thousands, callbacks at buffer full, +for example). 
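+ +As a sketch of the callback mode (its full contract is under stbsp_vsprintfcb +in the API section below), a callback that streams to a FILE could look like: + +   char *stream_cb(char const *buf, void *user, int len) +   { +      fwrite(buf, 1, len, (FILE *)user); // flush the len chars handed to us +      return (char *)buf;                // keep reusing the same buffer; return 0 to stop +   } +   // char tmp[STB_SPRINTF_MIN]; stbsp_vsprintfcb(stream_cb, f, tmp, fmt, va);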
Finally, the format strings for MSVC and GCC differ +for 64-bit integers (among other small things), so this lets you use +the same format strings in cross platform code. + +It uses the standard single file trick of being both the header file +and the source itself. If you just include it normally, you just get +the header file function definitions. To get the code, you include +it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first. + +It only uses va_args macros from the C runtime to do it's work. It +does cast doubles to S64s and shifts and divides U64s, which does +drag in CRT code on most platforms. + +It compiles to roughly 8K with float support, and 4K without. +As a comparison, when using MSVC static libs, calling sprintf drags +in 16K. + +API: +==== +int stbsp_sprintf( char * buf, char const * fmt, ... ) +int stbsp_snprintf( char * buf, int count, char const * fmt, ... ) + Convert an arg list into a buffer. stbsp_snprintf always returns + a zero-terminated string (unlike regular snprintf). + +int stbsp_vsprintf( char * buf, char const * fmt, va_list va ) +int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va ) + Convert a va_list arg list into a buffer. stbsp_vsnprintf always returns + a zero-terminated string (unlike regular snprintf). + +int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va ) + typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len ); + Convert into a buffer, calling back every STB_SPRINTF_MIN chars. + Your callback can then copy the chars out, print them or whatever. + This function is actually the workhorse for everything else. + The buffer you pass in must hold at least STB_SPRINTF_MIN characters. + // you return the next buffer to use or 0 to stop converting + +void stbsp_set_separators( char comma, char period ) + Set the comma and period characters to use. + +FLOATS/DOUBLES: +=============== +This code uses a internal float->ascii conversion method that uses +doubles with error correction (double-doubles, for ~105 bits of +precision). This conversion is round-trip perfect - that is, an atof +of the values output here will give you the bit-exact double back. + +One difference is that our insignificant digits will be different than +with MSVC or GCC (but they don't match each other either). We also +don't attempt to find the minimum length matching float (pre-MSVC15 +doesn't either). + +If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT +and you'll save 4K of code space. + +64-BIT INTS: +============ +This library also supports 64-bit integers and you can use MSVC style or +GCC style indicators (%I64d or %lld). It supports the C99 specifiers +for size_t and ptr_diff_t (%jd %zd) as well. + +EXTRAS: +======= +Like some GCCs, for integers and floats, you can use a ' (single quote) +specifier and commas will be inserted on the thousands: "%'d" on 12345 +would print 12,345. + +For integers and floats, you can use a "$" specifier and the number +will be converted to float and then divided to get kilo, mega, giga or +tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is +"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn +2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three +$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the +suffix, add "_" specifier: "%_$d" -> "2.53M". 
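+ +A few of the extras side by side (a sketch; buf is any sufficiently large +char array): + +   stbsp_sprintf(buf, "%'d",   12345);   // "12,345" +   stbsp_sprintf(buf, "%$.2d", 2536000); // "2.53 M" +   stbsp_sprintf(buf, "%$$d",  2536000); // "2.42 Mi" +   stbsp_sprintf(buf, "%_$d",  2536000); // "2.53M"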
+ +In addition to octal and hexadecimal conversions, you can print +integers in binary: "%b" for 256 would print 100. + +PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC): +=================================================================== +"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC) +"%24d" across all 32-bit ints (4.5x/4.2x faster) +"%x" across all 32-bit ints (4.5x/3.8x faster) +"%08x" across all 32-bit ints (4.3x/3.8x faster) +"%f" across e-10 to e+10 floats (7.3x/6.0x faster) +"%e" across e-10 to e+10 floats (8.1x/6.0x faster) +"%g" across e-10 to e+10 floats (10.0x/7.1x faster) +"%f" for values near e-300 (7.9x/6.5x faster) +"%f" for values near e+300 (10.0x/9.1x faster) +"%e" for values near e-300 (10.1x/7.0x faster) +"%e" for values near e+300 (9.2x/6.0x faster) +"%.320f" for values near e-300 (12.6x/11.2x faster) +"%a" for random values (8.6x/4.3x faster) +"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster) +"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster) +"%s%s%s" for 64 char strings (7.1x/7.3x faster) +"...512 char string..." ( 35.0x/32.5x faster!) +*/ + +#if defined(__clang__) + #if defined(__has_feature) && defined(__has_attribute) + #if __has_feature(address_sanitizer) + #if __has_attribute(__no_sanitize__) + #define STBSP__ASAN __attribute__((__no_sanitize__("address"))) + #elif __has_attribute(__no_sanitize_address__) + #define STBSP__ASAN __attribute__((__no_sanitize_address__)) + #elif __has_attribute(__no_address_safety_analysis__) + #define STBSP__ASAN __attribute__((__no_address_safety_analysis__)) + #endif + #endif + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ + #define STBSP__ASAN __attribute__((__no_sanitize_address__)) + #endif +#endif + +#ifndef STBSP__ASAN +#define STBSP__ASAN +#endif + +#ifdef STB_SPRINTF_STATIC +#define STBSP__PUBLICDEC static +#define STBSP__PUBLICDEF static STBSP__ASAN +#else +#ifdef __cplusplus +#define STBSP__PUBLICDEC extern "C" +#define STBSP__PUBLICDEF extern "C" STBSP__ASAN +#else +#define STBSP__PUBLICDEC extern +#define STBSP__PUBLICDEF STBSP__ASAN +#endif +#endif + +#if defined(__has_attribute) + #if __has_attribute(format) + #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va))) + #endif +#endif + +#ifndef STBSP__ATTRIBUTE_FORMAT +#define STBSP__ATTRIBUTE_FORMAT(fmt,va) +#endif + +#ifdef _MSC_VER +#define STBSP__NOTUSED(v) (void)(v) +#else +#define STBSP__NOTUSED(v) (void)sizeof(v) +#endif + +#include <stdarg.h> // for va_arg(), va_list() +#include <stddef.h> // size_t, ptrdiff_t + +#ifndef STB_SPRINTF_MIN +#define STB_SPRINTF_MIN 512 // how many characters per callback +#endif +typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len); + +#ifndef STB_SPRINTF_DECORATE +#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names +#endif + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) 
STBSP__ATTRIBUTE_FORMAT(3,4); + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period); + +#endif // STB_SPRINTF_H_INCLUDE + +#ifdef STB_SPRINTF_IMPLEMENTATION + +#define stbsp__uint32 unsigned int +#define stbsp__int32 signed int + +#ifdef _MSC_VER +#define stbsp__uint64 unsigned __int64 +#define stbsp__int64 signed __int64 +#else +#define stbsp__uint64 unsigned long long +#define stbsp__int64 signed long long +#endif +#define stbsp__uint16 unsigned short + +#ifndef stbsp__uintptr +#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__) +#define stbsp__uintptr stbsp__uint64 +#else +#define stbsp__uintptr stbsp__uint32 +#endif +#endif + +#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC) +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define STB_SPRINTF_MSVC_MODE +#endif +#endif + +#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses +#define STBSP__UNALIGNED(code) +#else +#define STBSP__UNALIGNED(code) code +#endif + +#ifndef STB_SPRINTF_NOFLOAT +// internal float utility functions +static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits); +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value); +#define STBSP__SPECIAL 0x7000 +#endif + +static char stbsp__period = '.'; +static char stbsp__comma = ','; +static struct +{ + short temp; // force next field to be 2-byte aligned + char pair[201]; +} stbsp__digitpair = +{ + 0, + "00010203040506070809101112131415161718192021222324" + "25262728293031323334353637383940414243444546474849" + "50515253545556575859606162636465666768697071727374" + "75767778798081828384858687888990919293949596979899" +}; + +STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod) +{ + stbsp__period = pperiod; + stbsp__comma = pcomma; +} + +#define STBSP__LEFTJUST 1 +#define STBSP__LEADINGPLUS 2 +#define STBSP__LEADINGSPACE 4 +#define STBSP__LEADING_0X 8 +#define STBSP__LEADINGZERO 16 +#define STBSP__INTMAX 32 +#define STBSP__TRIPLET_COMMA 64 +#define STBSP__NEGATIVE 128 +#define STBSP__METRIC_SUFFIX 256 +#define STBSP__HALFWIDTH 512 +#define STBSP__METRIC_NOSPACE 1024 +#define STBSP__METRIC_1024 2048 +#define STBSP__METRIC_JEDEC 4096 + +static void stbsp__lead_sign(stbsp__uint32 fl, char *sign) +{ + sign[0] = 0; + if (fl & STBSP__NEGATIVE) { + sign[0] = 1; + sign[1] = '-'; + } else if (fl & STBSP__LEADINGSPACE) { + sign[0] = 1; + sign[1] = ' '; + } else if (fl & STBSP__LEADINGPLUS) { + sign[0] = 1; + sign[1] = '+'; + } +} + +static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit) +{ + char const * sn = s; + + // get up to 4-byte alignment + for (;;) { + if (((stbsp__uintptr)sn & 3) == 0) + break; + + if (!limit || *sn == 0) + return (stbsp__uint32)(sn - s); + + ++sn; + --limit; + } + + // scan over 4 bytes at a time to find terminating 0 + // this will intentionally scan up to 3 bytes past the end of buffers, + // but becase it works 4B aligned, it will never cross page boundaries + // (hence the STBSP__ASAN markup; the over-read here is intentional + // and harmless) + while (limit >= 4) { + stbsp__uint32 v = 
*(stbsp__uint32 *)sn; + // bit hack to find if there's a 0 byte in there + if ((v - 0x01010101) & (~v) & 0x80808080UL) + break; + + sn += 4; + limit -= 4; + } + + // handle the last few characters to find actual size + while (limit && *sn) { + ++sn; + --limit; + } + + return (stbsp__uint32)(sn - s); +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va) +{ + static char hex[] = "0123456789abcdefxp"; + static char hexu[] = "0123456789ABCDEFXP"; + char *bf; + char const *f; + int tlen = 0; + + bf = buf; + f = fmt; + for (;;) { + stbsp__int32 fw, pr, tz; + stbsp__uint32 fl; + + // macros for the callback buffer stuff + #define stbsp__chk_cb_bufL(bytes) \ + { \ + int len = (int)(bf - buf); \ + if ((len + (bytes)) >= STB_SPRINTF_MIN) { \ + tlen += len; \ + if (0 == (bf = buf = callback(buf, user, len))) \ + goto done; \ + } \ + } + #define stbsp__chk_cb_buf(bytes) \ + { \ + if (callback) { \ + stbsp__chk_cb_bufL(bytes); \ + } \ + } + #define stbsp__flush_cb() \ + { \ + stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \ + } // flush if there is even one byte in the buffer + #define stbsp__cb_buf_clamp(cl, v) \ + cl = v; \ + if (callback) { \ + int lg = STB_SPRINTF_MIN - (int)(bf - buf); \ + if (cl > lg) \ + cl = lg; \ + } + + // fast copy everything up to the next % (or end of string) + for (;;) { + while (((stbsp__uintptr)f) & 3) { + schk1: + if (f[0] == '%') + goto scandd; + schk2: + if (f[0] == 0) + goto endfmt; + stbsp__chk_cb_buf(1); + *bf++ = f[0]; + ++f; + } + for (;;) { + // Check if the next 4 bytes contain %(0x25) or end of string. + // Using the 'hasless' trick: + // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + stbsp__uint32 v, c; + v = *(stbsp__uint32 *)f; + c = (~v) & 0x80808080; + if (((v ^ 0x25252525) - 0x01010101) & c) + goto schk1; + if ((v - 0x01010101) & c) + goto schk2; + if (callback) + if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4) + goto schk1; + #ifdef STB_SPRINTF_NOUNALIGNED + if(((stbsp__uintptr)bf) & 3) { + bf[0] = f[0]; + bf[1] = f[1]; + bf[2] = f[2]; + bf[3] = f[3]; + } else + #endif + { + *(stbsp__uint32 *)bf = v; + } + bf += 4; + f += 4; + } + } + scandd: + + ++f; + + // ok, we have a percent, read the modifiers first + fw = 0; + pr = -1; + fl = 0; + tz = 0; + + // flags + for (;;) { + switch (f[0]) { + // if we have left justify + case '-': + fl |= STBSP__LEFTJUST; + ++f; + continue; + // if we have leading plus + case '+': + fl |= STBSP__LEADINGPLUS; + ++f; + continue; + // if we have leading space + case ' ': + fl |= STBSP__LEADINGSPACE; + ++f; + continue; + // if we have leading 0x + case '#': + fl |= STBSP__LEADING_0X; + ++f; + continue; + // if we have thousand commas + case '\'': + fl |= STBSP__TRIPLET_COMMA; + ++f; + continue; + // if we have kilo marker (none->kilo->kibi->jedec) + case '$': + if (fl & STBSP__METRIC_SUFFIX) { + if (fl & STBSP__METRIC_1024) { + fl |= STBSP__METRIC_JEDEC; + } else { + fl |= STBSP__METRIC_1024; + } + } else { + fl |= STBSP__METRIC_SUFFIX; + } + ++f; + continue; + // if we don't want space between metric suffix and number + case '_': + fl |= STBSP__METRIC_NOSPACE; + ++f; + continue; + // if we have leading zero + case '0': + fl |= STBSP__LEADINGZERO; + ++f; + goto flags_done; + default: goto flags_done; + } + } + flags_done: + + // get the field width + if (f[0] == '*') { + fw = va_arg(va, stbsp__uint32); + ++f; + } else { + while ((f[0] >= '0') && (f[0] <= '9')) { + fw = fw * 10 + f[0] - '0'; + f++; + } + } + // get the precision 
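+      // (pr was initialized to -1 above and stays -1 when no '.' is given; the conversions below treat that as "use the default", e.g. 6 digits for the float formats)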
+ if (f[0] == '.') { + ++f; + if (f[0] == '*') { + pr = va_arg(va, stbsp__uint32); + ++f; + } else { + pr = 0; + while ((f[0] >= '0') && (f[0] <= '9')) { + pr = pr * 10 + f[0] - '0'; + f++; + } + } + } + + // handle integer size overrides + switch (f[0]) { + // are we halfwidth? + case 'h': + fl |= STBSP__HALFWIDTH; + ++f; + if (f[0] == 'h') + ++f; // QUARTERWIDTH + break; + // are we 64-bit (unix style) + case 'l': + fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0); + ++f; + if (f[0] == 'l') { + fl |= STBSP__INTMAX; + ++f; + } + break; + // are we 64-bit on intmax? (c99) + case 'j': + fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit on size_t or ptrdiff_t? (c99) + case 'z': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + case 't': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit (msft style) + case 'I': + if ((f[1] == '6') && (f[2] == '4')) { + fl |= STBSP__INTMAX; + f += 3; + } else if ((f[1] == '3') && (f[2] == '2')) { + f += 3; + } else { + fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0); + ++f; + } + break; + default: break; + } + + // handle each replacement + switch (f[0]) { + #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307 + char num[STBSP__NUMSZ]; + char lead[8]; + char tail[8]; + char *s; + char const *h; + stbsp__uint32 l, n, cs; + stbsp__uint64 n64; +#ifndef STB_SPRINTF_NOFLOAT + double fv; +#endif + stbsp__int32 dp; + char const *sn; + + case 's': + // get the string + s = va_arg(va, char *); + if (s == 0) + s = (char *)"null"; + // get the length, limited to desired precision + // always limit to ~0u chars since our counts are 32b + l = stbsp__strlen_limited(s, (pr >= 0) ? pr : ~0u); + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + // copy the string in + goto scopy; + + case 'c': // char + // get the character + s = num + STBSP__NUMSZ - 1; + *s = (char)va_arg(va, int); + l = 1; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + + case 'n': // weird write-bytes specifier + { + int *d = va_arg(va, int *); + *d = tlen + (int)(bf - buf); + } break; + +#ifdef STB_SPRINTF_NOFLOAT + case 'A': // float + case 'a': // hex float + case 'G': // float + case 'g': // float + case 'E': // float + case 'e': // float + case 'f': // float + va_arg(va, double); // eat it + s = (char *)"No float"; + l = 8; + lead[0] = 0; + tail[0] = 0; + pr = 0; + cs = 0; + STBSP__NOTUSED(dp); + goto scopy; +#else + case 'A': // hex float + case 'a': // hex float + h = (f[0] == 'A') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv)) + fl |= STBSP__NEGATIVE; + + s = num + 64; + + stbsp__lead_sign(fl, lead); + + if (dp == -1023) + dp = (n64) ? -1022 : 0; + else + n64 |= (((stbsp__uint64)1) << 52); + n64 <<= (64 - 56); + if (pr < 15) + n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4)); +// add leading chars + +#ifdef STB_SPRINTF_MSVC_MODE + *s++ = '0'; + *s++ = 'x'; +#else + lead[1 + lead[0]] = '0'; + lead[2 + lead[0]] = 'x'; + lead[0] += 2; +#endif + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + if (pr) + *s++ = stbsp__period; + sn = s; + + // print the bits + n = pr; + if (n > 13) + n = 13; + if (pr > (stbsp__int32)n) + tz = pr - n; + pr = 0; + while (n--) { + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + } + + // print the expo + tail[1] = h[17]; + if (dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; + n = (dp >= 1000) ? 
6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3)); + tail[0] = (char)n; + for (;;) { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + + dp = (int)(s - sn); + l = (int)(s - (num + 64)); + s = num + 64; + cs = 1 + (3 << 24); + goto scopy; + + case 'G': // float + case 'g': // float + h = (f[0] == 'G') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; + else if (pr == 0) + pr = 1; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000)) + fl |= STBSP__NEGATIVE; + + // clamp the precision and delete extra zeros after clamp + n = pr; + if (l > (stbsp__uint32)pr) + l = pr; + while ((l > 1) && (pr) && (sn[l - 1] == '0')) { + --pr; + --l; + } + + // should we use %e + if ((dp <= -4) || (dp > (stbsp__int32)n)) { + if (pr > (stbsp__int32)l) + pr = l - 1; + else if (pr) + --pr; // when using %e, there is one digit before the decimal + goto doexpfromg; + } + // this is the insane action to get the pr to match %g semantics for %f + if (dp > 0) { + pr = (dp < (stbsp__int32)l) ? l - dp : 0; + } else { + pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr); + } + goto dofloatfromg; + + case 'E': // float + case 'e': // float + h = (f[0] == 'E') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000)) + fl |= STBSP__NEGATIVE; + doexpfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if (dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + // handle leading chars + *s++ = sn[0]; + + if (pr) + *s++ = stbsp__period; + + // handle after decimal + if ((l - 1) > (stbsp__uint32)pr) + l = pr + 1; + for (n = 1; n < l; n++) + *s++ = sn[n]; + // trailing zeros + tz = pr - (l - 1); + pr = 0; + // dump expo + tail[1] = h[0xe]; + dp -= 1; + if (dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; +#ifdef STB_SPRINTF_MSVC_MODE + n = 5; +#else + n = (dp >= 100) ? 5 : 4; +#endif + tail[0] = (char)n; + for (;;) { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + cs = 1 + (3 << 24); // how many tens + goto flt_lead; + + case 'f': // float + fv = va_arg(va, double); + doafloat: + // do kilos + if (fl & STBSP__METRIC_SUFFIX) { + double divisor; + divisor = 1000.0f; + if (fl & STBSP__METRIC_1024) + divisor = 1024.0; + while (fl < 0x4000000) { + if ((fv < divisor) && (fv > -divisor)) + break; + fv /= divisor; + fl += 0x1000000; + } + } + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr)) + fl |= STBSP__NEGATIVE; + dofloatfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if (dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + + // handle the three decimal varieties + if (dp <= 0) { + stbsp__int32 i; + // handle 0.000*000xxxx + *s++ = '0'; + if (pr) + *s++ = stbsp__period; + n = -dp; + if ((stbsp__int32)n > pr) + n = pr; + i = n; + while (i) { + if ((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + i -= 4; + } + while (i) { + *s++ = '0'; + --i; + } + if ((stbsp__int32)(l + n) > pr) + l = pr - n; + i = l; + while (i) { + *s++ = *sn++; + --i; + } + tz = pr - (n + l); + cs = 1 + (3 << 24); // how many tens did we write (for commas below) + } else { + cs = (fl & STBSP__TRIPLET_COMMA) ? 
((600 - (stbsp__uint32)dp) % 3) : 0; + if ((stbsp__uint32)dp >= l) { + // handle xxxx000*000.0 + n = 0; + for (;;) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if (n >= l) + break; + } + } + if (n < (stbsp__uint32)dp) { + n = dp - n; + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + while (n) { + if ((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --n; + } + while (n >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + n -= 4; + } + } + while (n) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = '0'; + --n; + } + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) { + *s++ = stbsp__period; + tz = pr; + } + } else { + // handle xxxxx.xxxx000*000 + n = 0; + for (;;) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if (n >= (stbsp__uint32)dp) + break; + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) + *s++ = stbsp__period; + if ((l - dp) > (stbsp__uint32)pr) + l = pr + dp; + while (n < l) { + *s++ = sn[n]; + ++n; + } + tz = pr - (l - dp); + } + } + pr = 0; + + // handle k,m,g,t + if (fl & STBSP__METRIC_SUFFIX) { + char idx; + idx = 1; + if (fl & STBSP__METRIC_NOSPACE) + idx = 0; + tail[0] = idx; + tail[1] = ' '; + { + if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'. + if (fl & STBSP__METRIC_1024) + tail[idx + 1] = "_KMGT"[fl >> 24]; + else + tail[idx + 1] = "_kMGT"[fl >> 24]; + idx++; + // If printing kibits and not in jedec, add the 'i'. + if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) { + tail[idx + 1] = 'i'; + idx++; + } + tail[0] = idx; + } + } + }; + + flt_lead: + // get the length that we copied + l = (stbsp__uint32)(s - (num + 64)); + s = num + 64; + goto scopy; +#endif + + case 'B': // upper binary + case 'b': // lower binary + h = (f[0] == 'B') ? hexu : hex; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[0xb]; + } + l = (8 << 4) | (1 << 8); + goto radixnum; + + case 'o': // octal + h = hexu; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 1; + lead[1] = '0'; + } + l = (3 << 4) | (3 << 8); + goto radixnum; + + case 'p': // pointer + fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0; + pr = sizeof(void *) * 2; + fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros + // fall through - to X + + case 'X': // upper hex + case 'x': // lower hex + h = (f[0] == 'X') ? 
hexu : hex; + l = (4 << 4) | (4 << 8); + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[16]; + } + radixnum: + // get the number + if (fl & STBSP__INTMAX) + n64 = va_arg(va, stbsp__uint64); + else + n64 = va_arg(va, stbsp__uint32); + + s = num + STBSP__NUMSZ; + dp = 0; + // clear tail, and clear leading if value is zero + tail[0] = 0; + if (n64 == 0) { + lead[0] = 0; + if (pr == 0) { + l = 0; + cs = 0; + goto scopy; + } + } + // convert to string + for (;;) { + *--s = h[n64 & ((1 << (l >> 8)) - 1)]; + n64 >>= (l >> 8); + if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr))) + break; + if (fl & STBSP__TRIPLET_COMMA) { + ++l; + if ((l & 15) == ((l >> 4) & 15)) { + l &= ~15; + *--s = stbsp__comma; + } + } + }; + // get the tens and the comma pos + cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24); + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + // copy it + goto scopy; + + case 'u': // unsigned + case 'i': + case 'd': // integer + // get the integer and abs it + if (fl & STBSP__INTMAX) { + stbsp__int64 i64 = va_arg(va, stbsp__int64); + n64 = (stbsp__uint64)i64; + if ((f[0] != 'u') && (i64 < 0)) { + n64 = (stbsp__uint64)-i64; + fl |= STBSP__NEGATIVE; + } + } else { + stbsp__int32 i = va_arg(va, stbsp__int32); + n64 = (stbsp__uint32)i; + if ((f[0] != 'u') && (i < 0)) { + n64 = (stbsp__uint32)-i; + fl |= STBSP__NEGATIVE; + } + } + +#ifndef STB_SPRINTF_NOFLOAT + if (fl & STBSP__METRIC_SUFFIX) { + if (n64 < 1024) + pr = 0; + else if (pr == -1) + pr = 1; + fv = (double)(stbsp__int64)n64; + goto doafloat; + } +#endif + + // convert to string + s = num + STBSP__NUMSZ; + l = 0; + + for (;;) { + // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators) + char *o = s - 8; + if (n64 >= 100000000) { + n = (stbsp__uint32)(n64 % 100000000); + n64 /= 100000000; + } else { + n = (stbsp__uint32)n64; + n64 = 0; + } + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + do { + s -= 2; + *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + } while (n); + } + while (n) { + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = (char)(n % 10) + '0'; + n /= 10; + } + } + if (n64 == 0) { + if ((s[0] == '0') && (s != (num + STBSP__NUMSZ))) + ++s; + break; + } + while (s != o) + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = '0'; + } + } + + tail[0] = 0; + stbsp__lead_sign(fl, lead); + + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + if (l == 0) { + *--s = '0'; + l = 1; + } + cs = l + (3 << 24); + if (pr < 0) + pr = 0; + + scopy: + // get fw=leading/trailing space, pr=leading zeros + if (pr < (stbsp__int32)l) + pr = l; + n = pr + lead[0] + tail[0] + tz; + if (fw < (stbsp__int32)n) + fw = n; + fw -= n; + pr -= l; + + // handle right justify and leading zeros + if ((fl & STBSP__LEFTJUST) == 0) { + if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr + { + pr = (fw > pr) ? 
fw : pr; + fw = 0; + } else { + fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas + } + } + + // copy the spaces and/or zeros + if (fw + pr) { + stbsp__int32 i; + stbsp__uint32 c; + + // copy leading spaces (or when doing %8.4d stuff) + if ((fl & STBSP__LEFTJUST) == 0) + while (fw > 0) { + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i) { + *bf++ = ' '; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leader + sn = lead + 1; + while (lead[0]) { + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leading zeros + c = cs >> 24; + cs &= 0xffffff; + cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0; + while (pr > 0) { + stbsp__cb_buf_clamp(i, pr); + pr -= i; + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + } + while (i) { + if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) { + cs = 0; + *bf++ = stbsp__comma; + } else + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + } + + // copy leader if there is still one + sn = lead + 1; + while (lead[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy the string + n = l; + while (n) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, n); + n -= i; + STBSP__UNALIGNED(while (i >= 4) { + *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s; + bf += 4; + s += 4; + i -= 4; + }) + while (i) { + *bf++ = *s++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy trailing zeros + while (tz) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tz); + tz -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + while (i) { + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy tail if there is one + sn = tail + 1; + while (tail[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tail[0]); + tail[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // handle the left justify + if (fl & STBSP__LEFTJUST) + if (fw > 0) { + while (fw) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i--) + *bf++ = ' '; + stbsp__chk_cb_buf(1); + } + } + break; + + default: // unknown, just copy code + s = num + STBSP__NUMSZ - 1; + *s = f[0]; + l = 1; + fw = fl = 0; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + } + ++f; + } +endfmt: + + if (!callback) + *bf = 0; + else + stbsp__flush_cb(); + +done: + return tlen + (int)(bf - buf); +} + +// cleanup +#undef STBSP__LEFTJUST +#undef STBSP__LEADINGPLUS +#undef STBSP__LEADINGSPACE +#undef STBSP__LEADING_0X +#undef STBSP__LEADINGZERO +#undef STBSP__INTMAX +#undef STBSP__TRIPLET_COMMA +#undef STBSP__NEGATIVE +#undef STBSP__METRIC_SUFFIX +#undef STBSP__NUMSZ +#undef stbsp__chk_cb_bufL +#undef stbsp__chk_cb_buf +#undef stbsp__flush_cb +#undef stbsp__cb_buf_clamp + +// 
============================================================================
+// wrapper functions
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...)
+{
+   int result;
+   va_list va;
+   va_start(va, fmt);
+   result = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+   va_end(va);
+   return result;
+}
+
+typedef struct stbsp__context {
+   char *buf;
+   int count;
+   int length;
+   char tmp[STB_SPRINTF_MIN];
+} stbsp__context;
+
+static char *stbsp__clamp_callback(const char *buf, void *user, int len)
+{
+   stbsp__context *c = (stbsp__context *)user;
+   c->length += len;
+
+   if (len > c->count)
+      len = c->count;
+
+   if (len) {
+      if (buf != c->buf) {
+         const char *s, *se;
+         char *d;
+         d = c->buf;
+         s = buf;
+         se = buf + len;
+         do {
+            *d++ = *s++;
+         } while (s < se);
+      }
+      c->buf += len;
+      c->count -= len;
+   }
+
+   if (c->count <= 0)
+      return c->tmp;
+   return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can
+}
+
+static char * stbsp__count_clamp_callback( const char * buf, void * user, int len )
+{
+   stbsp__context * c = (stbsp__context*)user;
+   (void) sizeof(buf);
+
+   c->length += len;
+   return c->tmp; // only counting, so always stage output through the scratch buffer
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+   stbsp__context c;
+
+   if ( (count == 0) && !buf )
+   {
+      c.length = 0;
+
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va );
+   }
+   else
+   {
+      int l;
+
+      c.buf = buf;
+      c.count = count;
+      c.length = 0;
+
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va );
+
+      // zero-terminate
+      l = (int)( c.buf - buf );
+      if ( l >= count ) // can only be equal to (or less than) count, never greater
+         l = count - 1;
+      buf[l] = 0;
+   }
+
+   return c.length;
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...)
+{ + int result; + va_list va; + va_start(va, fmt); + + result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va); + va_end(va); + + return result; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va) +{ + return STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); +} + +// ======================================================================= +// low level float utility functions + +#ifndef STB_SPRINTF_NOFLOAT + +// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox) +#define STBSP__COPYFP(dest, src) \ + { \ + int cn; \ + for (cn = 0; cn < 8; cn++) \ + ((char *)&dest)[cn] = ((char *)&src)[cn]; \ + } + +// get float info +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value) +{ + double d; + stbsp__int64 b = 0; + + // load value and round at the frac_digits + d = value; + + STBSP__COPYFP(b, d); + + *bits = b & ((((stbsp__uint64)1) << 52) - 1); + *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023); + + return (stbsp__int32)((stbsp__uint64) b >> 63); +} + +static double const stbsp__bot[23] = { + 1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, + 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022 +}; +static double const stbsp__negbot[22] = { + 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011, + 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022 +}; +static double const stbsp__negboterr[22] = { + -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023, + 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029, + -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, -7.1542424054621921e-034, -7.1542424054621926e-035, + 2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039 +}; +static double const stbsp__top[13] = { + 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299 +}; +static double const stbsp__negtop[13] = { + 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299 +}; +static double const stbsp__toperr[13] = { + 8388608, + 6.8601809640529717e+028, + -7.253143638152921e+052, + -4.3377296974619174e+075, + -1.5559416129466825e+098, + -3.2841562489204913e+121, + -3.7745893248228135e+144, + -1.7356668416969134e+167, + -3.8893577551088374e+190, + -9.9566444326005119e+213, + 6.3641293062232429e+236, + -5.2069140800249813e+259, + -5.2504760255204387e+282 +}; +static double const stbsp__negtoperr[13] = { + 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 1.1875228833981544e-109, + -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201, + 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293, + 8.0970921678014997e-317 +}; + +#if defined(_MSC_VER) && (_MSC_VER <= 1200) +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 
100000000000000000, + 1000000000000000000, + 10000000000000000000U +}; +#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) +#else +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL, + 10000000000000000000ULL +}; +#define stbsp__tento19th (1000000000000000000ULL) +#endif + +#define stbsp__ddmulthi(oh, ol, xh, yh) \ + { \ + double ahi = 0, alo, bhi = 0, blo; \ + stbsp__int64 bt; \ + oh = xh * yh; \ + STBSP__COPYFP(bt, xh); \ + bt &= ((~(stbsp__uint64)0) << 27); \ + STBSP__COPYFP(ahi, bt); \ + alo = xh - ahi; \ + STBSP__COPYFP(bt, yh); \ + bt &= ((~(stbsp__uint64)0) << 27); \ + STBSP__COPYFP(bhi, bt); \ + blo = yh - bhi; \ + ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \ + } + +#define stbsp__ddtoS64(ob, xh, xl) \ + { \ + double ahi = 0, alo, vh, t; \ + ob = (stbsp__int64)xh; \ + vh = (double)ob; \ + ahi = (xh - vh); \ + t = (ahi - xh); \ + alo = (xh - (ahi - t)) - (vh + t); \ + ob += (stbsp__int64)(ahi + alo + xl); \ + } + +#define stbsp__ddrenorm(oh, ol) \ + { \ + double s; \ + s = oh + ol; \ + ol = ol - (s - oh); \ + oh = s; \ + } + +#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh); + +#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl); + +static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350 +{ + double ph, pl; + if ((power >= 0) && (power <= 22)) { + stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]); + } else { + stbsp__int32 e, et, eb; + double p2h, p2l; + + e = power; + if (power < 0) + e = -e; + et = (e * 0x2c9) >> 14; /* %23 */ + if (et > 13) + et = 13; + eb = e - (et * 23); + + ph = d; + pl = 0.0; + if (power < 0) { + if (eb) { + --eb; + stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]); + stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]); + } + if (et) { + stbsp__ddrenorm(ph, pl); + --et; + stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]); + stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]); + ph = p2h; + pl = p2l; + } + } else { + if (eb) { + e = eb; + if (eb > 22) + eb = 22; + e -= eb; + stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]); + if (e) { + stbsp__ddrenorm(ph, pl); + stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]); + stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl); + ph = p2h; + pl = p2l; + } + } + if (et) { + stbsp__ddrenorm(ph, pl); + --et; + stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]); + stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]); + ph = p2h; + pl = p2l; + } + } + } + stbsp__ddrenorm(ph, pl); + *ohi = ph; + *olo = pl; +} + +// given a float value, returns the significant bits in bits, and the position of the +// decimal point in decimal_pos. +/-INF and NAN are specified by special values +// returned in the decimal_pos parameter. 
+// frac_digits is normally an absolute digit count; or in the 0x80000000 bit to count
+// from the first significant digit instead (used for %g and %e)
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits)
+{
+   double d;
+   stbsp__int64 bits = 0;
+   stbsp__int32 expo, e, ng, tens;
+
+   d = value;
+   STBSP__COPYFP(bits, d);
+   expo = (stbsp__int32)((bits >> 52) & 2047);
+   ng = (stbsp__int32)((stbsp__uint64) bits >> 63);
+   if (ng)
+      d = -d;
+
+   if (expo == 2047) // is nan or inf?
+   {
+      *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf";
+      *decimal_pos = STBSP__SPECIAL;
+      *len = 3;
+      return ng;
+   }
+
+   if (expo == 0) // is zero or denormal
+   {
+      if (((stbsp__uint64) bits << 1) == 0) // do zero
+      {
+         *decimal_pos = 1;
+         *start = out;
+         out[0] = '0';
+         *len = 1;
+         return ng;
+      }
+      // find the right expo for denormals
+      {
+         stbsp__int64 v = ((stbsp__uint64)1) << 51;
+         while ((bits & v) == 0) {
+            --expo;
+            v >>= 1;
+         }
+      }
+   }
+
+   // find the decimal exponent as well as the decimal bits of the value
+   {
+      double ph, pl;
+
+      // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+      tens = expo - 1023;
+      tens = (tens < 0) ? ((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1);
+
+      // move the significant bits into position and stick them into an int
+      stbsp__raise_to_power10(&ph, &pl, d, 18 - tens);
+
+      // get as much precision from the double-double as possible
+      stbsp__ddtoS64(bits, ph, pl);
+
+      // check if we undershot
+      if (((stbsp__uint64)bits) >= stbsp__tento19th)
+         ++tens;
+   }
+
+   // now do the rounding in integer land
+   frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits);
+   if ((frac_digits < 24)) {
+      stbsp__uint32 dg = 1;
+      if ((stbsp__uint64)bits >= stbsp__powten[9])
+         dg = 10;
+      while ((stbsp__uint64)bits >= stbsp__powten[dg]) {
+         ++dg;
+         if (dg == 20)
+            goto noround;
+      }
+      if (frac_digits < dg) {
+         stbsp__uint64 r;
+         // add 0.5 at the right position and round
+         e = dg - frac_digits;
+         if ((stbsp__uint32)e >= 24)
+            goto noround;
+         r = stbsp__powten[e];
+         bits = bits + (r / 2);
+         if ((stbsp__uint64)bits >= stbsp__powten[dg])
+            ++tens;
+         bits /= r;
+      }
+   noround:;
+   }
+
+   // kill long trailing runs of zeros
+   if (bits) {
+      stbsp__uint32 n;
+      for (;;) {
+         if (bits <= 0xffffffff)
+            break;
+         if (bits % 1000)
+            goto donez;
+         bits /= 1000;
+      }
+      n = (stbsp__uint32)bits;
+      while ((n % 1000) == 0)
+         n /= 1000;
+      bits = n;
+   donez:;
+   }
+
+   // convert to string
+   out += 64;
+   e = 0;
+   for (;;) {
+      stbsp__uint32 n;
+      char *o = out - 8;
+      // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denominators be damned)
+      if (bits >= 100000000) {
+         n = (stbsp__uint32)(bits % 100000000);
+         bits /= 100000000;
+      } else {
+         n = (stbsp__uint32)bits;
+         bits = 0;
+      }
+      while (n) {
+         out -= 2;
+         *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+         n /= 100;
+         e += 2;
+      }
+      if (bits == 0) {
+         if ((e) && (out[0] == '0')) {
+            ++out;
+            --e;
+         }
+         break;
+      }
+      while (out != o) {
+         *--out = '0';
+         ++e;
+      }
+   }
+
+   *decimal_pos = tens;
+   *start = out;
+   *len = e;
+   return ng;
+}
+
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL
+#undef STBSP__COPYFP
+
+#endif // STB_SPRINTF_NOFLOAT
+
+// clean up
+#undef stbsp__uint16
+#undef stbsp__uint32
+#undef stbsp__int32
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED + +#endif // STB_SPRINTF_IMPLEMENTATION + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+------------------------------------------------------------------------------ +*/ diff --git a/vendor/stb/src/stb_truetype_wasm.c b/vendor/stb/src/stb_truetype_wasm.c deleted file mode 100644 index e0b1fdc77..000000000 --- a/vendor/stb/src/stb_truetype_wasm.c +++ /dev/null @@ -1,46 +0,0 @@ -#include <stddef.h> - -void *stbtt_malloc(size_t size); -void stbtt_free(void *ptr); - -void stbtt_qsort(void* base, size_t num, size_t size, int (*compare)(const void*, const void*)); - -double stbtt_floor(double x); -double stbtt_ceil(double x); -double stbtt_sqrt(double x); -double stbtt_pow(double x, double y); -double stbtt_fmod(double x, double y); -double stbtt_cos(double x); -double stbtt_acos(double x); -double stbtt_fabs(double x); - -unsigned long stbtt_strlen(const char *str); - -void *memcpy(void *dst, const void *src, size_t count); -void *memset(void *dst, int x, size_t count); - -#define STBRP_SORT stbtt_qsort -#define STBRP_ASSERT(condition) ((void)0) - -#define STBTT_malloc(x,u) ((void)(u),stbtt_malloc(x)) -#define STBTT_free(x,u) ((void)(u),stbtt_free(x)) - -#define STBTT_assert(condition) ((void)0) - -#define STBTT_ifloor(x) ((int) stbtt_floor(x)) -#define STBTT_iceil(x) ((int) stbtt_ceil(x)) -#define STBTT_sqrt(x) stbtt_sqrt(x) -#define STBTT_pow(x,y) stbtt_pow(x,y) -#define STBTT_fmod(x,y) stbtt_fmod(x,y) -#define STBTT_cos(x) stbtt_cos(x) -#define STBTT_acos(x) stbtt_acos(x) -#define STBTT_fabs(x) stbtt_fabs(x) -#define STBTT_strlen(x) stbtt_strlen(x) -#define STBTT_memcpy memcpy -#define STBTT_memset memset - -#define STB_RECT_PACK_IMPLEMENTATION -#include "stb_rect_pack.h" - -#define STB_TRUETYPE_IMPLEMENTATION -#include "stb_truetype.h" diff --git a/vendor/stb/truetype/stb_truetype.odin b/vendor/stb/truetype/stb_truetype.odin index e6defff5f..f1dcdf2a2 100644 --- a/vendor/stb/truetype/stb_truetype.odin +++ b/vendor/stb/truetype/stb_truetype.odin @@ -8,6 +8,7 @@ LIB :: ( "../lib/stb_truetype.lib" when ODIN_OS == .Windows else "../lib/stb_truetype.a" when ODIN_OS == .Linux else "../lib/darwin/stb_truetype.a" when ODIN_OS == .Darwin + else "../lib/stb_truetype_wasm.o" when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 else "" ) @@ -15,10 +16,12 @@ when LIB != "" { when !#exists(LIB) { #panic("Could not find the compiled STB libraries, they can be compiled by running `make -C \"" + ODIN_ROOT + "vendor/stb/src\"`") } +} - foreign import stbtt { LIB } -} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { +when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 { foreign import stbtt "../lib/stb_truetype_wasm.o" +} else when LIB != "" { + foreign import stbtt { LIB } } else { foreign import stbtt "system:stb_truetype" } diff --git a/vendor/stb/truetype/stb_truetype_wasm.odin b/vendor/stb/truetype/stb_truetype_wasm.odin index 0b5ff24bd..f36239086 100644 --- a/vendor/stb/truetype/stb_truetype_wasm.odin +++ b/vendor/stb/truetype/stb_truetype_wasm.odin @@ -1,82 +1,4 @@ #+build wasm32, wasm64p32 package stb_truetype -import "base:builtin" -import "base:intrinsics" -import "base:runtime" - -import "core:c" -import "core:math" -import "core:slice" -import "core:sort" - -@(require, linkage="strong", link_name="stbtt_malloc") -malloc :: proc "c" (size: uint) -> rawptr { - context = runtime.default_context() - ptr, _ := runtime.mem_alloc_non_zeroed(int(size)) - return raw_data(ptr) -} - -@(require, linkage="strong", link_name="stbtt_free") -free :: proc "c" (ptr: rawptr) { - context = runtime.default_context() - builtin.free(ptr) -} - -@(require, linkage="strong", 
link_name="stbtt_qsort") -qsort :: proc "c" (base: rawptr, num: uint, size: uint, cmp: proc "c" (a, b: rawptr) -> i32) { - context = runtime.default_context() - - Inputs :: struct { - base: rawptr, - num: uint, - size: uint, - cmp: proc "c" (a, b: rawptr) -> i32, - } - - sort.sort({ - collection = &Inputs{base, num, size, cmp}, - len = proc(it: sort.Interface) -> int { - inputs := (^Inputs)(it.collection) - return int(inputs.num) - }, - less = proc(it: sort.Interface, i, j: int) -> bool { - inputs := (^Inputs)(it.collection) - a := rawptr(uintptr(inputs.base) + (uintptr(i) * uintptr(inputs.size))) - b := rawptr(uintptr(inputs.base) + (uintptr(j) * uintptr(inputs.size))) - return inputs.cmp(a, b) < 0 - }, - swap = proc(it: sort.Interface, i, j: int) { - inputs := (^Inputs)(it.collection) - - a := rawptr(uintptr(inputs.base) + (uintptr(i) * uintptr(inputs.size))) - b := rawptr(uintptr(inputs.base) + (uintptr(j) * uintptr(inputs.size))) - - slice.ptr_swap_non_overlapping(a, b, int(inputs.size)) - }, - }) -} - -@(require, linkage="strong", link_name="stbtt_floor") -floor :: proc "c" (x: f64) -> f64 { return math.floor(x) } -@(require, linkage="strong", link_name="stbtt_ceil") -ceil :: proc "c" (x: f64) -> f64 { return math.ceil(x) } -@(require, linkage="strong", link_name="stbtt_sqrt") -sqrt :: proc "c" (x: f64) -> f64 { return math.sqrt(x) } -@(require, linkage="strong", link_name="stbtt_pow") -pow :: proc "c" (x, y: f64) -> f64 { return math.pow(x, y) } -@(require, linkage="strong", link_name="stbtt_fmod") -fmod :: proc "c" (x, y: f64) -> f64 { return math.mod(x, y) } -@(require, linkage="strong", link_name="stbtt_cos") -cos :: proc "c" (x: f64) -> f64 { return math.cos(x) } -@(require, linkage="strong", link_name="stbtt_acos") -acos :: proc "c" (x: f64) -> f64 { return math.acos(x) } -@(require, linkage="strong", link_name="stbtt_fabs") -fabs :: proc "c" (x: f64) -> f64 { return math.abs(x) } - -@(require, linkage="strong", link_name="stbtt_strlen") -strlen :: proc "c" (str: cstring) -> c.ulong { return c.ulong(len(str)) } - -// NOTE: defined in runtime. -// void *memcpy(void *dst, const void *src, size_t count); -// void *memset(void *dst, int x, size_t count); +@(require) import _ "vendor:libc" |
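
For reference, a minimal usage sketch for the snprintf-style wrappers added in the stb_sprintf.h hunk above. This is not part of the patch: it assumes the default STB_SPRINTF_DECORATE prefix (stbsp_) and that stb_sprintf.h is on the include path. As the wrapper code shows, stbsp_snprintf zero-terminates the buffer (for count > 0) and, like C99 snprintf, returns the full untruncated length, so a NULL buffer with count == 0 routes through the count-only callback and simply measures the output.

#define STB_SPRINTF_IMPLEMENTATION
#include "stb_sprintf.h"

#include <stdio.h>

int main(void)
{
   char buf[64];

   // Returns the complete formatted length even if output is clamped to
   // count - 1 characters; %'d is stb_sprintf's thousand-separator extension.
   int n = stbsp_snprintf(buf, (int)sizeof(buf), "total: %'d bytes", 1234567);
   printf("%s (length %d)\n", buf, n);

   // NULL buffer + zero count: measure only, nothing is written.
   int needed = stbsp_snprintf(NULL, 0, "pi ~ %.5f", 3.14159);
   printf("would need %d characters\n", needed);
   return 0;
}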