author	gingerBill <gingerBill@users.noreply.github.com>	2024-05-20 10:38:09 +0100
committer	GitHub <noreply@github.com>	2024-05-20 10:38:09 +0100
commit	891fefe1178efffd11c31439cb1ed3d283b5a24f (patch)
tree	6edc42cb9d2d7a29ba708878869db6a58e8b8381 /base
parent	5473758467610e8d18581f0dab11dd5f5c416484 (diff)
parent	f42b1c49731356699eb0c34736c7ef485783d9e2 (diff)
Merge pull request #3592 from laytan/wasm-gpa
add a default heap/general purpose allocator for wasm to `base:runtime`
Diffstat (limited to 'base')
-rw-r--r--	base/runtime/default_allocators_general.odin	3
-rw-r--r--	base/runtime/internal.odin	30
-rw-r--r--	base/runtime/os_specific_wasi.odin	4
-rw-r--r--	base/runtime/wasm_allocator.odin	870
4 files changed, 905 insertions(+), 2 deletions(-)
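
For reference, a minimal usage sketch of the new allocator. This is an illustration rather than part of the change: it assumes a wasm32 or wasm64p32 build, and the `example` package, `demo` procedure, variable names, and the alignment of 16 are made up.

	package example

	import "base:runtime"

	demo :: proc() {
		// On wasm targets, context.allocator now routes to the wasm allocator by
		// default (via default_allocator), so ordinary make/new calls already use it.
		buf := make([]byte, 4096)
		defer delete(buf)

		// A separately initialized allocator instance can also be used explicitly.
		state: runtime.WASM_Allocator
		runtime.wasm_allocator_init(&state, alignment = 16)
		a := runtime.wasm_allocator(&state)
		ints := make([]int, 128, a)
		defer delete(ints, a)
	}
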
diff --git a/base/runtime/default_allocators_general.odin b/base/runtime/default_allocators_general.odin
index cbaf4d22a..ab4dd1db8 100644
--- a/base/runtime/default_allocators_general.odin
+++ b/base/runtime/default_allocators_general.odin
@@ -6,6 +6,9 @@ when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
} else when ODIN_DEFAULT_TO_PANIC_ALLOCATOR {
default_allocator_proc :: panic_allocator_proc
default_allocator :: panic_allocator
+} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+ default_allocator :: default_wasm_allocator
+ default_allocator_proc :: wasm_allocator_proc
} else {
default_allocator :: heap_allocator
default_allocator_proc :: heap_allocator_proc
diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin
index aaca1e703..8e1b3d633 100644
--- a/base/runtime/internal.odin
+++ b/base/runtime/internal.odin
@@ -40,6 +40,24 @@ align_forward_int :: #force_inline proc(ptr, align: int) -> int {
return p
}
+is_power_of_two_uint :: #force_inline proc "contextless" (x: uint) -> bool {
+ if x <= 0 {
+ return false
+ }
+ return (x & (x-1)) == 0
+}
+
+align_forward_uint :: #force_inline proc(ptr, align: uint) -> uint {
+ assert(is_power_of_two_uint(align))
+
+ p := ptr
+ modulo := p & (align-1)
+ if modulo != 0 {
+ p += align - modulo
+ }
+ return p
+}
+
is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool {
if x <= 0 {
return false
@@ -58,6 +76,18 @@ align_forward_uintptr :: #force_inline proc(ptr, align: uintptr) -> uintptr {
return p
}
+is_power_of_two :: proc {
+ is_power_of_two_int,
+ is_power_of_two_uint,
+ is_power_of_two_uintptr,
+}
+
+align_forward :: proc {
+ align_forward_int,
+ align_forward_uint,
+ align_forward_uintptr,
+}
+
mem_zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
if data == nil {
return nil
diff --git a/base/runtime/os_specific_wasi.odin b/base/runtime/os_specific_wasi.odin
index 94fa5fa89..0e229ac7e 100644
--- a/base/runtime/os_specific_wasi.odin
+++ b/base/runtime/os_specific_wasi.odin
@@ -5,7 +5,7 @@ package runtime
import "core:sys/wasm/wasi"
_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
- data := (wasi.ciovec_t)(data)
- n, err := wasi.fd_write(1, {data})
+ data_iovec := (wasi.ciovec_t)(data)
+ n, err := wasi.fd_write(1, {data_iovec})
return int(n), _OS_Errno(err)
}
diff --git a/base/runtime/wasm_allocator.odin b/base/runtime/wasm_allocator.odin
new file mode 100644
index 000000000..acfc80b0a
--- /dev/null
+++ b/base/runtime/wasm_allocator.odin
@@ -0,0 +1,870 @@
+//+build wasm32, wasm64p32
+package runtime
+
+import "base:intrinsics"
+
+/*
+Port of emmalloc, modified for use in Odin.
+
+Invariants:
+ - Per-allocation header overhead is 8 bytes; the smallest allocated payload
+   is 8 bytes, and payload sizes are multiples of 4 bytes.
+ - Acquired memory blocks are subdivided into disjoint regions that lie
+ next to each other.
+ - A region is either in use or free.
+   Used regions may be adjacent, and a used and a free region
+   may be adjacent, but never two free regions - those would be
+   merged.
+ - Memory allocation takes constant time, unless the alloc needs to wasm_memory_grow()
+ or memory is very close to being exhausted.
+ - Free and used regions are managed inside "root regions", which are slabs
+ of memory acquired via wasm_memory_grow().
+ - Memory retrieved using wasm_memory_grow() cannot be given back to the OS.
+ Therefore, frees are internal to the allocator.
+
+Copyright (c) 2010-2014 Emscripten authors, see AUTHORS file.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+WASM_Allocator :: struct #no_copy {
+ // The minimum alignment of allocations.
+ alignment: uint,
+ // A region that contains as payload a singly linked list of pointers to the
+ // root regions of each disjoint block of regions.
+ list_of_all_regions: ^Root_Region,
+ // For each bucket, maintain a linked-list head node. Each head node is a sentinel
+ // that does not itself represent any free space; it exists to avoid awkward special
+ // casing (if node == freeRegionHeadNode) when adding and removing elements from the
+ // list. This guarantees the sentinel node is always present, and the actual free
+ // region list elements start at free_region_buckets[i].next.
+ free_region_buckets: [NUM_FREE_BUCKETS]Region,
+ // A bitmask that tracks the population status of each of the 64 free region buckets:
+ // a zero at bit position i means that free list bucket i is empty. This bitmask is
+ // used to avoid redundant scanning of the 64 buckets: by looking at the bitmask we
+ // can find, in constant time, an index to a bucket that contains free memory of the
+ // desired size.
+ free_region_buckets_used: BUCKET_BITMASK_T,
+ // Because wasm memory can only be grown in pages of 64k at a time, any spilled/unused
+ // bytes left over from the allocated pages are kept here and used first when more
+ // bytes are needed.
+ spill: []byte,
+ // Mutex for thread safety, only used if the target feature "atomics" is enabled.
+ mu: Mutex_State,
+}
+
+// Not required to be called; it is invoked automatically on the first allocation otherwise.
+wasm_allocator_init :: proc(a: ^WASM_Allocator, alignment: uint = 8) {
+ assert(is_power_of_two(alignment), "alignment must be a power of two")
+ assert(alignment > 4, "alignment must be more than 4")
+
+ a.alignment = alignment
+
+ for i in 0..<NUM_FREE_BUCKETS {
+ a.free_region_buckets[i].next = &a.free_region_buckets[i]
+ a.free_region_buckets[i].prev = a.free_region_buckets[i].next
+ }
+
+ if !claim_more_memory(a, 3*size_of(Region)) {
+ panic("wasm_allocator: initial memory could not be allocated")
+ }
+}
+
+global_default_wasm_allocator_data: WASM_Allocator
+
+default_wasm_allocator :: proc() -> Allocator {
+ return wasm_allocator(&global_default_wasm_allocator_data)
+}
+
+wasm_allocator :: proc(a: ^WASM_Allocator) -> Allocator {
+ return {
+ data = a,
+ procedure = wasm_allocator_proc,
+ }
+}
+
+wasm_allocator_proc :: proc(a: rawptr, mode: Allocator_Mode, size, alignment: int, old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
+ a := (^WASM_Allocator)(a)
+ if a == nil {
+ a = &global_default_wasm_allocator_data
+ }
+
+ if a.alignment == 0 {
+ wasm_allocator_init(a)
+ }
+
+ switch mode {
+ case .Alloc:
+ ptr := aligned_alloc(a, uint(alignment), uint(size), loc)
+ if ptr == nil {
+ return nil, .Out_Of_Memory
+ }
+ intrinsics.mem_zero(ptr, size)
+ return ([^]byte)(ptr)[:size], nil
+
+ case .Alloc_Non_Zeroed:
+ ptr := aligned_alloc(a, uint(alignment), uint(size), loc)
+ if ptr == nil {
+ return nil, .Out_Of_Memory
+ }
+ return ([^]byte)(ptr)[:size], nil
+
+ case .Resize:
+ ptr := aligned_realloc(a, old_memory, uint(alignment), uint(size), loc)
+ if ptr == nil {
+ return nil, .Out_Of_Memory
+ }
+
+ bytes := ([^]byte)(ptr)[:size]
+
+ if size > old_size {
+ new_region := raw_data(bytes[old_size:])
+ intrinsics.mem_zero(new_region, size - old_size)
+ }
+
+ return bytes, nil
+
+ case .Resize_Non_Zeroed:
+ ptr := aligned_realloc(a, old_memory, uint(alignment), uint(size), loc)
+ if ptr == nil {
+ return nil, .Out_Of_Memory
+ }
+ return ([^]byte)(ptr)[:size], nil
+
+ case .Free:
+ free(a, old_memory, loc)
+ return nil, nil
+
+ case .Free_All, .Query_Info:
+ return nil, .Mode_Not_Implemented
+
+ case .Query_Features:
+ set := (^Allocator_Mode_Set)(old_memory)
+ if set != nil {
+ set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Resize_Non_Zeroed, .Query_Features }
+ }
+ return nil, nil
+ }
+
+ unreachable()
+}
+
+// Returns the total amount of memory claimed by the allocator (both free and used).
+// If `nil` is given, the global allocator is used.
+wasm_allocator_size :: proc(a: ^WASM_Allocator = nil) -> (size: uint) {
+ a := a
+ if a == nil {
+ a = &global_default_wasm_allocator_data
+ }
+
+ lock(a)
+ defer unlock(a)
+
+ root := a.list_of_all_regions
+ for root != nil {
+ size += uint(uintptr(root.end_ptr) - uintptr(root))
+ root = root.next
+ }
+
+ size += uint(len(a.spill))
+
+ return
+}
+
+// Returns the amount of free memory held by the allocator.
+// If `nil` is given, the global allocator is used.
+wasm_allocator_free_space :: proc(a: ^WASM_Allocator = nil) -> (free: uint) {
+ a := a
+ if a == nil {
+ a = &global_default_wasm_allocator_data
+ }
+
+ lock(a)
+ defer unlock(a)
+
+ bucket_index: u64 = 0
+ bucket_mask := a.free_region_buckets_used
+
+ for bucket_mask != 0 {
+ index_add := intrinsics.count_trailing_zeros(bucket_mask)
+ bucket_index += index_add
+ bucket_mask >>= index_add
+ for free_region := a.free_region_buckets[bucket_index].next; free_region != &a.free_region_buckets[bucket_index]; free_region = free_region.next {
+ free += free_region.size - REGION_HEADER_SIZE
+ }
+ bucket_index += 1
+ bucket_mask >>= 1
+ }
+
+ free += uint(len(a.spill))
+
+ return
+}
+
+@(private="file")
+NUM_FREE_BUCKETS :: 64
+@(private="file")
+BUCKET_BITMASK_T :: u64
+
+// Dynamic memory is subdivided into regions, in the format
+
+// <size:u32> ..... <size:u32> | <size:u32> ..... <size:u32> | <size:u32> ..... <size:u32> | .....
+
+// That is, at the bottom and top end of each memory region, the size of that region is stored. That allows traversing the
+// memory regions backwards and forwards. Because each allocation size is a multiple of 4 bytes, the lowest two bits of
+// each size field are unused. Free regions are distinguished from used regions by having the FREE_REGION_FLAG bit set
+// in the size field. I.e. for free regions, the size field is odd, and for used regions, the size field reads even.
+@(private="file")
+FREE_REGION_FLAG :: 0x1
+
+// Attempts to allocate more than this many bytes would overflow the region size calculation,
+// so allocations larger than this are rejected immediately on entry.
+@(private="file")
+MAX_ALLOC_SIZE :: 0xFFFFFFC7
+
+// A free region has the following structure:
+// <size:uint> <prevptr> <nextptr> ... <size:uint>
+
+@(private="file")
+Region :: struct {
+ size: uint,
+ prev, next: ^Region,
+ _at_the_end_of_this_struct_size: uint,
+}
+
+// Each memory block starts with a Root_Region at the beginning.
+// The Root_Region specifies the size of the region block, and forms a linked
+// list of all Root_Regions in the program, starting with `list_of_all_regions`
+// below.
+@(private="file")
+Root_Region :: struct {
+ size: u32,
+ next: ^Root_Region,
+ end_ptr: ^byte,
+}
+
+@(private="file")
+Mutex_State :: enum u32 {
+ Unlocked = 0,
+ Locked = 1,
+ Waiting = 2,
+}
+
+@(private="file")
+lock :: proc(a: ^WASM_Allocator) {
+ when intrinsics.has_target_feature("atomics") {
+ @(cold)
+ lock_slow :: proc(a: ^WASM_Allocator, curr_state: Mutex_State) {
+ new_state := curr_state // Make a copy of it
+
+ spin_lock: for spin in 0..<i32(100) {
+ state, ok := intrinsics.atomic_compare_exchange_weak_explicit(&a.mu, .Unlocked, new_state, .Acquire, .Consume)
+ if ok {
+ return
+ }
+
+ if state == .Waiting {
+ break spin_lock
+ }
+
+ for i := min(spin+1, 32); i > 0; i -= 1 {
+ intrinsics.cpu_relax()
+ }
+ }
+
+ // Set just in case 100 iterations did not do it
+ new_state = .Waiting
+
+ for {
+ if intrinsics.atomic_exchange_explicit(&a.mu, .Waiting, .Acquire) == .Unlocked {
+ return
+ }
+
+ assert(intrinsics.wasm_memory_atomic_wait32((^u32)(&a.mu), u32(new_state), -1) != 0)
+ intrinsics.cpu_relax()
+ }
+ }
+
+
+ if v := intrinsics.atomic_exchange_explicit(&a.mu, .Locked, .Acquire); v != .Unlocked {
+ lock_slow(a, v)
+ }
+ }
+}
+
+@(private="file")
+unlock :: proc(a: ^WASM_Allocator) {
+ when intrinsics.has_target_feature("atomics") {
+ @(cold)
+ unlock_slow :: proc(a: ^WASM_Allocator) {
+ for {
+ s := intrinsics.wasm_memory_atomic_notify32((^u32)(&a.mu), 1)
+ if s >= 1 {
+ return
+ }
+ }
+ }
+
+ switch intrinsics.atomic_exchange_explicit(&a.mu, .Unlocked, .Release) {
+ case .Unlocked:
+ unreachable()
+ case .Locked:
+ // Okay
+ case .Waiting:
+ unlock_slow(a)
+ }
+ }
+}
+
+@(private="file")
+assert_locked :: proc(a: ^WASM_Allocator) {
+ when intrinsics.has_target_feature("atomics") {
+ assert(intrinsics.atomic_load(&a.mu) != .Unlocked)
+ }
+}
+
+@(private="file")
+has_alignment_uintptr :: proc(ptr: uintptr, #any_int alignment: uintptr) -> bool {
+ return ptr & (alignment-1) == 0
+}
+
+@(private="file")
+has_alignment_uint :: proc(ptr: uint, alignment: uint) -> bool {
+ return ptr & (alignment-1) == 0
+}
+
+@(private="file")
+has_alignment :: proc {
+ has_alignment_uintptr,
+ has_alignment_uint,
+}
+
+@(private="file")
+REGION_HEADER_SIZE :: 2*size_of(uint)
+
+@(private="file")
+SMALLEST_ALLOCATION_SIZE :: 2*size_of(rawptr)
+
+// Subdivide regions of free space into distinct circular doubly linked lists, where each linked list
+// represents a range of free space blocks. The following function compute_free_list_bucket() converts
+// an allocation size to the bucket index that should be looked at.
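+// Payload sizes below 128 bytes map linearly, one bucket per 8 bytes (buckets 0..14); larger sizes map to
+// roughly logarithmically spaced buckets, capped at bucket NUM_FREE_BUCKETS-1.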
+#assert(NUM_FREE_BUCKETS == 64, "Following function is tailored specifically for the NUM_FREE_BUCKETS == 64 case")
+@(private="file")
+compute_free_list_bucket :: proc(size: uint) -> uint {
+ if size < 128 { return (size >> 3) - 1 }
+
+ clz := intrinsics.count_leading_zeros(i32(size))
+ bucket_index: i32 = ((clz > 19) \
+ ? 110 - (clz<<2) + ((i32)(size >> (u32)(29-clz)) ~ 4) \
+ : min( 71 - (clz<<1) + ((i32)(size >> (u32)(30-clz)) ~ 2), NUM_FREE_BUCKETS-1))
+
+ assert(bucket_index >= 0)
+ assert(bucket_index < NUM_FREE_BUCKETS)
+ return uint(bucket_index)
+}
+
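+// Returns the region that lies immediately below `region` in memory, found via the size stored in the
+// word just before it (the previous region's ceiling size, with FREE_REGION_FLAG masked off).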
+@(private="file")
+prev_region :: proc(region: ^Region) -> ^Region {
+ prev_region_size := ([^]uint)(region)[-1]
+ prev_region_size = prev_region_size & ~uint(FREE_REGION_FLAG)
+ return (^Region)(uintptr(region)-uintptr(prev_region_size))
+}
+
+@(private="file")
+next_region :: proc(region: ^Region) -> ^Region {
+ return (^Region)(uintptr(region)+uintptr(region.size))
+}
+
+@(private="file")
+region_ceiling_size :: proc(region: ^Region) -> uint {
+ return ([^]uint)(uintptr(region)+uintptr(region.size))[-1]
+}
+
+@(private="file")
+region_is_free :: proc(r: ^Region) -> bool {
+ return region_ceiling_size(r) & FREE_REGION_FLAG >= 1
+}
+
+@(private="file")
+region_is_in_use :: proc(r: ^Region) -> bool {
+ return r.size == region_ceiling_size(r)
+}
+
+@(private="file")
+region_payload_start_ptr :: proc(r: ^Region) -> [^]byte {
+ return ([^]byte)(r)[size_of(uint):]
+}
+
+@(private="file")
+region_payload_end_ptr :: proc(r: ^Region) -> [^]byte {
+ return ([^]byte)(r)[r.size-size_of(uint):]
+}
+
+@(private="file")
+create_used_region :: proc(ptr: rawptr, size: uint) {
+ assert(has_alignment(uintptr(ptr), size_of(uint)))
+ assert(has_alignment(size, size_of(uint)))
+ assert(size >= size_of(Region))
+
+ uptr := ([^]uint)(ptr)
+ uptr[0] = size
+ uptr[size/size_of(uint)-1] = size
+}
+
+@(private="file")
+create_free_region :: proc(ptr: rawptr, size: uint) {
+ assert(has_alignment(uintptr(ptr), size_of(uint)))
+ assert(has_alignment(size, size_of(uint)))
+ assert(size >= size_of(Region))
+
+ free_region := (^Region)(ptr)
+ free_region.size = size
+ ([^]uint)(ptr)[size/size_of(uint)-1] = size | FREE_REGION_FLAG
+}
+
+@(private="file")
+prepend_to_free_list :: proc(region: ^Region, prepend_to: ^Region) {
+ assert(region_is_free(region))
+ region.next = prepend_to
+ region.prev = prepend_to.prev
+ prepend_to.prev = region
+ region.prev.next = region
+}
+
+@(private="file")
+unlink_from_free_list :: proc(region: ^Region) {
+ assert(region_is_free(region))
+ region.prev.next = region.next
+ region.next.prev = region.prev
+}
+
+@(private="file")
+link_to_free_list :: proc(a: ^WASM_Allocator, free_region: ^Region) {
+ assert(free_region.size >= size_of(Region))
+ bucket_index := compute_free_list_bucket(free_region.size-REGION_HEADER_SIZE)
+ free_list_head := &a.free_region_buckets[bucket_index]
+ free_region.prev = free_list_head
+ free_region.next = free_list_head.next
+ free_list_head.next = free_region
+ free_region.next.prev = free_region
+ a.free_region_buckets_used |= BUCKET_BITMASK_T(1) << bucket_index
+}
+
+@(private="file")
+claim_more_memory :: proc(a: ^WASM_Allocator, num_bytes: uint) -> bool {
+
+ PAGE_SIZE :: 64 * 1024
+
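+ // Grows wasm linear memory by `page_count` 64KiB pages and returns the newly mapped bytes, or nil if the grow failed.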
+ page_alloc :: proc(page_count: int) -> []byte {
+ prev_page_count := intrinsics.wasm_memory_grow(0, uintptr(page_count))
+ if prev_page_count < 0 { return nil }
+
+ ptr := ([^]byte)(uintptr(prev_page_count) * PAGE_SIZE)
+ return ptr[:page_count * PAGE_SIZE]
+ }
+
+ alloc :: proc(a: ^WASM_Allocator, num_bytes: uint) -> (bytes: [^]byte) #no_bounds_check {
+ if uint(len(a.spill)) >= num_bytes {
+ bytes = raw_data(a.spill[:num_bytes])
+ a.spill = a.spill[num_bytes:]
+ return
+ }
+
+ pages := int((num_bytes / PAGE_SIZE) + 1)
+ allocated := page_alloc(pages)
+ if allocated == nil { return nil }
+
+ // If the allocated memory is a direct continuation of the spill from before,
+ // we can just extend the spill.
+ spill_end := uintptr(raw_data(a.spill)) + uintptr(len(a.spill))
+ if spill_end == uintptr(raw_data(allocated)) {
+ raw_spill := transmute(^Raw_Slice)(&a.spill)
+ raw_spill.len += len(allocated)
+ } else {
+ // Otherwise, we have to "waste" the previous spill.
+ // Now this is probably uncommon, and will only happen if another code path
+ // is also requesting pages.
+ a.spill = allocated
+ }
+
+ bytes = raw_data(a.spill)
+ a.spill = a.spill[num_bytes:]
+ return
+ }
+
+ num_bytes := num_bytes
+ num_bytes = align_forward(num_bytes, a.alignment)
+
+ start_ptr := alloc(a, uint(num_bytes))
+ if start_ptr == nil { return false }
+
+ assert(has_alignment(uintptr(start_ptr), align_of(uint)))
+ end_ptr := start_ptr[num_bytes:]
+
+ end_sentinel_region := (^Region)(end_ptr[-size_of(Region):])
+ create_used_region(end_sentinel_region, size_of(Region))
+
+ // If we are the sole user of wasm_memory_grow(), it will feed us continuous/consecutive memory addresses - take advantage
+// of that if so: instead of creating two disjoint memory region blocks, expand the previous one to a larger size.
+ prev_alloc_end_address := a.list_of_all_regions != nil ? a.list_of_all_regions.end_ptr : nil
+ if start_ptr == prev_alloc_end_address {
+ prev_end_sentinel := prev_region((^Region)(start_ptr))
+ assert(region_is_in_use(prev_end_sentinel))
+ prev_region := prev_region(prev_end_sentinel)
+
+ a.list_of_all_regions.end_ptr = end_ptr
+
+ // Two scenarios: either the last region of the previous block was in use, in which case we need to create
+ // a new free region in the newly allocated space; or it was free, in which case we can extend that region
+ // to cover a larger size.
+ if region_is_free(prev_region) {
+ new_free_region_size := uint(uintptr(end_sentinel_region) - uintptr(prev_region))
+ unlink_from_free_list(prev_region)
+ create_free_region(prev_region, new_free_region_size)
+ link_to_free_list(a, prev_region)
+ return true
+ }
+
+ start_ptr = start_ptr[-size_of(Region):]
+ } else {
+ create_used_region(start_ptr, size_of(Region))
+
+ new_region_block := (^Root_Region)(start_ptr)
+ new_region_block.next = a.list_of_all_regions
+ new_region_block.end_ptr = end_ptr
+ a.list_of_all_regions = new_region_block
+ start_ptr = start_ptr[size_of(Region):]
+ }
+
+ create_free_region(start_ptr, uint(uintptr(end_sentinel_region)-uintptr(start_ptr)))
+ link_to_free_list(a, (^Region)(start_ptr))
+ return true
+}
+
+@(private="file")
+validate_alloc_size :: proc(size: uint) -> uint {
+ #assert(size_of(uint) >= size_of(uintptr))
+ #assert(size_of(uint) % size_of(uintptr) == 0)
+
+ // NOTE: emmalloc aligns this forward on pointer size, but I think that is a mistake and would
+ // misbehave on wasm64p32.
+
+ validated_size := size > SMALLEST_ALLOCATION_SIZE ? align_forward(size, size_of(uint)) : SMALLEST_ALLOCATION_SIZE
+ assert(validated_size >= size) // Assert we haven't wrapped.
+
+ return validated_size
+}
+
+@(private="file")
+allocate_memory :: proc(a: ^WASM_Allocator, alignment: uint, size: uint, loc := #caller_location) -> rawptr {
+
+ attempt_allocate :: proc(a: ^WASM_Allocator, free_region: ^Region, alignment, size: uint) -> rawptr {
+ assert_locked(a)
+ free_region := free_region
+
+ payload_start_ptr := uintptr(region_payload_start_ptr(free_region))
+ payload_start_ptr_aligned := align_forward(payload_start_ptr, uintptr(alignment))
+ payload_end_ptr := uintptr(region_payload_end_ptr(free_region))
+
+ if payload_start_ptr_aligned + uintptr(size) > payload_end_ptr {
+ return nil
+ }
+
+ // We have enough free space, so the memory allocation will be made into this region. Remove this free region
+ // from the list of free regions: whatever slop remains will be later added back to the free region pool.
+ unlink_from_free_list(free_region)
+
+ // Before we proceed further, fix up the boundary between this and the preceding region,
+ // so that the boundary between the two regions happens at a right spot for the payload to be aligned.
+ if payload_start_ptr != payload_start_ptr_aligned {
+ prev := prev_region(free_region)
+ assert(region_is_in_use(prev))
+ region_boundary_bump_amount := payload_start_ptr_aligned - payload_start_ptr
+ new_this_region_size := free_region.size - uint(region_boundary_bump_amount)
+ create_used_region(prev, prev.size + uint(region_boundary_bump_amount))
+ free_region = (^Region)(uintptr(free_region) + region_boundary_bump_amount)
+ free_region.size = new_this_region_size
+ }
+
+ // Next, we need to decide whether this region is so large that it should be split into two regions,
+ // one representing the newly used memory area, and at the high end a remaining leftover free area.
+ // This split into two is always done if there is enough space for the high end to fit a region.
+ // Carve 'size' bytes of payload off this region. So,
+ // [sz prev next sz]
+ // becomes
+ // [sz payload sz] [sz prev next sz]
+ if size_of(Region) + REGION_HEADER_SIZE + size <= free_region.size {
+ new_free_region := (^Region)(uintptr(free_region) + REGION_HEADER_SIZE + uintptr(size))
+ create_free_region(new_free_region, free_region.size - size - REGION_HEADER_SIZE)
+ link_to_free_list(a, new_free_region)
+ create_used_region(free_region, size + REGION_HEADER_SIZE)
+ } else {
+ // There is not enough space to split the free memory region into used+free parts, so consume the whole
+ // region as used memory, not leaving a free memory region behind.
+ // Initialize the free region as used by resetting the ceiling size to the same value as the size at bottom.
+ ([^]uint)(uintptr(free_region) + uintptr(free_region.size))[-1] = free_region.size
+ }
+
+ return rawptr(uintptr(free_region) + size_of(uint))
+ }
+
+ assert_locked(a)
+ assert(is_power_of_two(alignment))
+ assert(size <= MAX_ALLOC_SIZE, "allocation too big", loc=loc)
+
+ alignment := alignment
+ alignment = max(alignment, a.alignment)
+
+ size := size
+ size = validate_alloc_size(size)
+
+ // Attempt to allocate memory starting from smallest bucket that can contain the required amount of memory.
+ // Under normal alignment conditions this should always be the first or second bucket we look at, but if
+ // performing an allocation with complex alignment, we may need to look at multiple buckets.
+ bucket_index := compute_free_list_bucket(size)
+ bucket_mask := a.free_region_buckets_used >> bucket_index
+
+ // Loop through each bucket that has free regions in it, based on bits set in free_region_buckets_used bitmap.
+ for bucket_mask != 0 {
+ index_add := intrinsics.count_trailing_zeros(bucket_mask)
+ bucket_index += uint(index_add)
+ bucket_mask >>= index_add
+ assert(bucket_index <= NUM_FREE_BUCKETS-1)
+ assert(a.free_region_buckets_used & (BUCKET_BITMASK_T(1) << bucket_index) > 0)
+
+ free_region := a.free_region_buckets[bucket_index].next
+ assert(free_region != nil)
+ if free_region != &a.free_region_buckets[bucket_index] {
+ ptr := attempt_allocate(a, free_region, alignment, size)
+ if ptr != nil {
+ return ptr
+ }
+
+ // We were not able to allocate from the first region found in this bucket, so penalize
+ // the region by cycling it to the end of the circular doubly linked list (constant time).
+ // This provides a randomized guarantee that when performing allocations of size k to a
+ // bucket of [k-something, k+something] range, we will not always attempt to satisfy the
+ // allocation from the same available region at the front of the list, but we try each
+ // region in turn.
+ unlink_from_free_list(free_region)
+ prepend_to_free_list(free_region, &a.free_region_buckets[bucket_index])
+ // But do not stick around to attempt to look at other regions in this bucket - move
+ // to search the next populated bucket index if this did not fit. This gives a practical
+ // "allocation in constant time" guarantee, since the next higher bucket will only have
+ // regions that are all of strictly larger size than the requested allocation. Only if
+ // there is a difficult alignment requirement may we fail to allocate from
+ // a region in the next bucket, and if so, we keep trying higher buckets until one of them
+ // works.
+ bucket_index += 1
+ bucket_mask >>= 1
+ } else {
+ // This bucket turned out not to contain any regions after all;
+ // we just had a stale bit set marking it as populated.
+ // Reset the bit so that we do not
+ // redundantly look at this bucket again.
+ a.free_region_buckets_used &= ~(BUCKET_BITMASK_T(1) << bucket_index)
+ bucket_mask ~= 1
+ }
+
+ assert((bucket_index == NUM_FREE_BUCKETS && bucket_mask == 0) || (bucket_mask == a.free_region_buckets_used >> bucket_index))
+ }
+
+ // None of the buckets were able to accommodate the allocation. If this happens we are almost out of memory.
+ // The largest bucket might still contain some suitable regions, but we only looked at one region in that bucket, so
+ // as a last resort, loop through more free regions in the bucket that represents the largest allocations available.
+ // But only do so if that bucket is not one of the first thirty buckets: those represent allocatable
+ // areas smaller than 2048 bytes, which could be a lot of scrap.
+ // In that case, prefer to claim more memory right away.
+ largest_bucket_index := NUM_FREE_BUCKETS - 1 - intrinsics.count_leading_zeros(a.free_region_buckets_used)
+ // free_region will be nil if there is absolutely no free memory left. (all buckets are empty)
+ free_region := a.free_region_buckets_used > 0 ? a.free_region_buckets[largest_bucket_index].next : nil
+ // The 30 first free region buckets cover memory blocks < 2048 bytes, so skip looking at those here (too small)
+ if a.free_region_buckets_used >> 30 > 0 {
+ // Look only at a constant number of regions in this bucket max, to avoid bad worst case behavior.
+ // If this many regions cannot find free space, we give up and prefer to claim more memory instead.
+ max_regions_to_try_before_giving_up :: 99
+ num_tries_left := max_regions_to_try_before_giving_up
+ for ; free_region != &a.free_region_buckets[largest_bucket_index] && num_tries_left > 0; num_tries_left -= 1 {
+ ptr := attempt_allocate(a, free_region, alignment, size)
+ if ptr != nil {
+ return ptr
+ }
+ free_region = free_region.next
+ }
+ }
+
+ // We were unable to find a free memory region. Must claim more memory!
+ num_bytes_to_claim := size+size_of(Region)*3
+ if alignment > a.alignment {
+ num_bytes_to_claim += alignment
+ }
+ success := claim_more_memory(a, num_bytes_to_claim)
+ if success {
+ // Try allocate again with the newly available memory.
+ return allocate_memory(a, alignment, size)
+ }
+
+ // claim_more_memory also failed, so we are severely memory-constrained. As a last resort, go back to the
+ // bucket we already looked at above, continuing where the earlier search left off - perhaps there are
+ // regions we overlooked the first time that might be able to satisfy the allocation.
+ if free_region != nil {
+ for free_region != &a.free_region_buckets[largest_bucket_index] {
+ ptr := attempt_allocate(a, free_region, alignment, size)
+ if ptr != nil {
+ return ptr
+ }
+ free_region = free_region.next
+ }
+ }
+
+ // Fully out of memory.
+ return nil
+}
+
+@(private="file")
+aligned_alloc :: proc(a: ^WASM_Allocator, alignment, size: uint, loc := #caller_location) -> rawptr {
+ lock(a)
+ defer unlock(a)
+
+ return allocate_memory(a, alignment, size, loc)
+}
+
+@(private="file")
+free :: proc(a: ^WASM_Allocator, ptr: rawptr, loc := #caller_location) {
+ if ptr == nil {
+ return
+ }
+
+ region_start_ptr := uintptr(ptr) - size_of(uint)
+ region := (^Region)(region_start_ptr)
+ assert(has_alignment(region_start_ptr, size_of(uint)))
+
+ lock(a)
+ defer unlock(a)
+
+ size := region.size
+ assert(region_is_in_use(region), "double free", loc=loc)
+
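+ // If the region immediately below is free (FREE_REGION_FLAG set in its ceiling size word),
+ // unlink it from its free list and merge it into the block being freed.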
+ prev_region_size_field := ([^]uint)(region)[-1]
+ prev_region_size := prev_region_size_field & ~uint(FREE_REGION_FLAG)
+ if prev_region_size_field != prev_region_size {
+ prev_region := (^Region)(uintptr(region) - uintptr(prev_region_size))
+ unlink_from_free_list(prev_region)
+ region_start_ptr = uintptr(prev_region)
+ size += prev_region_size
+ }
+
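+ // If the region immediately above is also free, unlink it and fold its space into the merged block.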
+ next_reg := next_region(region)
+ size_at_end := (^uint)(region_payload_end_ptr(next_reg))^
+ if next_reg.size != size_at_end {
+ unlink_from_free_list(next_reg)
+ size += next_reg.size
+ }
+
+ create_free_region(rawptr(region_start_ptr), size)
+ link_to_free_list(a, (^Region)(region_start_ptr))
+}
+
+@(private="file")
+aligned_realloc :: proc(a: ^WASM_Allocator, ptr: rawptr, alignment, size: uint, loc := #caller_location) -> rawptr {
+
+ attempt_region_resize :: proc(a: ^WASM_Allocator, region: ^Region, size: uint) -> bool {
+ lock(a)
+ defer unlock(a)
+
+ // First attempt to resize this region, if the next region that follows this one
+ // is a free region.
+ next_reg := next_region(region)
+ next_region_end_ptr := uintptr(next_reg) + uintptr(next_reg.size)
+ size_at_ceiling := ([^]uint)(next_region_end_ptr)[-1]
+ if next_reg.size != size_at_ceiling { // Next region is free?
+ assert(region_is_free(next_reg))
+ new_next_region_start_ptr := uintptr(region) + uintptr(size)
+ assert(has_alignment(new_next_region_start_ptr, size_of(uint)))
+ // Next region does not shrink to too small size?
+ if new_next_region_start_ptr + size_of(Region) <= next_region_end_ptr {
+ unlink_from_free_list(next_reg)
+ create_free_region(rawptr(new_next_region_start_ptr), uint(next_region_end_ptr - new_next_region_start_ptr))
+ link_to_free_list(a, (^Region)(new_next_region_start_ptr))
+ create_used_region(region, uint(new_next_region_start_ptr - uintptr(region)))
+ return true
+ }
+ // If we remove the next region altogether, allocation is satisfied?
+ if new_next_region_start_ptr <= next_region_end_ptr {
+ unlink_from_free_list(next_reg)
+ create_used_region(region, region.size + next_reg.size)
+ return true
+ }
+ } else {
+ // The next region is a used region - we cannot change its starting address. However, if we are shrinking the
+ // size of this region, we can create a new free region between this and the next used region.
+ if size + size_of(Region) <= region.size {
+ free_region_size := region.size - size
+ create_used_region(region, size)
+ free_region := (^Region)(uintptr(region) + uintptr(size))
+ create_free_region(free_region, free_region_size)
+ link_to_free_list(a, free_region)
+ return true
+ } else if size <= region.size {
+ // Caller was asking to shrink the size, but due to not being able to fit a full Region in the shrunk
+ // area, we cannot actually do anything. This occurs if the shrink amount is really small. In such case,
+ // just call it success without doing any work.
+ return true
+ }
+ }
+
+ return false
+ }
+
+ if ptr == nil {
+ return aligned_alloc(a, alignment, size, loc)
+ }
+
+ if size == 0 {
+ free(a, ptr, loc)
+ return nil
+ }
+
+ if size > MAX_ALLOC_SIZE {
+ return nil
+ }
+
+ assert(is_power_of_two(alignment))
+ assert(has_alignment(uintptr(ptr), alignment), "realloc on different alignment than original allocation", loc=loc)
+
+ size := size
+ size = validate_alloc_size(size)
+
+ region := (^Region)(uintptr(ptr) - size_of(uint))
+
+ // Attempt an in-place resize.
+ if attempt_region_resize(a, region, size + REGION_HEADER_SIZE) {
+ return ptr
+ }
+
+ // Can't do it in-place, allocate new region and copy over.
+ newptr := aligned_alloc(a, alignment, size, loc)
+ if newptr != nil {
+ intrinsics.mem_copy(newptr, ptr, min(size, region.size - REGION_HEADER_SIZE))
+ free(a, ptr, loc=loc)
+ }
+
+ return newptr
+}