| field | value | date |
|---|---|---|
| author | gingerBill <gingerBill@users.noreply.github.com> | 2024-05-20 10:38:09 +0100 |
| committer | GitHub <noreply@github.com> | 2024-05-20 10:38:09 +0100 |
| commit | 891fefe1178efffd11c31439cb1ed3d283b5a24f | |
| tree | 6edc42cb9d2d7a29ba708878869db6a58e8b8381 /base | |
| parent | 5473758467610e8d18581f0dab11dd5f5c416484 | |
| parent | f42b1c49731356699eb0c34736c7ef485783d9e2 | |
Merge pull request #3592 from laytan/wasm-gpa
add a default heap/general purpose allocator for wasm to `base:runtime`
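For context, a minimal sketch of what this means in practice (mine, not part of the commit): on `wasm32`/`wasm64p32` targets the default `context.allocator` now resolves to the new wasm allocator, so ordinary allocations work without the program wiring up its own heap. The `main` procedure below is hypothetical.

```odin
package main

// Hypothetical example: nothing wasm-specific has to be imported or configured.
// When built for a wasm target, the default context.allocator routes through
// wasm_allocator_proc in base:runtime, which grows linear memory via
// wasm_memory_grow as needed.
main :: proc() {
	buf := make([]byte, 4096) // served by the new wasm general purpose allocator
	defer delete(buf)

	x := new(int) // likewise
	defer free(x)
	x^ = 42
}
```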
Diffstat (limited to 'base')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | base/runtime/default_allocators_general.odin | 3 |
| -rw-r--r-- | base/runtime/internal.odin | 30 |
| -rw-r--r-- | base/runtime/os_specific_wasi.odin | 4 |
| -rw-r--r-- | base/runtime/wasm_allocator.odin | 870 |
4 files changed, 905 insertions, 2 deletions
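The bulk of the change is the new `wasm_allocator.odin` shown in the diff below. Before the diff, a hedged sketch (mine, not from the commit) of the public surface it adds: a `WASM_Allocator` state struct plus `wasm_allocator_init`, `wasm_allocator`, and `default_wasm_allocator` for hooking an instance up as an `Allocator`. The `setup_subsystem` procedure and the `subsystem_heap` variable are hypothetical names.

```odin
package main

import "base:runtime"

// Hypothetical example: a dedicated allocator instance for one subsystem.
// The zero value is usable (it is initialised lazily on first allocation),
// but wasm_allocator_init lets us pick the minimum alignment up front.
subsystem_heap: runtime.WASM_Allocator

setup_subsystem :: proc() {
	runtime.wasm_allocator_init(&subsystem_heap, alignment = 16)
	context.allocator = runtime.wasm_allocator(&subsystem_heap)

	// Everything in this scope (and its callees) now allocates from subsystem_heap.
	table := make(map[string]int)
	defer delete(table)
	table["answer"] = 42
}
```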
diff --git a/base/runtime/default_allocators_general.odin b/base/runtime/default_allocators_general.odin
index cbaf4d22a..ab4dd1db8 100644
--- a/base/runtime/default_allocators_general.odin
+++ b/base/runtime/default_allocators_general.odin
@@ -6,6 +6,9 @@ when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
 } else when ODIN_DEFAULT_TO_PANIC_ALLOCATOR {
 	default_allocator_proc :: panic_allocator_proc
 	default_allocator :: panic_allocator
+} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+	default_allocator :: default_wasm_allocator
+	default_allocator_proc :: wasm_allocator_proc
 } else {
 	default_allocator :: heap_allocator
 	default_allocator_proc :: heap_allocator_proc
diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin
index aaca1e703..8e1b3d633 100644
--- a/base/runtime/internal.odin
+++ b/base/runtime/internal.odin
@@ -40,6 +40,24 @@ align_forward_int :: #force_inline proc(ptr, align: int) -> int {
 	return p
 }
 
+is_power_of_two_uint :: #force_inline proc "contextless" (x: uint) -> bool {
+	if x <= 0 {
+		return false
+	}
+	return (x & (x-1)) == 0
+}
+
+align_forward_uint :: #force_inline proc(ptr, align: uint) -> uint {
+	assert(is_power_of_two_uint(align))
+
+	p := ptr
+	modulo := p & (align-1)
+	if modulo != 0 {
+		p += align - modulo
+	}
+	return p
+}
+
 is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool {
 	if x <= 0 {
 		return false
@@ -58,6 +76,18 @@ align_forward_uintptr :: #force_inline proc(ptr, align: uintptr) -> uintptr {
 	return p
 }
 
+is_power_of_two :: proc {
+	is_power_of_two_int,
+	is_power_of_two_uint,
+	is_power_of_two_uintptr,
+}
+
+align_forward :: proc {
+	align_forward_int,
+	align_forward_uint,
+	align_forward_uintptr,
+}
+
 mem_zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
 	if data == nil {
 		return nil
diff --git a/base/runtime/os_specific_wasi.odin b/base/runtime/os_specific_wasi.odin
index 94fa5fa89..0e229ac7e 100644
--- a/base/runtime/os_specific_wasi.odin
+++ b/base/runtime/os_specific_wasi.odin
@@ -5,7 +5,7 @@ package runtime
 import "core:sys/wasm/wasi"
 
 _stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
-	data := (wasi.ciovec_t)(data)
-	n, err := wasi.fd_write(1, {data})
+	data_iovec := (wasi.ciovec_t)(data)
+	n, err := wasi.fd_write(1, {data_iovec})
 	return int(n), _OS_Errno(err)
 }
diff --git a/base/runtime/wasm_allocator.odin b/base/runtime/wasm_allocator.odin
new file mode 100644
index 000000000..acfc80b0a
--- /dev/null
+++ b/base/runtime/wasm_allocator.odin
@@ -0,0 +1,870 @@
+//+build wasm32, wasm64p32
+package runtime
+
+import "base:intrinsics"
+
+/*
+Port of emmalloc, modified for use in Odin.
+
+Invariants:
+	- Per-allocation header overhead is 8 bytes, smallest allocated payload
+	  amount is 8 bytes, and a multiple of 4 bytes.
+	- Acquired memory blocks are subdivided into disjoint regions that lie
+	  next to each other.
+	- A region is either in used or free.
+	  Used regions may be adjacent, and a used and unused region
+	  may be adjacent, but not two unused ones - they would be
+	  merged.
+	- Memory allocation takes constant time, unless the alloc needs to wasm_memory_grow()
+	  or memory is very close to being exhausted.
+	- Free and used regions are managed inside "root regions", which are slabs
+	  of memory acquired via wasm_memory_grow().
+	- Memory retrieved using wasm_memory_grow() can not be given back to the OS.
+	  Therefore, frees are internal to the allocator.
+
+Copyright (c) 2010-2014 Emscripten authors, see AUTHORS file.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+WASM_Allocator :: struct #no_copy {
+	// The minimum alignment of allocations.
+	alignment: uint,
+	// A region that contains as payload a single forward linked list of pointers to
+	// root regions of each disjoint region blocks.
+	list_of_all_regions: ^Root_Region,
+	// For each of the buckets, maintain a linked list head node. The head node for each
+	// free region is a sentinel node that does not actually represent any free space, but
+	// the sentinel is used to avoid awkward testing against (if node == freeRegionHeadNode)
+	// when adding and removing elements from the linked list, i.e. we are guaranteed that
+	// the sentinel node is always fixed and there, and the actual free region list elements
+	// start at free_region_buckets[i].next each.
+	free_region_buckets: [NUM_FREE_BUCKETS]Region,
+	// A bitmask that tracks the population status for each of the 64 distinct memory regions:
+	// a zero at bit position i means that the free list bucket i is empty. This bitmask is
+	// used to avoid redundant scanning of the 64 different free region buckets: instead by
+	// looking at the bitmask we can find in constant time an index to a free region bucket
+	// that contains free memory of desired size.
+	free_region_buckets_used: BUCKET_BITMASK_T,
+	// Because wasm memory can only be allocated in pages of 64k at a time, we keep any
+	// spilled/unused bytes that are left from the allocated pages here, first using this
+	// when bytes are needed.
+	spill: []byte,
+	// Mutex for thread safety, only used if the target feature "atomics" is enabled.
+	mu: Mutex_State,
+}
+
+// Not required to be called, called on first allocation otherwise.
+wasm_allocator_init :: proc(a: ^WASM_Allocator, alignment: uint = 8) {
+	assert(is_power_of_two(alignment), "alignment must be a power of two")
+	assert(alignment > 4, "alignment must be more than 4")
+
+	a.alignment = alignment
+
+	for i in 0..<NUM_FREE_BUCKETS {
+		a.free_region_buckets[i].next = &a.free_region_buckets[i]
+		a.free_region_buckets[i].prev = a.free_region_buckets[i].next
+	}
+
+	if !claim_more_memory(a, 3*size_of(Region)) {
+		panic("wasm_allocator: initial memory could not be allocated")
+	}
+}
+
+global_default_wasm_allocator_data: WASM_Allocator
+
+default_wasm_allocator :: proc() -> Allocator {
+	return wasm_allocator(&global_default_wasm_allocator_data)
+}
+
+wasm_allocator :: proc(a: ^WASM_Allocator) -> Allocator {
+	return {
+		data = a,
+		procedure = wasm_allocator_proc,
+	}
+}
+
+wasm_allocator_proc :: proc(a: rawptr, mode: Allocator_Mode, size, alignment: int, old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
+	a := (^WASM_Allocator)(a)
+	if a == nil {
+		a = &global_default_wasm_allocator_data
+	}
+
+	if a.alignment == 0 {
+		wasm_allocator_init(a)
+	}
+
+	switch mode {
+	case .Alloc:
+		ptr := aligned_alloc(a, uint(alignment), uint(size), loc)
+		if ptr == nil {
+			return nil, .Out_Of_Memory
+		}
+		intrinsics.mem_zero(ptr, size)
+		return ([^]byte)(ptr)[:size], nil
+
+	case .Alloc_Non_Zeroed:
+		ptr := aligned_alloc(a, uint(alignment), uint(size), loc)
+		if ptr == nil {
+			return nil, .Out_Of_Memory
+		}
+		return ([^]byte)(ptr)[:size], nil
+
+	case .Resize:
+		ptr := aligned_realloc(a, old_memory, uint(alignment), uint(size), loc)
+		if ptr == nil {
+			return nil, .Out_Of_Memory
+		}
+
+		bytes := ([^]byte)(ptr)[:size]
+
+		if size > old_size {
+			new_region := raw_data(bytes[old_size:])
+			intrinsics.mem_zero(new_region, size - old_size)
+		}
+
+		return bytes, nil
+
+	case .Resize_Non_Zeroed:
+		ptr := aligned_realloc(a, old_memory, uint(alignment), uint(size), loc)
+		if ptr == nil {
+			return nil, .Out_Of_Memory
+		}
+		return ([^]byte)(ptr)[:size], nil
+
+	case .Free:
+		free(a, old_memory, loc)
+		return nil, nil
+
+	case .Free_All, .Query_Info:
+		return nil, .Mode_Not_Implemented
+
+	case .Query_Features:
+		set := (^Allocator_Mode_Set)(old_memory)
+		if set != nil {
+			set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Resize_Non_Zeroed, .Query_Features }
+		}
+		return nil, nil
+	}
+
+	unreachable()
+}
+
+// Returns the allocated size of the allocator (both free and used).
+// If `nil` is given, the global allocator is used.
+wasm_allocator_size :: proc(a: ^WASM_Allocator = nil) -> (size: uint) {
+	a := a
+	if a == nil {
+		a = &global_default_wasm_allocator_data
+	}
+
+	lock(a)
+	defer unlock(a)
+
+	root := a.list_of_all_regions
+	for root != nil {
+		size += uint(uintptr(root.end_ptr) - uintptr(root))
+		root = root.next
+	}
+
+	size += len(a.spill)
+
+	return
+}
+
+// Returns the amount of free memory on the allocator.
+// If `nil` is given, the global allocator is used.
+wasm_allocator_free_space :: proc(a: ^WASM_Allocator = nil) -> (free: uint) {
+	a := a
+	if a == nil {
+		a = &global_default_wasm_allocator_data
+	}
+
+	lock(a)
+	defer unlock(a)
+
+	bucket_index: u64 = 0
+	bucket_mask := a.free_region_buckets_used
+
+	for bucket_mask != 0 {
+		index_add := intrinsics.count_trailing_zeros(bucket_mask)
+		bucket_index += index_add
+		bucket_mask >>= index_add
+		for free_region := a.free_region_buckets[bucket_index].next; free_region != &a.free_region_buckets[bucket_index]; free_region = free_region.next {
+			free += free_region.size - REGION_HEADER_SIZE
+		}
+		bucket_index += 1
+		bucket_mask >>= 1
+	}
+
+	free += len(a.spill)
+
+	return
+}
+
+@(private="file")
+NUM_FREE_BUCKETS :: 64
+@(private="file")
+BUCKET_BITMASK_T :: u64
+
+// Dynamic memory is subdivided into regions, in the format
+
+// <size:u32> ..... <size:u32> | <size:u32> ..... <size:u32> | <size:u32> ..... <size:u32> | .....
+
+// That is, at the bottom and top end of each memory region, the size of that region is stored. That allows traversing the
+// memory regions backwards and forwards. Because each allocation must be at least a multiple of 4 bytes, the lowest two bits of
+// each size field is unused. Free regions are distinguished by used regions by having the FREE_REGION_FLAG bit present
+// in the size field. I.e. for free regions, the size field is odd, and for used regions, the size field reads even.
+@(private="file")
+FREE_REGION_FLAG :: 0x1
+
+// Attempts to alloc more than this many bytes would cause an overflow when calculating the size of a region,
+// therefore allocations larger than this are short-circuited immediately on entry.
+@(private="file")
+MAX_ALLOC_SIZE :: 0xFFFFFFC7
+
+// A free region has the following structure:
+// <size:uint> <prevptr> <nextptr> ... <size:uint>
+
+@(private="file")
+Region :: struct {
+	size: uint,
+	prev, next: ^Region,
+	_at_the_end_of_this_struct_size: uint,
+}
+
+// Each memory block starts with a Root_Region at the beginning.
+// The Root_Region specifies the size of the region block, and forms a linked
+// list of all Root_Regions in the program, starting with `list_of_all_regions`
+// below.
+@(private="file")
+Root_Region :: struct {
+	size: u32,
+	next: ^Root_Region,
+	end_ptr: ^byte,
+}
+
+@(private="file")
+Mutex_State :: enum u32 {
+	Unlocked = 0,
+	Locked = 1,
+	Waiting = 2,
+}
+
+@(private="file")
+lock :: proc(a: ^WASM_Allocator) {
+	when intrinsics.has_target_feature("atomics") {
+		@(cold)
+		lock_slow :: proc(a: ^WASM_Allocator, curr_state: Mutex_State) {
+			new_state := curr_state // Make a copy of it
+
+			spin_lock: for spin in 0..<i32(100) {
+				state, ok := intrinsics.atomic_compare_exchange_weak_explicit(&a.mu, .Unlocked, new_state, .Acquire, .Consume)
+				if ok {
+					return
+				}
+
+				if state == .Waiting {
+					break spin_lock
+				}
+
+				for i := min(spin+1, 32); i > 0; i -= 1 {
+					intrinsics.cpu_relax()
+				}
+			}
+
+			// Set just in case 100 iterations did not do it
+			new_state = .Waiting
+
+			for {
+				if intrinsics.atomic_exchange_explicit(&a.mu, .Waiting, .Acquire) == .Unlocked {
+					return
+				}
+
+				assert(intrinsics.wasm_memory_atomic_wait32((^u32)(&a.mu), u32(new_state), -1) != 0)
+				intrinsics.cpu_relax()
+			}
+		}
+
+
+		if v := intrinsics.atomic_exchange_explicit(&a.mu, .Locked, .Acquire); v != .Unlocked {
+			lock_slow(a, v)
+		}
+	}
+}
+
+@(private="file")
+unlock :: proc(a: ^WASM_Allocator) {
+	when intrinsics.has_target_feature("atomics") {
+		@(cold)
+		unlock_slow :: proc(a: ^WASM_Allocator) {
+			for {
+				s := intrinsics.wasm_memory_atomic_notify32((^u32)(&a.mu), 1)
+				if s >= 1 {
+					return
+				}
+			}
+		}
+
+		switch intrinsics.atomic_exchange_explicit(&a.mu, .Unlocked, .Release) {
+		case .Unlocked:
+			unreachable()
+		case .Locked:
+			// Okay
+		case .Waiting:
+			unlock_slow(a)
+		}
+	}
+}
+
+@(private="file")
+assert_locked :: proc(a: ^WASM_Allocator) {
+	when intrinsics.has_target_feature("atomics") {
+		assert(intrinsics.atomic_load(&a.mu) != .Unlocked)
+	}
+}
+
+@(private="file")
+has_alignment_uintptr :: proc(ptr: uintptr, #any_int alignment: uintptr) -> bool {
+	return ptr & (alignment-1) == 0
+}
+
+@(private="file")
+has_alignment_uint :: proc(ptr: uint, alignment: uint) -> bool {
+	return ptr & (alignment-1) == 0
+}
+
+@(private="file")
+has_alignment :: proc {
+	has_alignment_uintptr,
+	has_alignment_uint,
+}
+
+@(private="file")
+REGION_HEADER_SIZE :: 2*size_of(uint)
+
+@(private="file")
+SMALLEST_ALLOCATION_SIZE :: 2*size_of(rawptr)
+
+// Subdivide regions of free space into distinct circular doubly linked lists, where each linked list
+// represents a range of free space blocks. The following function compute_free_list_bucket() converts
+// an allocation size to the bucket index that should be looked at.
+#assert(NUM_FREE_BUCKETS == 64, "Following function is tailored specifically for the NUM_FREE_BUCKETS == 64 case")
+@(private="file")
+compute_free_list_bucket :: proc(size: uint) -> uint {
+	if size < 128 { return (size >> 3) - 1 }
+
+	clz := intrinsics.count_leading_zeros(i32(size))
+	bucket_index: i32 = ((clz > 19) \
+		?      110 - (clz<<2) + ((i32)(size >> (u32)(29-clz)) ~ 4) \
+		: min( 71 - (clz<<1) + ((i32)(size >> (u32)(30-clz)) ~ 2), NUM_FREE_BUCKETS-1))
+
+	assert(bucket_index >= 0)
+	assert(bucket_index < NUM_FREE_BUCKETS)
+	return uint(bucket_index)
+}
+
+@(private="file")
+prev_region :: proc(region: ^Region) -> ^Region {
+	prev_region_size := ([^]uint)(region)[-1]
+	prev_region_size = prev_region_size & ~uint(FREE_REGION_FLAG)
+	return (^Region)(uintptr(region)-uintptr(prev_region_size))
+}
+
+@(private="file")
+next_region :: proc(region: ^Region) -> ^Region {
+	return (^Region)(uintptr(region)+uintptr(region.size))
+}
+
+@(private="file")
+region_ceiling_size :: proc(region: ^Region) -> uint {
+	return ([^]uint)(uintptr(region)+uintptr(region.size))[-1]
+}
+
+@(private="file")
+region_is_free :: proc(r: ^Region) -> bool {
+	return region_ceiling_size(r) & FREE_REGION_FLAG >= 1
+}
+
+@(private="file")
+region_is_in_use :: proc(r: ^Region) -> bool {
+	return r.size == region_ceiling_size(r)
+}
+
+@(private="file")
+region_payload_start_ptr :: proc(r: ^Region) -> [^]byte {
+	return ([^]byte)(r)[size_of(uint):]
+}
+
+@(private="file")
+region_payload_end_ptr :: proc(r: ^Region) -> [^]byte {
+	return ([^]byte)(r)[r.size-size_of(uint):]
+}
+
+@(private="file")
+create_used_region :: proc(ptr: rawptr, size: uint) {
+	assert(has_alignment(uintptr(ptr), size_of(uint)))
+	assert(has_alignment(size, size_of(uint)))
+	assert(size >= size_of(Region))
+
+	uptr := ([^]uint)(ptr)
+	uptr[0] = size
+	uptr[size/size_of(uint)-1] = size
+}
+
+@(private="file")
+create_free_region :: proc(ptr: rawptr, size: uint) {
+	assert(has_alignment(uintptr(ptr), size_of(uint)))
+	assert(has_alignment(size, size_of(uint)))
+	assert(size >= size_of(Region))
+
+	free_region := (^Region)(ptr)
+	free_region.size = size
+	([^]uint)(ptr)[size/size_of(uint)-1] = size | FREE_REGION_FLAG
+}
+
+@(private="file")
+prepend_to_free_list :: proc(region: ^Region, prepend_to: ^Region) {
+	assert(region_is_free(region))
+	region.next = prepend_to
+	region.prev = prepend_to.prev
+	prepend_to.prev = region
+	region.prev.next = region
+}
+
+@(private="file")
+unlink_from_free_list :: proc(region: ^Region) {
+	assert(region_is_free(region))
+	region.prev.next = region.next
+	region.next.prev = region.prev
+}
+
+@(private="file")
+link_to_free_list :: proc(a: ^WASM_Allocator, free_region: ^Region) {
+	assert(free_region.size >= size_of(Region))
+	bucket_index := compute_free_list_bucket(free_region.size-REGION_HEADER_SIZE)
+	free_list_head := &a.free_region_buckets[bucket_index]
+	free_region.prev = free_list_head
+	free_region.next = free_list_head.next
+	free_list_head.next = free_region
+	free_region.next.prev = free_region
+	a.free_region_buckets_used |= BUCKET_BITMASK_T(1) << bucket_index
+}
+
+@(private="file")
+claim_more_memory :: proc(a: ^WASM_Allocator, num_bytes: uint) -> bool {
+
+	PAGE_SIZE :: 64 * 1024
+
+	page_alloc :: proc(page_count: int) -> []byte {
+		prev_page_count := intrinsics.wasm_memory_grow(0, uintptr(page_count))
+		if prev_page_count < 0 { return nil }
+
+		ptr := ([^]byte)(uintptr(prev_page_count) * PAGE_SIZE)
+		return ptr[:page_count * PAGE_SIZE]
+	}
+
+	alloc :: proc(a: ^WASM_Allocator, num_bytes: uint) -> (bytes: [^]byte) #no_bounds_check {
+		if uint(len(a.spill)) >= num_bytes {
+			bytes = raw_data(a.spill[:num_bytes])
+			a.spill = a.spill[num_bytes:]
+			return
+		}
+
+		pages := int((num_bytes / PAGE_SIZE) + 1)
+		allocated := page_alloc(pages)
+		if allocated == nil { return nil }
+
+		// If the allocated memory is a direct continuation of the spill from before,
+		// we can just extend the spill.
+		spill_end := uintptr(raw_data(a.spill)) + uintptr(len(a.spill))
+		if spill_end == uintptr(raw_data(allocated)) {
+			raw_spill := transmute(^Raw_Slice)(&a.spill)
+			raw_spill.len += len(allocated)
+		} else {
+			// Otherwise, we have to "waste" the previous spill.
+			// Now this is probably uncommon, and will only happen if another code path
+			// is also requesting pages.
+			a.spill = allocated
+		}
+
+		bytes = raw_data(a.spill)
+		a.spill = a.spill[num_bytes:]
+		return
+	}
+
+	num_bytes := num_bytes
+	num_bytes = align_forward(num_bytes, a.alignment)
+
+	start_ptr := alloc(a, uint(num_bytes))
+	if start_ptr == nil { return false }
+
+	assert(has_alignment(uintptr(start_ptr), align_of(uint)))
+	end_ptr := start_ptr[num_bytes:]
+
+	end_sentinel_region := (^Region)(end_ptr[-size_of(Region):])
+	create_used_region(end_sentinel_region, size_of(Region))
+
+	// If we are the sole user of wasm_memory_grow(), it will feed us continuous/consecutive memory addresses - take advantage
+	// of that if so: instead of creating two disjoint memory regions blocks, expand the previous one to a larger size.
+	prev_alloc_end_address := a.list_of_all_regions != nil ? a.list_of_all_regions.end_ptr : nil
+	if start_ptr == prev_alloc_end_address {
+		prev_end_sentinel := prev_region((^Region)(start_ptr))
+		assert(region_is_in_use(prev_end_sentinel))
+		prev_region := prev_region(prev_end_sentinel)
+
+		a.list_of_all_regions.end_ptr = end_ptr
+
+		// Two scenarios, either the last region of the previous block was in use, in which case we need to create
+		// a new free region in the newly allocated space; or it was free, in which case we can extend that region
+		// to cover a larger size.
+		if region_is_free(prev_region) {
+			new_free_region_size := uint(uintptr(end_sentinel_region) - uintptr(prev_region))
+			unlink_from_free_list(prev_region)
+			create_free_region(prev_region, new_free_region_size)
+			link_to_free_list(a, prev_region)
+			return true
+		}
+
+		start_ptr = start_ptr[-size_of(Region):]
+	} else {
+		create_used_region(start_ptr, size_of(Region))
+
+		new_region_block := (^Root_Region)(start_ptr)
+		new_region_block.next = a.list_of_all_regions
+		new_region_block.end_ptr = end_ptr
+		a.list_of_all_regions = new_region_block
+		start_ptr = start_ptr[size_of(Region):]
+	}
+
+	create_free_region(start_ptr, uint(uintptr(end_sentinel_region)-uintptr(start_ptr)))
+	link_to_free_list(a, (^Region)(start_ptr))
+	return true
+}
+
+@(private="file")
+validate_alloc_size :: proc(size: uint) -> uint {
+	#assert(size_of(uint) >= size_of(uintptr))
+	#assert(size_of(uint) % size_of(uintptr) == 0)
+
+	// NOTE: emmalloc aligns this forward on pointer size, but I think that is a mistake and will
+	// do bad on wasm64p32.
+
+	validated_size := size > SMALLEST_ALLOCATION_SIZE ? align_forward(size, size_of(uint)) : SMALLEST_ALLOCATION_SIZE
+	assert(validated_size >= size) // Assert we haven't wrapped.
+
+	return validated_size
+}
+
+@(private="file")
+allocate_memory :: proc(a: ^WASM_Allocator, alignment: uint, size: uint, loc := #caller_location) -> rawptr {
+
+	attempt_allocate :: proc(a: ^WASM_Allocator, free_region: ^Region, alignment, size: uint) -> rawptr {
+		assert_locked(a)
+		free_region := free_region
+
+		payload_start_ptr := uintptr(region_payload_start_ptr(free_region))
+		payload_start_ptr_aligned := align_forward(payload_start_ptr, uintptr(alignment))
+		payload_end_ptr := uintptr(region_payload_end_ptr(free_region))
+
+		if payload_start_ptr_aligned + uintptr(size) > payload_end_ptr {
+			return nil
+		}
+
+		// We have enough free space, so the memory allocation will be made into this region. Remove this free region
+		// from the list of free regions: whatever slop remains will be later added back to the free region pool.
+		unlink_from_free_list(free_region)
+
+		// Before we proceed further, fix up the boundary between this and the preceding region,
+		// so that the boundary between the two regions happens at a right spot for the payload to be aligned.
+		if payload_start_ptr != payload_start_ptr_aligned {
+			prev := prev_region(free_region)
+			assert(region_is_in_use(prev))
+			region_boundary_bump_amount := payload_start_ptr_aligned - payload_start_ptr
+			new_this_region_size := free_region.size - uint(region_boundary_bump_amount)
+			create_used_region(prev, prev.size + uint(region_boundary_bump_amount))
+			free_region = (^Region)(uintptr(free_region) + region_boundary_bump_amount)
+			free_region.size = new_this_region_size
+		}
+
+		// Next, we need to decide whether this region is so large that it should be split into two regions,
+		// one representing the newly used memory area, and at the high end a remaining leftover free area.
+		// This splitting to two is done always if there is enough space for the high end to fit a region.
+		// Carve 'size' bytes of payload off this region. So,
+		// [sz prev next sz]
+		// becomes
+		// [sz payload sz] [sz prev next sz]
+		if size_of(Region) + REGION_HEADER_SIZE + size <= free_region.size {
+			new_free_region := (^Region)(uintptr(free_region) + REGION_HEADER_SIZE + uintptr(size))
+			create_free_region(new_free_region, free_region.size - size - REGION_HEADER_SIZE)
+			link_to_free_list(a, new_free_region)
+			create_used_region(free_region, size + REGION_HEADER_SIZE)
+		} else {
+			// There is not enough space to split the free memory region into used+free parts, so consume the whole
+			// region as used memory, not leaving a free memory region behind.
+			// Initialize the free region as used by resetting the ceiling size to the same value as the size at bottom.
+			([^]uint)(uintptr(free_region) + uintptr(free_region.size))[-1] = free_region.size
+		}
+
+		return rawptr(uintptr(free_region) + size_of(uint))
+	}
+
+	assert_locked(a)
+	assert(is_power_of_two(alignment))
+	assert(size <= MAX_ALLOC_SIZE, "allocation too big", loc=loc)
+
+	alignment := alignment
+	alignment = max(alignment, a.alignment)
+
+	size := size
+	size = validate_alloc_size(size)
+
+	// Attempt to allocate memory starting from smallest bucket that can contain the required amount of memory.
+	// Under normal alignment conditions this should always be the first or second bucket we look at, but if
+	// performing an allocation with complex alignment, we may need to look at multiple buckets.
+	bucket_index := compute_free_list_bucket(size)
+	bucket_mask := a.free_region_buckets_used >> bucket_index
+
+	// Loop through each bucket that has free regions in it, based on bits set in free_region_buckets_used bitmap.
+	for bucket_mask != 0 {
+		index_add := intrinsics.count_trailing_zeros(bucket_mask)
+		bucket_index += uint(index_add)
+		bucket_mask >>= index_add
+		assert(bucket_index <= NUM_FREE_BUCKETS-1)
+		assert(a.free_region_buckets_used & (BUCKET_BITMASK_T(1) << bucket_index) > 0)
+
+		free_region := a.free_region_buckets[bucket_index].next
+		assert(free_region != nil)
+		if free_region != &a.free_region_buckets[bucket_index] {
+			ptr := attempt_allocate(a, free_region, alignment, size)
+			if ptr != nil {
+				return ptr
+			}
+
+			// We were not able to allocate from the first region found in this bucket, so penalize
+			// the region by cycling it to the end of the doubly circular linked list. (constant time)
+			// This provides a randomized guarantee that when performing allocations of size k to a
+			// bucket of [k-something, k+something] range, we will not always attempt to satisfy the
+			// allocation from the same available region at the front of the list, but we try each
+			// region in turn.
+			unlink_from_free_list(free_region)
+			prepend_to_free_list(free_region, &a.free_region_buckets[bucket_index])
+			// But do not stick around to attempt to look at other regions in this bucket - move
+			// to search the next populated bucket index if this did not fit. This gives a practical
+			// "allocation in constant time" guarantee, since the next higher bucket will only have
+			// regions that are all of strictly larger size than the requested allocation. Only if
+			// there is a difficult alignment requirement we may fail to perform the allocation from
+			// a region in the next bucket, and if so, we keep trying higher buckets until one of them
+			// works.
+			bucket_index += 1
+			bucket_mask >>= 1
+		} else {
+			// This bucket was not populated after all with any regions,
+			// but we just had a stale bit set to mark a populated bucket.
+			// Reset the bit to update latest status so that we do not
+			// redundantly look at this bucket again.
+			a.free_region_buckets_used &= ~(BUCKET_BITMASK_T(1) << bucket_index)
+			bucket_mask ~= 1
+		}
+
+		assert((bucket_index == NUM_FREE_BUCKETS && bucket_mask == 0) || (bucket_mask == a.free_region_buckets_used >> bucket_index))
+	}
+
+	// None of the buckets were able to accommodate an allocation. If this happens we are almost out of memory.
+	// The largest bucket might contain some suitable regions, but we only looked at one region in that bucket, so
+	// as a last resort, loop through more free regions in the bucket that represents the largest allocations available.
+	// But only if the bucket representing largest allocations available is not any of the first thirty buckets,
+	// these represent allocatable areas less than <1024 bytes - which could be a lot of scrap.
+	// In such case, prefer to claim more memory right away.
+	largest_bucket_index := NUM_FREE_BUCKETS - 1 - intrinsics.count_leading_zeros(a.free_region_buckets_used)
+	// free_region will be null if there is absolutely no memory left. (all buckets are 100% used)
+	free_region := a.free_region_buckets_used > 0 ? a.free_region_buckets[largest_bucket_index].next : nil
+	// The 30 first free region buckets cover memory blocks < 2048 bytes, so skip looking at those here (too small)
+	if a.free_region_buckets_used >> 30 > 0 {
+		// Look only at a constant number of regions in this bucket max, to avoid bad worst case behavior.
+		// If this many regions cannot find free space, we give up and prefer to claim more memory instead.
+		max_regions_to_try_before_giving_up :: 99
+		num_tries_left := max_regions_to_try_before_giving_up
+		for ; free_region != &a.free_region_buckets[largest_bucket_index] && num_tries_left > 0; num_tries_left -= 1 {
+			ptr := attempt_allocate(a, free_region, alignment, size)
+			if ptr != nil {
+				return ptr
+			}
+			free_region = free_region.next
+		}
+	}
+
+	// We were unable to find a free memory region. Must claim more memory!
+	num_bytes_to_claim := size+size_of(Region)*3
+	if alignment > a.alignment {
+		num_bytes_to_claim += alignment
+	}
+	success := claim_more_memory(a, num_bytes_to_claim)
+	if (success) {
+		// Try allocate again with the newly available memory.
+		return allocate_memory(a, alignment, size)
+	}
+
+	// also claim_more_memory failed, we are really really constrained :( As a last resort, go back to looking at the
+	// bucket we already looked at above, continuing where the above search left off - perhaps there are
+	// regions we overlooked the first time that might be able to satisfy the allocation.
+	if free_region != nil {
+		for free_region != &a.free_region_buckets[largest_bucket_index] {
+			ptr := attempt_allocate(a, free_region, alignment, size)
+			if ptr != nil {
+				return ptr
+			}
+			free_region = free_region.next
+		}
+	}
+
+	// Fully out of memory.
+	return nil
+}
+
+@(private="file")
+aligned_alloc :: proc(a: ^WASM_Allocator, alignment, size: uint, loc := #caller_location) -> rawptr {
+	lock(a)
+	defer unlock(a)
+
+	return allocate_memory(a, alignment, size, loc)
+}
+
+@(private="file")
+free :: proc(a: ^WASM_Allocator, ptr: rawptr, loc := #caller_location) {
+	if ptr == nil {
+		return
+	}
+
+	region_start_ptr := uintptr(ptr) - size_of(uint)
+	region := (^Region)(region_start_ptr)
+	assert(has_alignment(region_start_ptr, size_of(uint)))
+
+	lock(a)
+	defer unlock(a)
+
+	size := region.size
+	assert(region_is_in_use(region), "double free", loc=loc)
+
+	prev_region_size_field := ([^]uint)(region)[-1]
+	prev_region_size := prev_region_size_field & ~uint(FREE_REGION_FLAG)
+	if prev_region_size_field != prev_region_size {
+		prev_region := (^Region)(uintptr(region) - uintptr(prev_region_size))
+		unlink_from_free_list(prev_region)
+		region_start_ptr = uintptr(prev_region)
+		size += prev_region_size
+	}
+
+	next_reg := next_region(region)
+	size_at_end := (^uint)(region_payload_end_ptr(next_reg))^
+	if next_reg.size != size_at_end {
+		unlink_from_free_list(next_reg)
+		size += next_reg.size
+	}
+
+	create_free_region(rawptr(region_start_ptr), size)
+	link_to_free_list(a, (^Region)(region_start_ptr))
+}
+
+@(private="file")
+aligned_realloc :: proc(a: ^WASM_Allocator, ptr: rawptr, alignment, size: uint, loc := #caller_location) -> rawptr {
+
+	attempt_region_resize :: proc(a: ^WASM_Allocator, region: ^Region, size: uint) -> bool {
+		lock(a)
+		defer unlock(a)
+
+		// First attempt to resize this region, if the next region that follows this one
+		// is a free region.
+		next_reg := next_region(region)
+		next_region_end_ptr := uintptr(next_reg) + uintptr(next_reg.size)
+		size_at_ceiling := ([^]uint)(next_region_end_ptr)[-1]
+		if next_reg.size != size_at_ceiling { // Next region is free?
+			assert(region_is_free(next_reg))
+			new_next_region_start_ptr := uintptr(region) + uintptr(size)
+			assert(has_alignment(new_next_region_start_ptr, size_of(uint)))
+			// Next region does not shrink to too small size?
+			if new_next_region_start_ptr + size_of(Region) <= next_region_end_ptr {
+				unlink_from_free_list(next_reg)
+				create_free_region(rawptr(new_next_region_start_ptr), uint(next_region_end_ptr - new_next_region_start_ptr))
+				link_to_free_list(a, (^Region)(new_next_region_start_ptr))
+				create_used_region(region, uint(new_next_region_start_ptr - uintptr(region)))
+				return true
+			}
+			// If we remove the next region altogether, allocation is satisfied?
+			if new_next_region_start_ptr <= next_region_end_ptr {
+				unlink_from_free_list(next_reg)
+				create_used_region(region, region.size + next_reg.size)
+				return true
+			}
+		} else {
+			// Next region is an used region - we cannot change its starting address. However if we are shrinking the
+			// size of this region, we can create a new free region between this and the next used region.
+			if size + size_of(Region) <= region.size {
+				free_region_size := region.size - size
+				create_used_region(region, size)
+				free_region := (^Region)(uintptr(region) + uintptr(size))
+				create_free_region(free_region, free_region_size)
+				link_to_free_list(a, free_region)
+				return true
+			} else if size <= region.size {
+				// Caller was asking to shrink the size, but due to not being able to fit a full Region in the shrunk
+				// area, we cannot actually do anything. This occurs if the shrink amount is really small. In such case,
+				// just call it success without doing any work.
+				return true
+			}
+		}
+
+		return false
+	}
+
+	if ptr == nil {
+		return aligned_alloc(a, alignment, size, loc)
+	}
+
+	if size == 0 {
+		free(a, ptr, loc)
+		return nil
+	}
+
+	if size > MAX_ALLOC_SIZE {
+		return nil
+	}
+
+	assert(is_power_of_two(alignment))
+	assert(has_alignment(uintptr(ptr), alignment), "realloc on different alignment than original allocation", loc=loc)
+
+	size := size
+	size = validate_alloc_size(size)
+
+	region := (^Region)(uintptr(ptr) - size_of(uint))
+
+	// Attempt an in-place resize.
+	if attempt_region_resize(a, region, size + REGION_HEADER_SIZE) {
+		return ptr
+	}
+
+	// Can't do it in-place, allocate new region and copy over.
+	newptr := aligned_alloc(a, alignment, size, loc)
+	if newptr != nil {
+		intrinsics.mem_copy(newptr, ptr, min(size, region.size - REGION_HEADER_SIZE))
+		free(a, ptr, loc=loc)
+	}
+
+	return newptr
+}
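A closing note and sketch (mine, not from the commit): since memory claimed via `wasm_memory_grow` is never returned to the host, `wasm_allocator_size` and `wasm_allocator_free_space` are the main way to observe heap growth. Passing `nil` (the default) inspects the global allocator, and locking is handled internally when the `atomics` target feature is enabled. The `report_wasm_heap` helper below is hypothetical.

```odin
package main

import "base:runtime"
import "core:fmt"

// Hypothetical helper: report how much linear memory the default wasm
// allocator has claimed and how much of that is currently free.
report_wasm_heap :: proc() {
	total := runtime.wasm_allocator_size()       // claimed via wasm_memory_grow, plus spill
	avail := runtime.wasm_allocator_free_space() // free regions across all buckets, plus spill
	fmt.println("wasm heap:", total, "bytes claimed,", avail, "bytes free")
}
```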