diff options
| author | gingerBill <gingerBill@users.noreply.github.com> | 2025-09-27 10:17:23 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-27 10:17:23 +0100 |
| commit | 2c97b4ee4d1867d31467f9c25503260e4a76c5dc (patch) | |
| tree | 2cdd70de534681db523a3479e45cd02796ea85ad | |
| parent | 0eaf3ee7cd1b02fd694697fe581c143b0a87c3a3 (diff) | |
| parent | 2baa19f73ce72045f0eea941c90a5eaac16fc1a6 (diff) | |
Merge pull request #5718 from odin-lang/bill/conditional-zero
Add `runtime.conditional_mem_zero` to improve `heap_allocator` performance on non-Windows systems
| -rw-r--r-- | base/runtime/heap_allocator.odin | 10 | ||||
| -rw-r--r-- | base/runtime/internal.odin | 49 |
2 files changed, 55 insertions, 4 deletions
diff --git a/base/runtime/heap_allocator.odin b/base/runtime/heap_allocator.odin index f2c887759..e2667a78c 100644 --- a/base/runtime/heap_allocator.odin +++ b/base/runtime/heap_allocator.odin @@ -71,10 +71,12 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, new_memory = aligned_alloc(new_size, new_alignment, p, old_size, zero_memory) or_return - // NOTE: heap_resize does not zero the new memory, so we do it - if zero_memory && new_size > old_size { - new_region := raw_data(new_memory[old_size:]) - intrinsics.mem_zero(new_region, new_size - old_size) + when ODIN_OS != .Windows { + // NOTE: heap_resize does not zero the new memory, so we do it + if zero_memory && new_size > old_size { + new_region := raw_data(new_memory[old_size:]) + conditional_mem_zero(new_region, new_size - old_size) + } } return } diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin index 8af083d07..0e674aca8 100644 --- a/base/runtime/internal.odin +++ b/base/runtime/internal.odin @@ -230,6 +230,55 @@ non_zero_mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int return _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc) } +conditional_mem_zero :: proc "contextless" (data: rawptr, n_: int) #no_bounds_check { + // When acquiring memory from the OS for the first time it's likely that the + // OS already gives the zero page mapped multiple times for the request. The + // actual allocation does not have physical pages allocated to it until those + // pages are written to which causes a page-fault. This is often called COW + // (Copy on Write) + // + // You do not want to actually zero out memory in this case because it would + // cause a bunch of page faults decreasing the speed of allocations and + // increase the amount of actual resident physical memory used. + // + // Instead a better technique is to check if memory is zeroed before zeroing + // it. 
This turns out to be an important optimization in practice, saving + // nearly half (or more) the amount of physical memory used by an application. + // This is why every implementation of calloc in libc does this optimization. + // + // It may seem counter-intuitive but most allocations in an application are + // wasted and never used. When you consider something like a [dynamic]T which + // always doubles in capacity on resize but you rarely ever actually use the + // full capacity of a dynamic array it means you have a lot of resident waste + // if you actually zeroed the remainder of the memory. + // + // Keep in mind the OS is already guaranteed to give you zeroed memory by + // mapping in this zero page multiple times so in the best case there is no + // need to actually zero anything. As for testing all this memory for a zero + // value, it costs nothing because the same zero page is used for the + // whole allocation and will exist in L1 cache for the entire zero checking + // process. + + if n_ <= 0 { + return + } + n := uint(n_) + + n_words := n / size_of(uintptr) + p_words := ([^]uintptr)(data)[:n_words] + p_bytes := ([^]byte)(data)[size_of(uintptr) * n_words:n] + for &p_word in p_words { + if p_word != 0 { + p_word = 0 + } + } + for &p_byte in p_bytes { + if p_byte != 0 { + p_byte = 0 + } + } +} + memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool { switch { case n == 0: return true |