diff options
| author | Barinzaya <barinzaya@gmail.com> | 2025-07-31 16:51:42 -0400 |
|---|---|---|
| committer | Barinzaya <barinzaya@gmail.com> | 2025-07-31 16:51:42 -0400 |
| commit | 4ef7ed1cbdf675ce62f7f305b6edb9fd76084c6c (patch) | |
| tree | 423bb8db65487a857a56882ebe52a2d8b1813af0 /core | |
| parent | f61dc7d071a95b1cdaaaed6f1451c7e1ddf384c5 (diff) | |
Skip bounds checking on the inner accumulate loop.
This helps performance with SSE (somewhat) and AVX-512 (quite a bit),
but not AVX2 for some reason.
Diffstat (limited to 'core')
| -rw-r--r-- | core/hash/xxhash/xxhash_3.odin | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/core/hash/xxhash/xxhash_3.odin b/core/hash/xxhash/xxhash_3.odin
index bd5534f23..fe92f16d9 100644
--- a/core/hash/xxhash/xxhash_3.odin
+++ b/core/hash/xxhash/xxhash_3.odin
@@ -64,7 +64,7 @@ XXH3_INTERNAL_BUFFER_SIZE :: 256
 
 	IMPORTANT: This structure has a strict alignment requirement of 64 bytes!! **
 	Default allocators will align it correctly if created via `new`, as will
-	placing this struct on the cache, but if using a custom allocator make sure
+	placing this struct on the stack, but if using a custom allocator make sure
 	that it handles the alignment correctly!
 */
 XXH3_state :: struct #align(64) {
@@ -870,7 +870,7 @@ XXH_PREFETCH_DIST :: 320
 XXH3_accumulate :: #force_inline proc(
 	acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint, f_acc512: XXH3_accumulate_512_f) {
 
-	for n := uint(0); n < nbStripes; n += 1 {
+	#no_bounds_check for n := uint(0); n < nbStripes; n += 1 {
 		when !XXH_DISABLE_PREFETCH {
 			in_ptr := &input[n * XXH_STRIPE_LEN]
 			prefetch(in_ptr, XXH_PREFETCH_DIST)
```