From 4ef7ed1cbdf675ce62f7f305b6edb9fd76084c6c Mon Sep 17 00:00:00 2001
From: Barinzaya <barinzaya@gmail.com>
Date: Thu, 31 Jul 2025 16:51:42 -0400
Subject: Skip bounds checking on the inner accumulate loop.

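This helps performance with SSE (somewhat) and AVX-512 (quite a bit),
but not with AVX2; the reason for that is not yet understood.

Also fix a typo in the XXH3_state alignment comment ("cache" -> "stack").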
---
 core/hash/xxhash/xxhash_3.odin | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/hash/xxhash/xxhash_3.odin b/core/hash/xxhash/xxhash_3.odin
index bd5534f23..fe92f16d9 100644
--- a/core/hash/xxhash/xxhash_3.odin
+++ b/core/hash/xxhash/xxhash_3.odin
@@ -64,7 +64,7 @@ XXH3_INTERNAL_BUFFER_SIZE :: 256
 
 	IMPORTANT: This structure has a strict alignment requirement of 64 bytes!! **
 	Default allocators will align it correctly if created via `new`, as will
-	placing this struct on the cache, but if using a custom allocator make sure
+	placing this struct on the stack, but if using a custom allocator make sure
 	that it handles the alignment correctly!
 */
 XXH3_state :: struct #align(64) {
@@ -870,7 +870,7 @@ XXH_PREFETCH_DIST :: 320
 XXH3_accumulate :: #force_inline proc(
 	acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint, f_acc512: XXH3_accumulate_512_f) {
 
-	for n := uint(0); n < nbStripes; n += 1 {
+	#no_bounds_check for n := uint(0); n < nbStripes; n += 1 {
 		when !XXH_DISABLE_PREFETCH {
 			in_ptr := &input[n * XXH_STRIPE_LEN]
 			prefetch(in_ptr, XXH_PREFETCH_DIST)
-- 
cgit v1.2.3