| author | gingerBill <gingerBill@users.noreply.github.com> | 2021-09-18 12:55:13 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-09-18 12:55:13 +0100 |
| commit | 6855538729ea4f859f692715c4c231e4a59ac604 (patch) | |
| tree | f9bd2df2289ab97032170151da38b076df00e0f6 /core/hash | |
| parent | 15921d032cfff6994289d1f299bef168f0a49a4e (diff) | |
| parent | 05ac2002e0296c3acccca1d8cffaafb002e43120 (diff) | |
Merge branch 'master' into llvm-12.0.1-windows
Diffstat (limited to 'core/hash')
| -rw-r--r-- | core/hash/xxhash/common.odin | 14 |
| -rw-r--r-- | core/hash/xxhash/streaming.odin | 372 |
| -rw-r--r-- | core/hash/xxhash/xxhash_3.odin | 387 |
| -rw-r--r-- | core/hash/xxhash/xxhash_32.odin | 10 |
| -rw-r--r-- | core/hash/xxhash/xxhash_64.odin | 10 |
5 files changed, 609 insertions, 184 deletions
diff --git a/core/hash/xxhash/common.odin b/core/hash/xxhash/common.odin
index 6733bb83e..d49e9c047 100644
--- a/core/hash/xxhash/common.odin
+++ b/core/hash/xxhash/common.odin
@@ -41,13 +41,18 @@ Alignment :: enum {
 }
 
 Error :: enum {
-	Okay = 0,
+	None = 0,
 	Error,
 }
 
-XXH_DISABLE_PREFETCH :: #config(XXH_DISABLE_PREFETCH, false)
+XXH_DISABLE_PREFETCH :: #config(XXH_DISABLE_PREFETCH, true)
 
-when !XXH_DISABLE_PREFETCH {
+/*
+	llvm.prefetch fails code generation on Linux.
+*/
+when XXH_DISABLE_PREFETCH {
+	import "core:sys/llvm"
+
 	prefetch_address :: #force_inline proc(address: rawptr) {
 		intrinsics.prefetch_read_data(address, /*high*/3)
 	}
@@ -55,13 +60,14 @@ when !XXH_DISABLE_PREFETCH {
 		ptr := rawptr(uintptr(address) + offset)
 		prefetch_address(ptr)
 	}
+	prefetch :: proc { prefetch_address, prefetch_offset, }
 } else {
 	prefetch_address :: #force_inline proc(address: rawptr) {
 	}
 	prefetch_offset :: #force_inline proc(address: rawptr, #any_int offset: uintptr) {
 	}
 }
-prefetch :: proc { prefetch_address, prefetch_offset, }
+
 
 @(optimization_mode="speed")
 XXH_rotl32 :: #force_inline proc(x, r: u32) -> (res: u32) {
diff --git a/core/hash/xxhash/streaming.odin b/core/hash/xxhash/streaming.odin
new file mode 100644
index 000000000..737e37eae
--- /dev/null
+++ b/core/hash/xxhash/streaming.odin
@@ -0,0 +1,372 @@
+/*
+	An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
+	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+
+	Made available under Odin's BSD-3 license, based on the original C code.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+package xxhash
+
+import "core:mem"
+import "core:intrinsics"
+
+/*
+	=== XXH3 128-bit streaming ===
+
+	All the functions are actually the same as for 64-bit streaming variant.
+	The only difference is the finalization routine.
+*/
+XXH3_128_reset :: proc(state: ^XXH3_state) -> (err: Error) {
+	if state == nil {
+		return .Error
+	}
+	XXH3_reset_internal(state, 0, XXH3_kSecret[:], len(XXH3_kSecret))
+	return .None
+}
+XXH3_64_reset :: XXH3_128_reset
+
+XXH3_128_reset_with_secret :: proc(state: ^XXH3_state, secret: []u8) -> (err: Error) {
+	if state == nil {
+		return .Error
+	}
+	if secret == nil || len(secret) < XXH3_SECRET_SIZE_MIN {
+		return .Error
+	}
+	XXH3_reset_internal(state, 0, secret, len(secret))
+	return .None
+}
+XXH3_64_reset_with_secret :: XXH3_128_reset_with_secret
+
+XXH3_128_reset_with_seed :: proc(state: ^XXH3_state, seed: XXH64_hash) -> (err: Error) {
+	if seed == 0 {
+		return XXH3_128_reset(state)
+	}
+	if seed != state.seed {
+		XXH3_init_custom_secret(state.custom_secret[:], seed)
+	}
+	XXH3_reset_internal(state, seed, nil, XXH_SECRET_DEFAULT_SIZE)
+	return .None
+}
+XXH3_64_reset_with_seed :: XXH3_128_reset_with_seed
+
+XXH3_128_update :: proc(state: ^XXH3_state, input: []u8) -> (err: Error) {
+	if len(input) < XXH3_MIDSIZE_MAX {
+		return .Error
+	}
+	return XXH3_update(state, input, XXH3_accumulate_512, XXH3_scramble_accumulator)
+}
+XXH3_64_update :: XXH3_128_update
+
+XXH3_128_digest :: proc(state: ^XXH3_state) -> (hash: XXH3_128_hash) {
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	if state.total_length > XXH3_MIDSIZE_MAX {
+		acc: [XXH_ACC_NB]XXH64_hash
+		XXH3_digest_long(acc[:], state, secret)
+
+		assert(state.secret_limit + XXH_STRIPE_LEN >= XXH_ACC_NB + XXH_SECRET_MERGEACCS_START)
+		{
+			h128 := XXH128_hash_t{}
+
+			h128.low = XXH3_mergeAccs(
+				acc[:],
+				secret[XXH_SECRET_MERGEACCS_START:],
+				state.total_length * XXH_PRIME64_1)
+
+			h128.high = XXH3_mergeAccs(
+				acc[:],
+				secret[state.secret_limit + XXH_STRIPE_LEN - size_of(acc) - XXH_SECRET_MERGEACCS_START:],
+				~(u64(state.total_length) * XXH_PRIME64_2))
+
+			return h128.h
+		}
+	}
+	/* len <= XXH3_MIDSIZE_MAX : short code */
+	if state.seed != 0 {
+		return XXH3_128_with_seed(state.buffer[:state.total_length], state.seed)
+	}
+	return XXH3_128_with_secret(state.buffer[:state.total_length], secret[:state.secret_limit + XXH_STRIPE_LEN])
+}
+
+/*====== Canonical representation ======*/
+
+XXH3_128_canonical_from_hash :: proc(hash: XXH128_hash_t) -> (canonical: XXH128_canonical) {
+	#assert(size_of(XXH128_canonical) == size_of(XXH128_hash_t))
+
+	t := hash
+	when ODIN_ENDIAN == "little" {
+		t.high = byte_swap(t.high)
+		t.low  = byte_swap(t.low)
+	}
+	mem_copy(&canonical.digest, &t.high, size_of(u64))
+	mem_copy(&canonical.digest[8], &t.low, size_of(u64))
+	return
+}
+
+XXH3_128_hash_from_canonical :: proc(src: ^XXH128_canonical) -> (hash: u128) {
+	h := XXH128_hash_t{}
+
+	high := (^u64be)(&src.digest[0])^
+	low  := (^u64be)(&src.digest[8])^
+
+	h.high = u64(high)
+	h.low  = u64(low)
+	return h.h
+}
+
+/* === XXH3 streaming === */
+
+XXH3_init_state :: proc(state: ^XXH3_state) {
+	state.seed = 0
+}
+
+XXH3_create_state :: proc(allocator := context.allocator) -> (res: ^XXH3_state, err: Error) {
+	state, mem_error := mem.new_aligned(XXH3_state, 64, allocator)
+	err = nil if mem_error == nil else .Error
+
+	XXH3_init_state(state)
+	return state, nil
+}
+
+XXH3_destroy_state :: proc(state: ^XXH3_state, allocator := context.allocator) -> (err: Error) {
+	free(state)
+	return .None
+}
+
+XXH3_copy_state :: proc(dest, src: ^XXH3_state) {
+	assert(dest != nil && src != nil)
+	mem_copy(dest, src, size_of(XXH3_state))
+}
+
+XXH3_reset_internal :: proc(state: ^XXH3_state, seed: XXH64_hash, secret: []u8, secret_size: uint) {
+	assert(state != nil)
+
+	init_start  := offset_of(XXH3_state, buffered_size)
+	init_length := offset_of(XXH3_state, stripes_per_block) - init_start
+
+	assert(offset_of(XXH3_state, stripes_per_block) > init_start)
+
+	/*
+		Set members from buffered_size to stripes_per_block (excluded) to 0
+	*/
+	offset := rawptr(uintptr(state) + uintptr(init_start))
+	intrinsics.mem_zero(offset, init_length)
+
+	state.acc[0] = XXH_PRIME32_3
+	state.acc[1] = XXH_PRIME64_1
+	state.acc[2] = XXH_PRIME64_2
+	state.acc[3] = XXH_PRIME64_3
+	state.acc[4] = XXH_PRIME64_4
+	state.acc[5] = XXH_PRIME32_2
+	state.acc[6] = XXH_PRIME64_5
+	state.acc[7] = XXH_PRIME32_1
+	state.seed = seed
+	state.external_secret = secret
+
+	assert(secret_size >= XXH3_SECRET_SIZE_MIN)
+
+	state.secret_limit      = secret_size - XXH_STRIPE_LEN
+	state.stripes_per_block = state.secret_limit / XXH_SECRET_CONSUME_RATE
+}
+
+/*
+	Note: when XXH3_consumeStripes() is invoked, there must be a guarantee that at least
+	one more byte must be consumed from input so that the function can blindly consume
+	all stripes using the "normal" secret segment.
+*/
+XXH3_consume_stripes :: #force_inline proc(
+	acc: []xxh_u64, stripes_so_far: ^uint, stripes_per_block: uint, input: []u8,
+	number_of_stripes: uint, secret: []u8, secret_limit: uint,
+	f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) {
+
+	assert(number_of_stripes <= stripes_per_block) /* can handle max 1 scramble per invocation */
+	assert(stripes_so_far^ < stripes_per_block)
+
+	if stripes_per_block - stripes_so_far^ <= number_of_stripes {
+		/* need a scrambling operation */
+		stripes_to_end_of_block := stripes_per_block - stripes_so_far^
+		stripes_after_block     := number_of_stripes - stripes_to_end_of_block
+
+		XXH3_accumulate(acc, input, secret[stripes_so_far^ * XXH_SECRET_CONSUME_RATE:], stripes_to_end_of_block, f_acc512)
+		f_scramble(acc, secret[secret_limit:])
+		XXH3_accumulate(acc, input[stripes_to_end_of_block * XXH_STRIPE_LEN:], secret, stripes_after_block, f_acc512)
+		stripes_so_far^ = stripes_after_block
+	} else {
+		XXH3_accumulate(acc, input, secret[stripes_so_far^ * XXH_SECRET_CONSUME_RATE:], number_of_stripes, f_acc512)
+		stripes_so_far^ += number_of_stripes
+	}
+}
+
+/*
+	Both XXH3_64bits_update and XXH3_128bits_update use this routine.
+*/
+XXH3_update :: #force_inline proc(
+	state: ^XXH3_state, input: []u8,
+	f_acc512: XXH3_accumulate_512_f,
+	f_scramble: XXH3_scramble_accumulator_f) -> (err: Error) {
+
+	input  := input
+	length := len(input)
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	assert(len(input) > 0)
+
+	state.total_length += u64(length)
+	assert(state.buffered_size <= XXH3_INTERNAL_BUFFER_SIZE)
+
+	if int(state.buffered_size) + length <= XXH3_INTERNAL_BUFFER_SIZE { /* fill in tmp buffer */
+		mem_copy(&state.buffer[state.buffered_size], &input[0], length)
+		state.buffered_size += u32(length)
+		return .None
+	}
+
+	/* total input is now > XXH3_INTERNAL_BUFFER_SIZE */
+	XXH3_INTERNAL_BUFFER_STRIPES :: XXH3_INTERNAL_BUFFER_SIZE / XXH_STRIPE_LEN
+	#assert(XXH3_INTERNAL_BUFFER_SIZE % XXH_STRIPE_LEN == 0) /* clean multiple */
+
+	/*
+		Internal buffer is partially filled (always, except at beginning)
+		Complete it, then consume it.
+	*/
+	if state.buffered_size > 0 {
+		load_size := int(XXH3_INTERNAL_BUFFER_SIZE - state.buffered_size)
+		mem_copy(&state.buffer[state.buffered_size], &input[0], load_size)
+		input = input[load_size:]
+
+		XXH3_consume_stripes(
+			state.acc[:], &state.stripes_so_far, state.stripes_per_block,
+			state.buffer[:], XXH3_INTERNAL_BUFFER_STRIPES,
+			secret, state.secret_limit, f_acc512, f_scramble)
+		state.buffered_size = 0
+	}
+	assert(len(input) > 0)
+
+	/* Consume input by a multiple of internal buffer size */
+	if len(input) > XXH3_INTERNAL_BUFFER_SIZE {
+		tail := input[:len(input) - XXH_STRIPE_LEN]
+		for len(input) > XXH3_INTERNAL_BUFFER_SIZE {
+			XXH3_consume_stripes(
+				state.acc[:], &state.stripes_so_far, state.stripes_per_block,
+				input, XXH3_INTERNAL_BUFFER_STRIPES,
+				secret, state.secret_limit, f_acc512, f_scramble)
+
+			input = input[XXH3_INTERNAL_BUFFER_SIZE:]
+		}
+		/* for last partial stripe */
+		mem_copy(&state.buffer[XXH3_INTERNAL_BUFFER_SIZE - XXH_STRIPE_LEN], &tail[0], XXH_STRIPE_LEN)
+	}
+
+	length = len(input)
+	assert(length > 0)
+
+	/* Some remaining input (always) : buffer it */
+	mem_copy(&state.buffer[0], &input[0], length)
+	state.buffered_size = u32(length)
+	return .None
+}
+
+XXH3_digest_long :: #force_inline proc(acc: []u64, state: ^XXH3_state, secret: []u8) {
+	/*
+		Digest on a local copy. This way, the state remains unaltered, and it can
+		continue ingesting more input afterwards.
+	*/
+	mem_copy(&acc[0], &state.acc[0], size_of(state.acc))
+
+	if state.buffered_size >= XXH_STRIPE_LEN {
+		number_of_stripes := uint((state.buffered_size - 1) / XXH_STRIPE_LEN)
+		stripes_so_far    := state.stripes_so_far
+
+		XXH3_consume_stripes(
+			acc[:], &stripes_so_far, state.stripes_per_block, state.buffer[:], number_of_stripes,
+			secret, state.secret_limit, XXH3_accumulate_512, XXH3_scramble_accumulator)
+
+		/* last stripe */
+		XXH3_accumulate_512(
+			acc[:],
+			state.buffer[state.buffered_size - XXH_STRIPE_LEN:],
+			secret[state.secret_limit - XXH_SECRET_LASTACC_START:])
+	} else { /* bufferedSize < XXH_STRIPE_LEN */
+		last_stripe: [XXH_STRIPE_LEN]u8
+		catchup_size := int(XXH_STRIPE_LEN) - int(state.buffered_size)
+		assert(state.buffered_size > 0) /* there is always some input buffered */
+
+		mem_copy(&last_stripe[0], &state.buffer[XXH3_INTERNAL_BUFFER_SIZE - catchup_size], catchup_size)
+		mem_copy(&last_stripe[catchup_size], &state.buffer[0], int(state.buffered_size))
+		XXH3_accumulate_512(acc[:], last_stripe[:], secret[state.secret_limit - XXH_SECRET_LASTACC_START:])
+	}
+}
+
+XXH3_64_digest :: proc(state: ^XXH3_state) -> (hash: XXH64_hash) {
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	if state.total_length > XXH3_MIDSIZE_MAX {
+		acc: [XXH_ACC_NB]xxh_u64
+		XXH3_digest_long(acc[:], state, secret[:])
+
+		return XXH3_mergeAccs(acc[:], secret[XXH_SECRET_MERGEACCS_START:], state.total_length * XXH_PRIME64_1)
+	}
+
+	/* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
+	if state.seed == 0 {
+		return XXH3_64_with_seed(state.buffer[:state.total_length], state.seed)
+	}
+	return XXH3_64_with_secret(state.buffer[:state.total_length], secret[:state.secret_limit + XXH_STRIPE_LEN])
+}
+
+XXH3_generate_secret :: proc(secret_buffer: []u8, custom_seed: []u8) {
+	secret_length := len(secret_buffer)
+	assert(secret_length >= XXH3_SECRET_SIZE_MIN)
+
+	custom_seed_size := len(custom_seed)
+	if custom_seed_size == 0 {
+		k := XXH3_kSecret
+		mem_copy(&secret_buffer[0], &k[0], XXH_SECRET_DEFAULT_SIZE)
+		return
+	}
+
+	{
+		segment_size :: size_of(XXH128_hash_t)
+		number_of_segments := u64(XXH_SECRET_DEFAULT_SIZE / segment_size)
+
+		seeds: [12]u64le
+		assert(number_of_segments == 12)
+		assert(segment_size * number_of_segments == XXH_SECRET_DEFAULT_SIZE) /* exact multiple */
+
+		scrambler := XXH3_128_canonical_from_hash(XXH128_hash_t{h=XXH3_128(custom_seed[:])})
+
+		/*
+			Copy customSeed to seeds[], truncating or repeating as necessary.
+			TODO: Convert `mem_copy` to slice copies.
+		*/
+		{
+			to_fill := min(custom_seed_size, size_of(seeds))
+			filled  := to_fill
+			mem_copy(&seeds[0], &custom_seed[0], to_fill)
+			for filled < size_of(seeds) {
+				to_fill = min(filled, size_of(seeds) - filled)
+				seed_offset := rawptr(uintptr(&seeds[0]) + uintptr(filled))
+				mem_copy(seed_offset, &seeds[0], to_fill)
+				filled += to_fill
+			}
+		}
+
+		/*
+			Generate secret
+		*/
+		mem_copy(&secret_buffer[0], &scrambler, size_of(scrambler))
+
+		for segment_number := u64(1); segment_number < number_of_segments; segment_number += 1 {
+			segment_start := segment_number * segment_size
+
+			this_seed := u64(seeds[segment_number]) + segment_number
+			segment   := XXH3_128_canonical_from_hash(XXH128_hash_t{h=XXH3_128(scrambler.digest[:], this_seed)})
+
+			mem_copy(&secret_buffer[segment_start], &segment, size_of(segment))
+		}
+	}
+}
\ No newline at end of file
diff --git a/core/hash/xxhash/xxhash_3.odin b/core/hash/xxhash/xxhash_3.odin
index 327a4c847..5bd5537b1 100644
--- a/core/hash/xxhash/xxhash_3.odin
+++ b/core/hash/xxhash/xxhash_3.odin
@@ -8,91 +8,29 @@
 		Jeroen van Rijn: Initial implementation.
 */
 package xxhash
+
 import "core:intrinsics"
 
-/* *********************************************************************
-*  XXH3
-*  New generation hash designed for speed on small keys and vectorization
-************************************************************************
+/*
+*************************************************************************
+*  XXH3
+*  New generation hash designed for speed on small keys and vectorization
+*************************************************************************
 * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
 * remaining a true 64-bit/128-bit hash function.
-*
-* This is done by prioritizing a subset of 64-bit operations that can be
-* emulated without too many steps on the average 32-bit machine.
-*
-* For example, these two lines seem similar, and run equally fast on 64-bit:
-*
-*   xxh_u64 x;
-*   x ^= (x >> 47); // good
-*   x ^= (x >> 13); // bad
-*
-* However, to a 32-bit machine, there is a major difference.
-*
-* x ^= (x >> 47) looks like this:
-*
-*   x.lo ^= (x.hi >> (47 - 32));
-*
-* while x ^= (x >> 13) looks like this:
-*
-*   // note: funnel shifts are not usually cheap.
-*   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
-*   x.hi ^= (x.hi >> 13);
-*
-* The first one is significantly faster than the second, simply because the
-* shift is larger than 32. This means:
-*   - All the bits we need are in the upper 32 bits, so we can ignore the lower
-*     32 bits in the shift.
-*   - The shift result will always fit in the lower 32 bits, and therefore,
-*     we can ignore the upper 32 bits in the xor.
-*
-* Thanks to this optimization, XXH3 only requires these features to be efficient:
-*
-*  - Usable unaligned access
-*  - A 32-bit or 64-bit ALU
-*      - If 32-bit, a decent ADC instruction
-*  - A 32 or 64-bit multiply with a 64-bit result
-*  - For the 128-bit variant, a decent byteswap helps short inputs.
-*
-* The first two are already required by XXH32, and almost all 32-bit and 64-bit
-* platforms which can run XXH32 can run XXH3 efficiently.
-*
-* Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
-* notable exception.
-*
-* First of all, Thumb-1 lacks support for the UMULL instruction which
-* performs the important long multiply. This means numerous __aeabi_lmul
-* calls.
-*
-* Second of all, the 8 functional registers are just not enough.
-* Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
-* Lo registers, and this shuffling results in thousands more MOVs than A32.
-*
-* A32 and T32 don't have this limitation. They can access all 14 registers,
-* do a 32->64 multiply with UMULL, and the flexible operand allowing free
-* shifts is helpful, too.
-*
-* Therefore, we do a quick sanity check.
-*
-* If compiling Thumb-1 for a target which supports ARM instructions, we will
-* emit a warning, as it is not a "sane" platform to compile for.
-*
-* Usually, if this happens, it is because of an accident and you probably need
-* to specify -march, as you likely meant to compile for a newer architecture.
-*
-* Credit: large sections of the vectorial and asm source code paths
-*         have been contributed by @easyaspi314
+* ==========================================
+* XXH3 default settings
+* ==========================================
 */
-XXH_ACC_ALIGN :: 8 /* scalar */
-
-/* ==========================================
- * XXH3 default settings
- * ========================================== */
-
-XXH3_SECRET_SIZE_MIN :: 136
+/*
+	Custom secrets have a default length of 192, but can be set to a different size.
+	The minimum secret size is 136 bytes. It must also be a multiple of 64.
+*/
 XXH_SECRET_DEFAULT_SIZE :: max(XXH3_SECRET_SIZE_MIN, #config(XXH_SECRET_DEFAULT_SIZE, 192))
+#assert(XXH_SECRET_DEFAULT_SIZE % 64 == 0)
 
-XXH3_kSecret :: [?]u8{
+XXH3_kSecret := [XXH_SECRET_DEFAULT_SIZE]u8{
 	0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
 	0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
 	0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
@@ -106,8 +44,42 @@ XXH3_kSecret :: [?]u8{
 	0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
 	0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
 }
-#assert(size_of(XXH3_kSecret) == 192)
 
+/*
+	Do not change this constant.
+*/
+XXH3_SECRET_SIZE_MIN :: 136
+#assert(len(XXH3_kSecret) == 192 && len(XXH3_kSecret) > XXH3_SECRET_SIZE_MIN)
+
+XXH_ACC_ALIGN :: 8 /* scalar */
+
+/*
+	This is the optimal update size for incremental hashing.
+*/
+XXH3_INTERNAL_BUFFER_SIZE :: 256
+
+/*
+	Streaming state.
+
+	IMPORTANT: This structure has a strict alignment requirement of 64 bytes!
+	Do not allocate this with `make()` or `new`, it will not be sufficiently aligned.
+	Use `XXH3_create_state` and `XXH3_destroy_state`, or stack allocation.
+*/
+XXH3_state :: struct {
+	acc:               [8]u64,
+	custom_secret:     [XXH_SECRET_DEFAULT_SIZE]u8,
+	buffer:            [XXH3_INTERNAL_BUFFER_SIZE]u8,
+	buffered_size:     u32,
+	reserved32:        u32,
+	stripes_so_far:    uint,
+	total_length:      u64,
+	stripes_per_block: uint,
+	secret_limit:      uint,
+	seed:              u64,
+	reserved64:        u64,
+	external_secret:   []u8,
+}
+#assert(offset_of(XXH3_state, acc) % 64 == 0 && offset_of(XXH3_state, custom_secret) % 64 == 0 &&
+	offset_of(XXH3_state, buffer) % 64 == 0)
 
 /************************************************************************
 *  XXH3 128-bit variant
@@ -118,7 +90,6 @@ XXH3_kSecret :: [?]u8{
 */
 xxh_u128      :: u128
 XXH3_128_hash :: u128
-XXH3_128_DEFAULT_SEED :: xxh_u64(0)
 
 XXH128_hash_t :: struct #raw_union {
 	using raw: struct {
@@ -129,14 +100,8 @@ XXH128_hash_t :: struct #raw_union {
 }
 #assert(size_of(xxh_u128) == size_of(XXH128_hash_t))
 
-@(optimization_mode="speed")
-XXH_mul_32_to_64 :: #force_inline proc(x, y: xxh_u32) -> (res: xxh_u64) {
-	return u64(x) * u64(y)
-}
-
-@(optimization_mode="speed")
-XXH_mul_64_to_128 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u128) {
-	return xxh_u128(lhs) * xxh_u128(rhs)
+XXH128_canonical :: struct {
+	digest: [size_of(XXH128_hash_t)]u8,
 }
 
 /*
@@ -148,9 +113,8 @@ XXH_mul_64_to_128 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u128) {
 */
 @(optimization_mode="speed")
 XXH_mul_64_to_128_fold_64 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u64) {
-	t := XXH128_hash_t{}
-	t.h = #force_inline XXH_mul_64_to_128(lhs, rhs)
-	return t.low ~ t.high
+	t := u128(lhs) * u128(rhs)
+	return u64(t & 0xFFFFFFFFFFFFFFFF) ~ u64(t >> 64)
 }
 
 @(optimization_mode="speed")
@@ -186,12 +150,12 @@ XXH3_rrmxmx :: #force_inline proc(h64, length: xxh_u64) -> (res: xxh_u64) {
 
 /*
 	==========================================
-	XXH3 128 bits (a.k.a XXH128)
+	XXH3 128 bits (a.k.a XXH128)
 	==========================================
 	XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
 	even without counting the significantly larger output size.
 
-	For example, extra steps are taken to avoid the seed-dependent collisions
+	For example, extra steps are taken to avoid the seed-dependent collisions
 	in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
 
 	This strength naturally comes at the cost of some speed, especially on short
@@ -241,7 +205,7 @@ XXH3_len_4to8_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u6
 		/* Shift len to the left to ensure it is even, this avoids even multiplies. */
 		m128 := XXH128_hash_t{
-			h = XXH_mul_64_to_128(keyed, u64(XXH_PRIME64_1) + (u64(length) << 2)),
+			h = u128(keyed) * (XXH_PRIME64_1 + u128(length) << 2),
 		}
 		m128.high += (m128.low << 1)
 		m128.low  ~= (m128.high >> 3)
@@ -265,7 +229,7 @@ XXH3_len_9to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u
 		input_lo := XXH64_read64(input[0:])
 		input_hi := XXH64_read64(input[length - 8:])
 		m128 := XXH128_hash_t{
-			h = XXH_mul_64_to_128(input_lo ~ input_hi ~ bitflipl, XXH_PRIME64_1),
+			h = u128(input_lo ~ input_hi ~ bitflipl) * XXH_PRIME64_1,
 		}
 		/*
 		 * Put len in the middle of m128 to ensure that the length gets mixed to
@@ -277,49 +241,14 @@ XXH3_len_9to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u
 		 * Add the high 32 bits of input_hi to the high 32 bits of m128, then
 		 * add the long product of the low 32 bits of input_hi and XXH_XXH_PRIME32_2 to
 		 * the high 64 bits of m128.
-		 *
-		 * The best approach to this operation is different on 32-bit and 64-bit.
 		 */
-		when size_of(rawptr) == 4 { /* 32-bit */
-			/*
-			 * 32-bit optimized version, which is more readable.
-			 *
-			 * On 32-bit, it removes an ADC and delays a dependency between the two
-			 * halves of m128.high64, but it generates an extra mask on 64-bit.
-			 */
-			m128.high += (input_hi & 0xFFFFFFFF00000000) + XXH_mul_32_to_64(u32(input_hi), XXH_PRIME32_2)
-		} else {
-			/*
-			 * 64-bit optimized (albeit more confusing) version.
-			 *
-			 * Uses some properties of addition and multiplication to remove the mask:
-			 *
-			 * Let:
-			 *    a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
-			 *    b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
-			 *    c = XXH_XXH_PRIME32_2
-			 *
-			 *    a + (b * c)
-			 * Inverse Property: x + y - x == y
-			 *    a + (b * (1 + c - 1))
-			 * Distributive Property: x * (y + z) == (x * y) + (x * z)
-			 *    a + (b * 1) + (b * (c - 1))
-			 * Identity Property: x * 1 == x
-			 *    a + b + (b * (c - 1))
-			 *
-			 * Substitute a, b, and c:
-			 *    input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_XXH_PRIME32_2 - 1))
-			 *
-			 * Since input_hi.hi + input_hi.lo == input_hi, we get this:
-			 *    input_hi + ((xxh_u64)input_hi.lo * (XXH_XXH_PRIME32_2 - 1))
-			 */
-			m128.high += input_hi + XXH_mul_32_to_64(u32(input_hi), XXH_PRIME32_2 - 1)
-		}
+		m128.high += input_hi + u64(u32(input_hi)) * u64(XXH_PRIME32_2 - 1)
+
 		/* m128 ^= XXH_swap64(m128 >> 64); */
 		m128.low ~= byte_swap(m128.high)
 
 		{	/* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
 			h128 := XXH128_hash_t{
-				h = XXH_mul_64_to_128(m128.low, XXH_PRIME64_2),
+				h = u128(m128.low) * XXH_PRIME64_2,
 			}
 			h128.high += m128.high * XXH_PRIME64_2
 			h128.low   = XXH3_avalanche(h128.low)
@@ -364,9 +293,6 @@ XXH128_mix32B :: #force_inline proc(acc: xxh_u128, input_1: []u8, input_2: []u8,
 	}
 }
 
-
-
-
 @(optimization_mode="speed")
 XXH3_len_17to128_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
 	length := len(input)
@@ -410,18 +336,18 @@ XXH3_len_129to240_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xx
 		i: int
 		#no_bounds_check for i = 0; i < 4; i += 1 {
 			acc.h = XXH128_mix32B(acc.h,
-					input[32 * i:],
-					input [32 * i + 16:],
-					secret[32 * i:],
-					seed)
+				input[32 * i:],
+				input [32 * i + 16:],
+				secret[32 * i:],
+				seed)
 		}
 		acc.low  = XXH3_avalanche(acc.low)
 		acc.high = XXH3_avalanche(acc.high)
 
 		#no_bounds_check for i = 4; i < nbRounds; i += 1 {
 			acc.h = XXH128_mix32B(acc.h,
-					input[32 * i:], input[32 * i + 16:],
-					secret[XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)):],
+				input[32 * i:], input[32 * i + 16:],
+				secret[XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)):],
 				seed)
 		}
 		/* last bytes */
@@ -435,9 +361,9 @@ XXH3_len_129to240_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xx
 		h128 := XXH128_hash_t{}
 		h128.low  = acc.low + acc.high
 		h128.high = u64(
-			u128(acc.low   * XXH_PRIME64_1) \
-			+ u128(acc.high * XXH_PRIME64_4) \
-			+ u128((u64(length) - seed) * XXH_PRIME64_2))
+			  u128(acc.low  * XXH_PRIME64_1) \
+			+ u128(acc.high * XXH_PRIME64_4) \
+			+ u128((u64(length) - seed) * XXH_PRIME64_2))
 		h128.low  = XXH3_avalanche(h128.low)
 		h128.high = u64(i64(0) - i64(XXH3_avalanche(h128.high)))
 		return h128.h
@@ -481,18 +407,20 @@ XXH3_hashLong_128b_internal :: #force_inline proc(
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_128b_default :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
-	k_secret := XXH3_kSecret
-	return XXH3_hashLong_128b_internal(input, k_secret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
+	return XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
 }
 
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_128b_withSecret :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
 	return XXH3_hashLong_128b_internal(input, secret, XXH3_accumulate_512, XXH3_scramble_accumulator)
 }
 
+@(optimization_mode="speed")
 XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
 	input: []u8, seed: xxh_u64, secret: []u8,
 	f_acc512: XXH3_accumulate_512_f,
@@ -500,26 +428,27 @@ XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
 	f_initSec: XXH3_init_custom_secret_f) -> (res: XXH3_128_hash) {
 
 	if seed == 0 {
-		k := XXH3_kSecret
-		return XXH3_hashLong_128b_internal(input, k[:], f_acc512, f_scramble)
+		return XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
 	}
 	{
-		secret := [XXH_SECRET_DEFAULT_SIZE]u8{}
-		f_initSec(secret[:], seed)
-		return XXH3_hashLong_128b_internal(input, secret[:], f_acc512, f_scramble)
+		_secret := [XXH_SECRET_DEFAULT_SIZE]u8{}
+		f_initSec(_secret[:], seed)
+		return XXH3_hashLong_128b_internal(input, _secret[:], f_acc512, f_scramble)
	}
 }
 
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_128b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
 	return XXH3_hashLong_128b_withSeed_internal(input, seed, secret, XXH3_accumulate_512, XXH3_scramble_accumulator , XXH3_init_custom_secret)
 }
 
 XXH3_hashLong128_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash)
 
+@(optimization_mode="speed")
 XXH3_128bits_internal :: #force_inline proc(
 	input: []u8, seed: xxh_u64, secret: []u8, f_hl128: XXH3_hashLong128_f) -> (res: XXH3_128_hash) {
 
@@ -545,13 +474,21 @@ XXH3_128bits_internal :: #force_inline proc(
 }
 
 /* === Public XXH128 API === */
-
-XXH3_128bits :: proc(input: []u8) -> (hash: XXH3_128_hash) {
-	k := XXH3_kSecret
-	return XXH3_128bits_internal(input, XXH3_128_DEFAULT_SEED, k[:], XXH3_hashLong_128b_default)
+@(optimization_mode="speed")
+XXH3_128_default :: proc(input: []u8) -> (hash: XXH3_128_hash) {
+	return XXH3_128bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_128b_withSeed)
 }
 
+@(optimization_mode="speed")
+XXH3_128_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: XXH3_128_hash) {
+	return XXH3_128bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_128b_withSeed)
+}
+
+@(optimization_mode="speed")
+XXH3_128_with_secret :: proc(input: []u8, secret: []u8) -> (hash: XXH3_128_hash) {
+	return XXH3_128bits_internal(input, 0, secret, XXH3_hashLong_128b_withSecret)
+}
+
+XXH3_128 :: proc { XXH3_128_default, XXH3_128_with_seed, XXH3_128_with_secret }
 
 /*
 	==========================================
@@ -613,7 +550,8 @@ XXH3_len_4to8_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64
 	assert(secret != nil)
 
 	seed := seed
-	seed ~= u64(byte_swap(u32(seed) << 32))
+	seed ~= (u64(byte_swap(u32(seed))) << 32)
+
 	#no_bounds_check {
 		input1 := XXH32_read32(input)
 		input2 := XXH32_read32(input[length - 4:])
@@ -756,9 +694,10 @@ XXH3_len_129to240_64b :: proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res:
 
 /* ======= Long Keys ======= */
 
-XXH_STRIPE_LEN          :: 64
-XXH_SECRET_CONSUME_RATE :: 8 /* nb of secret bytes consumed at each accumulation */
-XXH_ACC_NB              :: (XXH_STRIPE_LEN / size_of(xxh_u64))
+XXH_STRIPE_LEN           :: 64
+XXH_SECRET_CONSUME_RATE  :: 8 /* nb of secret bytes consumed at each accumulation */
+XXH_ACC_NB               :: (XXH_STRIPE_LEN / size_of(xxh_u64))
+XXH_SECRET_LASTACC_START :: 7 /* not aligned on 8, last secret is different from acc & scrambler */
 
 @(optimization_mode="speed")
 XXH_writeLE64 :: #force_inline proc(dst: []u8, v64: u64le) {
@@ -808,9 +747,10 @@ XXH3_accumulate_512_scalar :: #force_inline proc(acc: []xxh_u64, input: []u8, se
 
 	#no_bounds_check for i := uint(0); i < XXH_ACC_NB; i += 1 {
 		data_val := XXH64_read64(xinput[8 * i:])
-		data_key := data_val ~ XXH64_read64(xsecret[8 * i:])
+		sec      := XXH64_read64(xsecret[8 * i:])
+		data_key := data_val ~ sec
 		xacc[i ~ 1] += data_val /* swap adjacent lanes */
-		xacc[i    ] += XXH_mul_32_to_64(u32(data_key & 0xFFFFFFFF), u32(data_key >> 32))
+		xacc[i    ] += u64(u128(u32(data_key)) * u128(u64(data_key >> 32)))
 	}
 }
 
@@ -835,12 +775,10 @@ XXH3_scramble_accumulator_scalar :: #force_inline proc(acc: []xxh_u64, secret: [
 XXH3_init_custom_secret_scalar :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64) {
 	#assert((XXH_SECRET_DEFAULT_SIZE & 15) == 0)
 
-	kSecretPtr := XXH3_kSecret
-
 	nbRounds := XXH_SECRET_DEFAULT_SIZE / 16
 	#no_bounds_check for i := 0; i < nbRounds; i += 1 {
-		lo := XXH64_read64(kSecretPtr[16 * i:   ]) + seed64
-		hi := XXH64_read64(kSecretPtr[16 * i + 8:]) - seed64
+		lo := XXH64_read64(XXH3_kSecret[16 * i:   ]) + seed64
+		hi := XXH64_read64(XXH3_kSecret[16 * i + 8:]) - seed64
 		XXH_writeLE64(custom_secret[16 * i:   ], u64le(lo))
 		XXH_writeLE64(custom_secret[16 * i + 8:], u64le(hi))
 	}
@@ -854,8 +792,8 @@ XXH_PREFETCH_DIST :: 320
  * Assumption: nbStripes will not overflow the secret size
 */
 @(optimization_mode="speed")
-XXH3_accumulate :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint,
-	f_acc512: XXH3_accumulate_512_f) {
+XXH3_accumulate :: #force_inline proc(
+	acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint, f_acc512: XXH3_accumulate_512_f) {
 
 	for n := uint(0); n < nbStripes; n += 1 {
 		when !XXH_DISABLE_PREFETCH {
@@ -885,13 +823,11 @@ XXH3_hashLong_internal_loop :: #force_inline proc(acc: []xxh_u64, input: []u8, s
 	/* last partial block */
 	#no_bounds_check {
 		stripes := ((length - 1) - (block_len * blocks)) / XXH_STRIPE_LEN
-
 		XXH3_accumulate(acc, input[blocks * block_len:], secret, stripes, f_acc512)
 
 		/* last stripe */
 		#no_bounds_check {
 			p := input[length - XXH_STRIPE_LEN:]
-			XXH_SECRET_LASTACC_START :: 7 /* not aligned on 8, last secret is different from acc & scrambler */
 			f_acc512(acc, p, secret[secret_size - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START:])
 		}
 	}
@@ -911,4 +847,115 @@ XXH3_mergeAccs :: #force_inline proc(acc: []xxh_u64, secret: []u8, start: xxh_u6
 		result64 += XXH3_mix2Accs(acc[2 * i:], secret[16 * i:])
 	}
 	return XXH3_avalanche(result64)
-}
\ No newline at end of file
+}
+
+@(optimization_mode="speed")
+XXH3_hashLong_64b_internal :: #force_inline proc(input: []u8, secret: []u8,
+	f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) -> (hash: xxh_u64) {
+
+	acc: [XXH_ACC_NB]xxh_u64 = XXH3_INIT_ACC
+
+	XXH3_hashLong_internal_loop(acc[:], input, secret, f_acc512, f_scramble)
+
+	/* converge into final hash */
+	#assert(size_of(acc) == 64)
+	/* do not align on 8, so that the secret is different from the accumulator */
+	XXH_SECRET_MERGEACCS_START :: 11
+	assert(len(secret) >= size_of(acc) + XXH_SECRET_MERGEACCS_START)
+	return XXH3_mergeAccs(acc[:], secret[XXH_SECRET_MERGEACCS_START:], xxh_u64(len(input)) * XXH_PRIME64_1)
+}
+
+/*
+	It's important for performance that XXH3_hashLong is not inlined.
+*/
+@(optimization_mode="speed")
+XXH3_hashLong_64b_withSecret :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
+	return XXH3_hashLong_64b_internal(input, secret, XXH3_accumulate_512, XXH3_scramble_accumulator)
+}
+
+/*
+	It's important for performance that XXH3_hashLong is not inlined.
+	Since the function is not inlined, the compiler may not be able to understand that,
+	in some scenarios, its `secret` argument is actually a compile time constant.
+	This variant enforces that the compiler can detect that,
+	and uses this opportunity to streamline the generated code for better performance.
+*/
+@(optimization_mode="speed")
+XXH3_hashLong_64b_default :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
+	return XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
+}
+
+/*
+	XXH3_hashLong_64b_withSeed():
+	Generate a custom key based on alteration of default XXH3_kSecret with the seed,
+	and then use this key for long mode hashing.
+
+	This operation is decently fast but nonetheless costs a little bit of time.
+	Try to avoid it whenever possible (typically when seed==0).
+
+	It's important for performance that XXH3_hashLong is not inlined. Not sure
+	why (uop cache maybe?), but the difference is large and easily measurable.
+*/
+@(optimization_mode="speed")
+XXH3_hashLong_64b_withSeed_internal :: #force_no_inline proc(input: []u8,
+	seed: xxh_u64,
+	f_acc512: XXH3_accumulate_512_f,
+	f_scramble: XXH3_scramble_accumulator_f,
+	f_init_sec: XXH3_init_custom_secret_f) -> (hash: xxh_u64) {
+
+	if seed == 0 {
+		return XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
+	}
+	{
+		secret: [XXH_SECRET_DEFAULT_SIZE]u8
+		f_init_sec(secret[:], seed)
+		return XXH3_hashLong_64b_internal(input, secret[:], f_acc512, f_scramble)
+	}
+}
+
+/*
+	It's important for performance that XXH3_hashLong is not inlined.
+*/
+@(optimization_mode="speed")
+XXH3_hashLong_64b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
+	return XXH3_hashLong_64b_withSeed_internal(input, seed, XXH3_accumulate_512, XXH3_scramble_accumulator, XXH3_init_custom_secret)
+}
+
+
+XXH3_hashLong64_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: xxh_u64)
+
+@(optimization_mode="speed")
+XXH3_64bits_internal :: proc(input: []u8, seed: xxh_u64, secret: []u8, f_hashLong: XXH3_hashLong64_f) -> (hash: xxh_u64) {
+	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
+	/*
+		If an action is to be taken if the len(secret) condition is not respected, it should be done here.
+		For now, it's a contract pre-condition.
+		Adding a check and a branch here would cost performance at every hash.
+		Also, note that the function signature doesn't offer room to return an error.
+	*/
+	length := len(input)
+	switch {
+	case length <= 16:               return XXH3_len_0to16_64b(input, secret, seed)
+	case length <= 128:              return XXH3_len_17to128_64b(input, secret, seed)
+	case length <= XXH3_MIDSIZE_MAX: return XXH3_len_129to240_64b(input, secret, seed)
+	case:                            return f_hashLong(input, seed, secret)
+	}
+	unreachable()
+}
+
+/* === Public entry point === */
+@(optimization_mode="speed")
+XXH3_64_default :: proc(input: []u8) -> (hash: xxh_u64) {
+	return XXH3_64bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_64b_default)
+}
+
+@(optimization_mode="speed")
+XXH3_64_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: xxh_u64) {
+	return XXH3_64bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_64b_withSeed)
+}
+
+@(optimization_mode="speed")
+XXH3_64_with_secret :: proc(input, secret: []u8) -> (hash: xxh_u64) {
+	return XXH3_64bits_internal(input, 0, secret, XXH3_hashLong_64b_withSecret)
+}
+
+XXH3_64 :: proc { XXH3_64_default, XXH3_64_with_seed, XXH3_64_with_secret }
\ No newline at end of file
diff --git a/core/hash/xxhash/xxhash_32.odin b/core/hash/xxhash/xxhash_32.odin
index f41161133..e63d998dd 100644
--- a/core/hash/xxhash/xxhash_32.odin
+++ b/core/hash/xxhash/xxhash_32.odin
@@ -197,12 +197,12 @@ XXH32 :: proc(input: []u8, seed := XXH32_DEFAULT_SEED) -> (digest: XXH32_hash) {
 */
 XXH32_create_state :: proc(allocator := context.allocator) -> (res: ^XXH32_state, err: Error) {
 	state := new(XXH32_state, allocator)
-	return state, nil if state != nil else .Error
+	return state, .None if state != nil else .Error
 }
 
 XXH32_destroy_state :: proc(state: ^XXH32_state, allocator := context.allocator) -> (err: Error) {
 	free(state, allocator)
-	return nil
+	return .None
 }
 
 XXH32_copy_state :: proc(dest, src: ^XXH32_state) {
@@ -221,7 +221,7 @@ XXH32_reset_state :: proc(state_ptr: ^XXH32_state, seed := XXH32_DEFAULT_SEED) -
 		Do not write into reserved, planned to be removed in a future version.
 	*/
 	mem_copy(state_ptr, &state, size_of(state) - size_of(state.reserved))
-	return nil
+	return .None
 }
 
 XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
@@ -236,7 +236,7 @@ XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
 		ptr := uintptr(raw_data(state.mem32[:])) + uintptr(state.memsize)
 		mem_copy(rawptr(ptr), raw_data(input), int(length))
 		state.memsize += XXH32_hash(length)
-		return nil
+		return .None
 	}
 
 	if state.memsize > 0 {	/* Some data left from previous update */
@@ -276,7 +276,7 @@ XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
 		mem_copy(raw_data(state.mem32[:]), raw_data(buf[:]), int(length))
 		state.memsize = u32(length)
 	}
-	return nil
+	return .None
 }
 
 XXH32_digest :: proc(state: ^XXH32_state) -> (res: XXH32_hash) {
diff --git a/core/hash/xxhash/xxhash_64.odin b/core/hash/xxhash/xxhash_64.odin
index d535a134c..e95842168 100644
--- a/core/hash/xxhash/xxhash_64.odin
+++ b/core/hash/xxhash/xxhash_64.odin
@@ -163,12 +163,12 @@ XXH64 :: proc(input: []u8, seed := XXH64_DEFAULT_SEED) -> (digest: XXH64_hash) {
 */
 XXH64_create_state :: proc(allocator := context.allocator) -> (res: ^XXH64_state, err: Error) {
 	state := new(XXH64_state, allocator)
-	return state, nil if state != nil else .Error
+	return state, .None if state != nil else .Error
 }
 
 XXH64_destroy_state :: proc(state: ^XXH64_state, allocator := context.allocator) -> (err: Error) {
 	free(state, allocator)
-	return nil
+	return .None
 }
 
 XXH64_copy_state :: proc(dest, src: ^XXH64_state) {
@@ -187,7 +187,7 @@ XXH64_reset_state :: proc(state_ptr: ^XXH64_state, seed := XXH64_DEFAULT_SEED) -
 		Do not write into reserved64, might be removed in a future version.
 	*/
 	mem_copy(state_ptr, &state, size_of(state) - size_of(state.reserved64))
-	return nil
+	return .None
 }
 
 @(optimization_mode="speed")
@@ -201,7 +201,7 @@ XXH64_update :: proc(state: ^XXH64_state, input: []u8) -> (err: Error) {
 		ptr := uintptr(raw_data(state.mem64[:])) + uintptr(state.memsize)
 		mem_copy(rawptr(ptr), raw_data(input), int(length))
 		state.memsize += u32(length)
-		return nil
+		return .None
 	}
 
 	if state.memsize > 0 {	/* tmp buffer is full */
@@ -241,7 +241,7 @@ XXH64_update :: proc(state: ^XXH64_state, input: []u8) -> (err: Error) {
 		mem_copy(raw_data(state.mem64[:]), raw_data(buf[:]), int(length))
 		state.memsize = u32(length)
 	}
-	return nil
+	return .None
 }
 
 @(optimization_mode="speed")
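
To make the API this merge brings in concrete, here is a minimal usage sketch of the one-shot and streaming entry points (`XXH3_64`, `XXH3_create_state`, `XXH3_64_update`, `XXH3_64_digest`). It assumes the package is imported from `core:hash/xxhash`; it is an illustration written against the diff above, not code from the commit.

```odin
package xxhash_example

import "core:fmt"
import "core:hash/xxhash"

main :: proc() {
	// 1 KiB of sample data. Note that in this version, XXH3_64_update
	// returns .Error for inputs shorter than XXH3_MIDSIZE_MAX (240 bytes).
	data := make([]u8, 1024)
	defer delete(data)
	for _, i in data {
		data[i] = u8(i)
	}

	// One-shot hashing: XXH3_64 is a proc group, so passing a seed or a
	// secret selects XXH3_64_with_seed or XXH3_64_with_secret respectively.
	fmt.printf("one-shot: %16x\n", xxhash.XXH3_64(data))
	fmt.printf("seeded:   %16x\n", xxhash.XXH3_64(data, 0xdead_beef))

	// Streaming: XXH3_state has a strict 64-byte alignment requirement,
	// so it must come from XXH3_create_state, not a plain `new`.
	state, _ := xxhash.XXH3_create_state()
	defer xxhash.XXH3_destroy_state(state)

	xxhash.XXH3_64_reset(state)
	xxhash.XXH3_64_update(state, data)
	fmt.printf("streamed: %16x\n", xxhash.XXH3_64_digest(state))
}
```

For the same input, the one-shot and streamed digests are intended to agree; the streaming path simply buffers input in `XXH3_INTERNAL_BUFFER_SIZE` chunks and merges the accumulators at digest time.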