author    Jeroen van Rijn <Kelimion@users.noreply.github.com>   2021-09-11 23:45:08 +0200
committer Jeroen van Rijn <Kelimion@users.noreply.github.com>   2021-09-11 23:45:08 +0200
commit    00c1d341089518feb04f43431d5404f12c2969ec (patch)
tree      3d17595ca01a65d6ac12b9589d90cb554da8c399 /core/hash
parent    87f18154861445a54beae0caedfd39169813bb4a (diff)
xxhash: Extra (generated) tests.
Diffstat (limited to 'core/hash')
-rw-r--r--   core/hash/xxhash/common.odin      7
-rw-r--r--   core/hash/xxhash/xxhash_3.odin  769
2 files changed, 646 insertions, 130 deletions
diff --git a/core/hash/xxhash/common.odin b/core/hash/xxhash/common.odin
index aac5af7e1..434ebb905 100644
--- a/core/hash/xxhash/common.odin
+++ b/core/hash/xxhash/common.odin
@@ -45,12 +45,12 @@ Error :: enum {
Error,
}
-XXH_DISABLE_PREFETCH :: #config(XXH_DISABLE_PREFETCH, false)
+XXH_DISABLE_PREFETCH :: #config(XXH_DISABLE_PREFETCH, true)
/*
llvm.prefetch fails code generation on Linux.
*/
-when !XXH_DISABLE_PREFETCH && ODIN_OS == "windows" {
+when XXH_DISABLE_PREFETCH {
import "core:sys/llvm"
prefetch_address :: #force_inline proc(address: rawptr) {
@@ -60,11 +60,12 @@ when !XXH_DISABLE_PREFETCH && ODIN_OS == "windows" {
ptr := rawptr(uintptr(address) + offset)
prefetch_address(ptr)
}
+ prefetch :: proc { prefetch_address, prefetch_offset, }
} else {
prefetch_address :: #force_inline proc(address: rawptr) {}
prefetch_offset :: #force_inline proc(address: rawptr, auto_cast offset: uintptr) {}
}
-prefetch :: proc { prefetch_address, prefetch_offset, }
+
@(optimization_mode="speed")
XXH_rotl32 :: #force_inline proc(x, r: u32) -> (res: u32) {
diff --git a/core/hash/xxhash/xxhash_3.odin b/core/hash/xxhash/xxhash_3.odin
index 327a4c847..415ed928d 100644
--- a/core/hash/xxhash/xxhash_3.odin
+++ b/core/hash/xxhash/xxhash_3.odin
@@ -8,6 +8,7 @@
Jeroen van Rijn: Initial implementation.
*/
package xxhash
+
import "core:intrinsics"
/* *********************************************************************
@@ -16,79 +17,14 @@ import "core:intrinsics"
************************************************************************
* One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
* remaining a true 64-bit/128-bit hash function.
-*
-* This is done by prioritizing a subset of 64-bit operations that can be
-* emulated without too many steps on the average 32-bit machine.
-*
-* For example, these two lines seem similar, and run equally fast on 64-bit:
-*
-* xxh_u64 x;
-* x ^= (x >> 47); // good
-* x ^= (x >> 13); // bad
-*
-* However, to a 32-bit machine, there is a major difference.
-*
-* x ^= (x >> 47) looks like this:
-*
-* x.lo ^= (x.hi >> (47 - 32));
-*
-* while x ^= (x >> 13) looks like this:
-*
-* // note: funnel shifts are not usually cheap.
-* x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
-* x.hi ^= (x.hi >> 13);
-*
-* The first one is significantly faster than the second, simply because the
-* shift is larger than 32. This means:
-* - All the bits we need are in the upper 32 bits, so we can ignore the lower
-* 32 bits in the shift.
-* - The shift result will always fit in the lower 32 bits, and therefore,
-* we can ignore the upper 32 bits in the xor.
-*
-* Thanks to this optimization, XXH3 only requires these features to be efficient:
-*
-* - Usable unaligned access
-* - A 32-bit or 64-bit ALU
-* - If 32-bit, a decent ADC instruction
-* - A 32 or 64-bit multiply with a 64-bit result
-* - For the 128-bit variant, a decent byteswap helps short inputs.
-*
-* The first two are already required by XXH32, and almost all 32-bit and 64-bit
-* platforms which can run XXH32 can run XXH3 efficiently.
-*
-* Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
-* notable exception.
-*
-* First of all, Thumb-1 lacks support for the UMULL instruction which
-* performs the important long multiply. This means numerous __aeabi_lmul
-* calls.
-*
-* Second of all, the 8 functional registers are just not enough.
-* Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
-* Lo registers, and this shuffling results in thousands more MOVs than A32.
-*
-* A32 and T32 don't have this limitation. They can access all 14 registers,
-* do a 32->64 multiply with UMULL, and the flexible operand allowing free
-* shifts is helpful, too.
-*
-* Therefore, we do a quick sanity check.
-*
-* If compiling Thumb-1 for a target which supports ARM instructions, we will
-* emit a warning, as it is not a "sane" platform to compile for.
-*
-* Usually, if this happens, it is because of an accident and you probably need
-* to specify -march, as you likely meant to compile for a newer architecture.
-*
-* Credit: large sections of the vectorial and asm source code paths
-* have been contributed by @easyaspi314
*/
-XXH_ACC_ALIGN :: 8 /* scalar */
-
/* ==========================================
* XXH3 default settings
* ========================================== */
+XXH_ACC_ALIGN :: 8 /* scalar */
+
XXH3_SECRET_SIZE_MIN :: 136
XXH_SECRET_DEFAULT_SIZE :: max(XXH3_SECRET_SIZE_MIN, #config(XXH_SECRET_DEFAULT_SIZE, 192))
@@ -129,16 +65,6 @@ XXH128_hash_t :: struct #raw_union {
}
#assert(size_of(xxh_u128) == size_of(XXH128_hash_t))
-@(optimization_mode="speed")
-XXH_mul_32_to_64 :: #force_inline proc(x, y: xxh_u32) -> (res: xxh_u64) {
- return u64(x) * u64(y)
-}
-
-@(optimization_mode="speed")
-XXH_mul_64_to_128 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u128) {
- return xxh_u128(lhs) * xxh_u128(rhs)
-}
-
/*
The reason for the separate function is to prevent passing too many structs
around by value. This will hopefully inline the multiply, but we don't force it.
@@ -148,9 +74,8 @@ XXH_mul_64_to_128 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u128) {
*/
@(optimization_mode="speed")
XXH_mul_64_to_128_fold_64 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u64) {
- t := XXH128_hash_t{}
- t.h = #force_inline XXH_mul_64_to_128(lhs, rhs)
- return t.low ~ t.high
+ t := u128(lhs) * u128(rhs)
+ return u64(t & 0xFFFFFFFFFFFFFFFF) ~ u64(t >> 64)
}
@(optimization_mode="speed")
@@ -241,7 +166,7 @@ XXH3_len_4to8_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u6
/* Shift len to the left to ensure it is even, this avoids even multiplies. */
m128 := XXH128_hash_t{
- h = XXH_mul_64_to_128(keyed, u64(XXH_PRIME64_1) + (u64(length) << 2)),
+ h = u128(keyed) * (XXH_PRIME64_1 + u128(length) << 2),
}
m128.high += (m128.low << 1)
m128.low ~= (m128.high >> 3)
@@ -265,7 +190,7 @@ XXH3_len_9to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u
input_lo := XXH64_read64(input[0:])
input_hi := XXH64_read64(input[length - 8:])
m128 := XXH128_hash_t{
- h = XXH_mul_64_to_128(input_lo ~ input_hi ~ bitflipl, XXH_PRIME64_1),
+ h = u128(input_lo ~ input_hi ~ bitflipl) * XXH_PRIME64_1,
}
/*
* Put len in the middle of m128 to ensure that the length gets mixed to
@@ -277,49 +202,14 @@ XXH3_len_9to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u
* Add the high 32 bits of input_hi to the high 32 bits of m128, then
* add the long product of the low 32 bits of input_hi and XXH_XXH_PRIME32_2 to
* the high 64 bits of m128.
- *
- * The best approach to this operation is different on 32-bit and 64-bit.
*/
- when size_of(rawptr) == 4 { /* 32-bit */
- /*
- * 32-bit optimized version, which is more readable.
- *
- * On 32-bit, it removes an ADC and delays a dependency between the two
- * halves of m128.high64, but it generates an extra mask on 64-bit.
- */
- m128.high += (input_hi & 0xFFFFFFFF00000000) + XXH_mul_32_to_64(u32(input_hi), XXH_PRIME32_2)
- } else {
- /*
- * 64-bit optimized (albeit more confusing) version.
- *
- * Uses some properties of addition and multiplication to remove the mask:
- *
- * Let:
- * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
- * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
- * c = XXH_XXH_PRIME32_2
- *
- * a + (b * c)
- * Inverse Property: x + y - x == y
- * a + (b * (1 + c - 1))
- * Distributive Property: x * (y + z) == (x * y) + (x * z)
- * a + (b * 1) + (b * (c - 1))
- * Identity Property: x * 1 == x
- * a + b + (b * (c - 1))
- *
- * Substitute a, b, and c:
- * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_XXH_PRIME32_2 - 1))
- *
- * Since input_hi.hi + input_hi.lo == input_hi, we get this:
- * input_hi + ((xxh_u64)input_hi.lo * (XXH_XXH_PRIME32_2 - 1))
- */
- m128.high += input_hi + XXH_mul_32_to_64(u32(input_hi), XXH_PRIME32_2 - 1)
- }
+ m128.high += input_hi + u64(u32(input_hi)) * u64(XXH_PRIME32_2 - 1)
+
/* m128 ^= XXH_swap64(m128 >> 64); */
m128.low ~= byte_swap(m128.high)
{ /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
h128 := XXH128_hash_t{
- h = XXH_mul_64_to_128(m128.low, XXH_PRIME64_2),
+ h = u128(m128.low) * XXH_PRIME64_2,
}
h128.high += m128.high * XXH_PRIME64_2
h128.low = XXH3_avalanche(h128.low)
@@ -505,9 +395,9 @@ XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
}
{
- secret := [XXH_SECRET_DEFAULT_SIZE]u8{}
- f_initSec(secret[:], seed)
- return XXH3_hashLong_128b_internal(input, secret[:], f_acc512, f_scramble)
+ _secret := [XXH_SECRET_DEFAULT_SIZE]u8{}
+ f_initSec(_secret[:], seed)
+ return XXH3_hashLong_128b_internal(input, _secret[:], f_acc512, f_scramble)
}
}
@@ -546,12 +436,144 @@ XXH3_128bits_internal :: #force_inline proc(
/* === Public XXH128 API === */
-XXH3_128bits :: proc(input: []u8) -> (hash: XXH3_128_hash) {
+XXH3_128_with_seed :: proc(input: []u8, seed := XXH3_128_DEFAULT_SEED) -> (hash: XXH3_128_hash) {
k := XXH3_kSecret
- return XXH3_128bits_internal(input, XXH3_128_DEFAULT_SEED, k[:], XXH3_hashLong_128b_default)
+ return XXH3_128bits_internal(input, seed, k[:], XXH3_hashLong_128b_withSeed)
+}
+
+XXH3_128_with_secret :: proc(input: []u8, secret: []u8) -> (hash: XXH3_128_hash) {
+ return XXH3_128bits_internal(input, 0, secret, XXH3_hashLong_128b_withSecret)
+}
+XXH3_128 :: proc { XXH3_128_with_seed, XXH3_128_with_secret }
+
+/*
+
+/* === XXH3 128-bit streaming === */
+
+/*
+ * All the functions are actually the same as for 64-bit streaming variant.
+ * The only difference is the finalization routine.
+ */
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset(XXH3_state_t* statePtr)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
+ return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ XXH3_reset_internal(statePtr, 0, secret, secretSize);
+ if (secret == NULL) return XXH_ERROR;
+ if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+ return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ if (seed==0) return XXH3_128bits_reset(statePtr);
+ if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
+ XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
+ return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
+{
+ return XXH3_update(state, (const xxh_u8*)input, len,
+ XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
+{
+ const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+ if (state->totalLen > XXH3_MIDSIZE_MAX) {
+ XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+ XXH3_digest_long(acc, state, secret);
+ XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+ { XXH128_hash_t h128;
+ h128.low64 = XXH3_mergeAccs(acc,
+ secret + XXH_SECRET_MERGEACCS_START,
+ (xxh_u64)state->totalLen * XXH_PRIME64_1);
+ h128.high64 = XXH3_mergeAccs(acc,
+ secret + state->secretLimit + XXH_STRIPE_LEN
+ - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+ ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
+ return h128;
+ }
+ }
+ /* len <= XXH3_MIDSIZE_MAX : short code */
+ if (state->seed)
+ return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+ return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
+ secret, state->secretLimit + XXH_STRIPE_LEN);
+}
+
+/* 128-bit utility functions */
+
+#include <string.h> /* memcmp, memcpy */
+
+/* return : 1 is equal, 0 if different */
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
+{
+ /* note : XXH128_hash_t is compact, it has no padding byte */
+ return !(memcmp(&h1, &h2, sizeof(h1)));
+}
+
+/* This prototype is compatible with stdlib's qsort().
+ * return : >0 if *h128_1 > *h128_2
+ * <0 if *h128_1 < *h128_2
+ * =0 if *h128_1 == *h128_2 */
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
+{
+ XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
+ XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
+ int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
+ /* note : bets that, in most cases, hash values are different */
+ if (hcmp) return hcmp;
+ return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
}
+/*====== Canonical representation ======*/
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API void
+XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) {
+ hash.high64 = XXH_swap64(hash.high64);
+ hash.low64 = XXH_swap64(hash.low64);
+ }
+ memcpy(dst, &hash.high64, sizeof(hash.high64));
+ memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128_hashFromCanonical(const XXH128_canonical_t* src)
+{
+ XXH128_hash_t h;
+ h.high64 = XXH_readBE64(src);
+ h.low64 = XXH_readBE64(src->digest + 8);
+ return h;
+}
+
+*/
+
/*
==========================================
@@ -810,7 +832,7 @@ XXH3_accumulate_512_scalar :: #force_inline proc(acc: []xxh_u64, input: []u8, se
data_val := XXH64_read64(xinput[8 * i:])
data_key := data_val ~ XXH64_read64(xsecret[8 * i:])
xacc[i ~ 1] += data_val /* swap adjacent lanes */
- xacc[i ] += XXH_mul_32_to_64(u32(data_key & 0xFFFFFFFF), u32(data_key >> 32))
+ xacc[i ] += u64(u32(data_key)) * u64(data_key >> 32)
}
}
@@ -911,4 +933,497 @@ XXH3_mergeAccs :: #force_inline proc(acc: []xxh_u64, secret: []u8, start: xxh_u6
result64 += XXH3_mix2Accs(acc[2 * i:], secret[16 * i:])
}
return XXH3_avalanche(result64)
-}
\ No newline at end of file
+}
+
+
+/*
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
+ const void* XXH_RESTRICT secret, size_t secretSize,
+ XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+ XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
+
+ /* converge into final hash */
+ XXH_STATIC_ASSERT(sizeof(acc) == 64);
+ /* do not align on 8, so that the secret is different from the accumulator */
+#define XXH_SECRET_MERGEACCS_START 11
+ XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+ return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
+}
+
+/*
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
+ XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+ (void)seed64;
+ return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*
+ * It's important for performance that XXH3_hashLong is not inlined.
+ * Since the function is not inlined, the compiler may not be able to understand that,
+ * in some scenarios, its `secret` argument is actually a compile time constant.
+ * This variant enforces that the compiler can detect that,
+ * and uses this opportunity to streamline the generated code for better performance.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
+ XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+ (void)seed64; (void)secret; (void)secretLen;
+ return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*
+ * XXH3_hashLong_64b_withSeed():
+ * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
+ * and then use this key for long mode hashing.
+ *
+ * This operation is decently fast but nonetheless costs a little bit of time.
+ * Try to avoid it whenever possible (typically when seed==0).
+ *
+ * It's important for performance that XXH3_hashLong is not inlined. Not sure
+ * why (uop cache maybe?), but the difference is large and easily measurable.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
+ XXH64_hash_t seed,
+ XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_scrambleAcc f_scramble,
+ XXH3_f_initCustomSecret f_initSec)
+{
+ if (seed == 0)
+ return XXH3_hashLong_64b_internal(input, len,
+ XXH3_kSecret, sizeof(XXH3_kSecret),
+ f_acc512, f_scramble);
+ { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
+ f_initSec(secret, seed);
+ return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
+ f_acc512, f_scramble);
+ }
+}
+
+/*
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed(const void* input, size_t len,
+ XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
+{
+ (void)secret; (void)secretLen;
+ return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
+ XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+}
+
+
+typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
+ XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
+ XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
+ XXH3_hashLong64_f f_hashLong)
+{
+ XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
+ /*
+ * If an action is to be taken if `secretLen` condition is not respected,
+ * it should be done here.
+ * For now, it's a contract pre-condition.
+ * Adding a check and a branch here would cost performance at every hash.
+ * Also, note that function signature doesn't offer room to return an error.
+ */
+ if (len <= 16)
+ return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
+ if (len <= 128)
+ return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
+ if (len <= XXH3_MIDSIZE_MAX)
+ return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
+ return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
+}
+
+
+/* === Public entry point === */
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
+{
+ return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+{
+ return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+{
+ return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
+}
+
+
+/* === XXH3 streaming === */
+
+/*
+ * Malloc's a pointer that is always aligned to align.
+ *
+ * This must be freed with `XXH_alignedFree()`.
+ *
+ * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
+ * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
+ * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
+ *
+ * This underalignment previously caused a rather obvious crash which went
+ * completely unnoticed due to XXH3_createState() not actually being tested.
+ * Credit to RedSpah for noticing this bug.
+ *
+ * The alignment is done manually: Functions like posix_memalign or _mm_malloc
+ * are avoided: To maintain portability, we would have to write a fallback
+ * like this anyways, and besides, testing for the existence of library
+ * functions without relying on external build tools is impossible.
+ *
+ * The method is simple: Overallocate, manually align, and store the offset
+ * to the original behind the returned pointer.
+ *
+ * Align must be a power of 2 and 8 <= align <= 128.
+ */
+static void* XXH_alignedMalloc(size_t s, size_t align)
+{
+ XXH_ASSERT(align <= 128 && align >= 8); /* range check */
+ XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
+ XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */
+ { /* Overallocate to make room for manual realignment and an offset byte */
+ xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
+ if (base != NULL) {
+ /*
+ * Get the offset needed to align this pointer.
+ *
+ * Even if the returned pointer is aligned, there will always be
+ * at least one byte to store the offset to the original pointer.
+ */
+ size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
+ /* Add the offset for the now-aligned pointer */
+ xxh_u8* ptr = base + offset;
+
+ XXH_ASSERT((size_t)ptr % align == 0);
+
+ /* Store the offset immediately before the returned pointer. */
+ ptr[-1] = (xxh_u8)offset;
+ return ptr;
+ }
+ return NULL;
+ }
+}
+/*
+ * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
+ * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
+ */
+static void XXH_alignedFree(void* p)
+{
+ if (p != NULL) {
+ xxh_u8* ptr = (xxh_u8*)p;
+ /* Get the offset byte we added in XXH_malloc. */
+ xxh_u8 offset = ptr[-1];
+ /* Free the original malloc'd pointer */
+ xxh_u8* base = ptr - offset;
+ XXH_free(base);
+ }
+}
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
+{
+ XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
+ if (state==NULL) return NULL;
+ XXH3_INITSTATE(state);
+ return state;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
+{
+ XXH_alignedFree(statePtr);
+ return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API void
+XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
+{
+ memcpy(dst_state, src_state, sizeof(*dst_state));
+}
+
+static void
+XXH3_reset_internal(XXH3_state_t* statePtr,
+ XXH64_hash_t seed,
+ const void* secret, size_t secretSize)
+{
+ size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
+ size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
+ XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
+ XXH_ASSERT(statePtr != NULL);
+ /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
+ memset((char*)statePtr + initStart, 0, initLength);
+ statePtr->acc[0] = XXH_XXH_PRIME32_3;
+ statePtr->acc[1] = XXH_PRIME64_1;
+ statePtr->acc[2] = XXH_PRIME64_2;
+ statePtr->acc[3] = XXH_PRIME64_3;
+ statePtr->acc[4] = XXH_PRIME64_4;
+ statePtr->acc[5] = XXH_XXH_PRIME32_2;
+ statePtr->acc[6] = XXH_PRIME64_5;
+ statePtr->acc[7] = XXH_XXH_PRIME32_1;
+ statePtr->seed = seed;
+ statePtr->extSecret = (const unsigned char*)secret;
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+ statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
+ statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset(XXH3_state_t* statePtr)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
+ return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ XXH3_reset_internal(statePtr, 0, secret, secretSize);
+ if (secret == NULL) return XXH_ERROR;
+ if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+ return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+ if (statePtr == NULL) return XXH_ERROR;
+ if (seed==0) return XXH3_64bits_reset(statePtr);
+ if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
+ XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
+ return XXH_OK;
+}
+
+/* Note : when XXH3_consumeStripes() is invoked,
+ * there must be a guarantee that at least one more byte must be consumed from input
+ * so that the function can blindly consume all stripes using the "normal" secret segment */
+XXH_FORCE_INLINE void
+XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
+ size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
+ const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
+ const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
+ XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */
+ XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
+ if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
+ /* need a scrambling operation */
+ size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
+ size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
+ XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
+ f_scramble(acc, secret + secretLimit);
+ XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
+ *nbStripesSoFarPtr = nbStripesAfterBlock;
+ } else {
+ XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
+ *nbStripesSoFarPtr += nbStripes;
+ }
+}
+
+/*
+ * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
+ */
+XXH_FORCE_INLINE XXH_errorcode
+XXH3_update(XXH3_state_t* state,
+ const xxh_u8* input, size_t len,
+ XXH3_f_accumulate_512 f_acc512,
+ XXH3_f_scrambleAcc f_scramble)
+{
+ if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ return XXH_OK;
+#else
+ return XXH_ERROR;
+#endif
+
+ { const xxh_u8* const bEnd = input + len;
+ const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+
+ state->totalLen += len;
+ XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
+
+ if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */
+ XXH_memcpy(state->buffer + state->bufferedSize, input, len);
+ state->bufferedSize += (XXH32_hash_t)len;
+ return XXH_OK;
+ }
+ /* total input is now > XXH3_INTERNALBUFFER_SIZE */
+
+ #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
+ XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
+
+ /*
+ * Internal buffer is partially filled (always, except at beginning)
+ * Complete it, then consume it.
+ */
+ if (state->bufferedSize) {
+ size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
+ XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
+ input += loadSize;
+ XXH3_consumeStripes(state->acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ state->buffer, XXH3_INTERNALBUFFER_STRIPES,
+ secret, state->secretLimit,
+ f_acc512, f_scramble);
+ state->bufferedSize = 0;
+ }
+ XXH_ASSERT(input < bEnd);
+
+ /* Consume input by a multiple of internal buffer size */
+ if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
+ const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
+ do {
+ XXH3_consumeStripes(state->acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ input, XXH3_INTERNALBUFFER_STRIPES,
+ secret, state->secretLimit,
+ f_acc512, f_scramble);
+ input += XXH3_INTERNALBUFFER_SIZE;
+ } while (input<limit);
+ /* for last partial stripe */
+ memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+ }
+ XXH_ASSERT(input < bEnd);
+
+ /* Some remaining input (always) : buffer it */
+ XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
+ state->bufferedSize = (XXH32_hash_t)(bEnd-input);
+ }
+
+ return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
+{
+ return XXH3_update(state, (const xxh_u8*)input, len,
+ XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+
+XXH_FORCE_INLINE void
+XXH3_digest_long (XXH64_hash_t* acc,
+ const XXH3_state_t* state,
+ const unsigned char* secret)
+{
+ /*
+ * Digest on a local copy. This way, the state remains unaltered, and it can
+ * continue ingesting more input afterwards.
+ */
+ memcpy(acc, state->acc, sizeof(state->acc));
+ if (state->bufferedSize >= XXH_STRIPE_LEN) {
+ size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
+ size_t nbStripesSoFar = state->nbStripesSoFar;
+ XXH3_consumeStripes(acc,
+ &nbStripesSoFar, state->nbStripesPerBlock,
+ state->buffer, nbStripes,
+ secret, state->secretLimit,
+ XXH3_accumulate_512, XXH3_scrambleAcc);
+ /* last stripe */
+ XXH3_accumulate_512(acc,
+ state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
+ secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+ } else { /* bufferedSize < XXH_STRIPE_LEN */
+ xxh_u8 lastStripe[XXH_STRIPE_LEN];
+ size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
+ XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
+ memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
+ memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+ XXH3_accumulate_512(acc,
+ lastStripe,
+ secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+ }
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
+{
+ const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+ if (state->totalLen > XXH3_MIDSIZE_MAX) {
+ XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+ XXH3_digest_long(acc, state, secret);
+ return XXH3_mergeAccs(acc,
+ secret + XXH_SECRET_MERGEACCS_START,
+ (xxh_u64)state->totalLen * XXH_PRIME64_1);
+ }
+ /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
+ if (state->seed)
+ return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+ return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
+ secret, state->secretLimit + XXH_STRIPE_LEN);
+}
+
+
+#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API void
+XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize)
+{
+ XXH_ASSERT(secretBuffer != NULL);
+ if (customSeedSize == 0) {
+ memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
+ return;
+ }
+ XXH_ASSERT(customSeed != NULL);
+
+ { size_t const segmentSize = sizeof(XXH128_hash_t);
+ size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize;
+ XXH128_canonical_t scrambler;
+ XXH64_hash_t seeds[12];
+ size_t segnb;
+ XXH_ASSERT(nbSegments == 12);
+ XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */
+ XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+
+ /*
+ * Copy customSeed to seeds[], truncating or repeating as necessary.
+ */
+ { size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds));
+ size_t filled = toFill;
+ memcpy(seeds, customSeed, toFill);
+ while (filled < sizeof(seeds)) {
+ toFill = XXH_MIN(filled, sizeof(seeds) - filled);
+ memcpy((char*)seeds + filled, seeds, toFill);
+ filled += toFill;
+ } }
+
+ /* generate secret */
+ memcpy(secretBuffer, &scrambler, sizeof(scrambler));
+ for (segnb=1; segnb < nbSegments; segnb++) {
+ size_t const segmentStart = segnb * segmentSize;
+ XXH128_canonical_t segment;
+ XXH128_canonicalFromHash(&segment,
+ XXH128(&scrambler, sizeof(scrambler), XXH64_read64(seeds + segnb) + segnb) );
+ memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment));
+ } }
+}
+
+*/
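
Usage note, not part of this commit: a minimal sketch of how the XXH3_128 procedure group introduced above might be called. The import path "core:hash/xxhash", the example package name, message, and seed value are assumptions for illustration only and are not taken from this diff.

package xxhash_example

import "core:fmt"
import "core:hash/xxhash"

main :: proc() {
	msg  := "hello, xxhash"
	data := transmute([]u8)msg

	// One argument: the proc group resolves to XXH3_128_with_seed,
	// which falls back to XXH3_128_DEFAULT_SEED.
	h_default := xxhash.XXH3_128(data)

	// Two arguments with a u64 seed: still XXH3_128_with_seed.
	// (Passing a []u8 secret of at least XXH3_SECRET_SIZE_MIN bytes
	// would instead select XXH3_128_with_secret.)
	h_seeded := xxhash.XXH3_128(data, 0x9E3779B185EBCA87)

	fmt.println(h_default)
	fmt.println(h_seeded)
}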