| author | gingerBill <gingerBill@users.noreply.github.com> | 2021-09-18 12:55:13 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-09-18 12:55:13 +0100 |
| commit | 6855538729ea4f859f692715c4c231e4a59ac604 (patch) | |
| tree | f9bd2df2289ab97032170151da38b076df00e0f6 /core/hash | |
| parent | 15921d032cfff6994289d1f299bef168f0a49a4e (diff) | |
| parent | 05ac2002e0296c3acccca1d8cffaafb002e43120 (diff) | |
Merge branch 'master' into llvm-12.0.1-windows
Diffstat (limited to 'core/hash')
| -rw-r--r-- | core/hash/xxhash/common.odin | 14 |
| -rw-r--r-- | core/hash/xxhash/streaming.odin | 372 |
| -rw-r--r-- | core/hash/xxhash/xxhash_3.odin | 387 |
| -rw-r--r-- | core/hash/xxhash/xxhash_32.odin | 10 |
| -rw-r--r-- | core/hash/xxhash/xxhash_64.odin | 10 |
5 files changed, 609 insertions, 184 deletions
diff --git a/core/hash/xxhash/common.odin b/core/hash/xxhash/common.odin
index 6733bb83e..d49e9c047 100644
--- a/core/hash/xxhash/common.odin
+++ b/core/hash/xxhash/common.odin
@@ -41,13 +41,18 @@ Alignment :: enum {
 }
 
 Error :: enum {
-	Okay = 0,
+	None = 0,
 	Error,
 }
 
-XXH_DISABLE_PREFETCH :: #config(XXH_DISABLE_PREFETCH, false)
+XXH_DISABLE_PREFETCH :: #config(XXH_DISABLE_PREFETCH, true)
 
-when !XXH_DISABLE_PREFETCH {
+/*
+	llvm.prefetch fails code generation on Linux.
+*/
+when XXH_DISABLE_PREFETCH {
+	import "core:sys/llvm"
+
 	prefetch_address :: #force_inline proc(address: rawptr) {
 		intrinsics.prefetch_read_data(address, /*high*/3)
 	}
@@ -55,13 +60,14 @@ when !XXH_DISABLE_PREFETCH {
 		ptr := rawptr(uintptr(address) + offset)
 		prefetch_address(ptr)
 	}
+	prefetch :: proc { prefetch_address, prefetch_offset, }
 } else {
 	prefetch_address :: #force_inline proc(address: rawptr) {
 	}
 	prefetch_offset :: #force_inline proc(address: rawptr, #any_int offset: uintptr) {
 	}
 }
-prefetch :: proc { prefetch_address, prefetch_offset, }
+
 
 @(optimization_mode="speed")
 XXH_rotl32 :: #force_inline proc(x, r: u32) -> (res: u32) {
diff --git a/core/hash/xxhash/streaming.odin b/core/hash/xxhash/streaming.odin
new file mode 100644
index 000000000..737e37eae
--- /dev/null
+++ b/core/hash/xxhash/streaming.odin
@@ -0,0 +1,372 @@
+/*
+	An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
+	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+
+	Made available under Odin's BSD-3 license, based on the original C code.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+package xxhash
+
+import "core:mem"
+import "core:intrinsics"
+
+/*
+	=== XXH3 128-bit streaming ===
+
+	All the functions are actually the same as for 64-bit streaming variant.
+	The only difference is the finalization routine.
+*/
+XXH3_128_reset :: proc(state: ^XXH3_state) -> (err: Error) {
+	if state == nil {
+		return .Error
+	}
+	XXH3_reset_internal(state, 0, XXH3_kSecret[:], len(XXH3_kSecret))
+	return .None
+}
+XXH3_64_reset :: XXH3_128_reset
+
+XXH3_128_reset_with_secret :: proc(state: ^XXH3_state, secret: []u8) -> (err: Error) {
+	if state == nil {
+		return .Error
+	}
+	if secret == nil || len(secret) < XXH3_SECRET_SIZE_MIN {
+		return .Error
+	}
+	XXH3_reset_internal(state, 0, secret, len(secret))
+	return .None
+}
+XXH3_64_reset_with_secret :: XXH3_128_reset_with_secret
+
+XXH3_128_reset_with_seed :: proc(state: ^XXH3_state, seed: XXH64_hash) -> (err: Error) {
+	if seed == 0 {
+		return XXH3_128_reset(state)
+	}
+	if seed != state.seed {
+		XXH3_init_custom_secret(state.custom_secret[:], seed)
+	}
+	XXH3_reset_internal(state, seed, nil, XXH_SECRET_DEFAULT_SIZE)
+	return .None
+}
+XXH3_64_reset_with_seed :: XXH3_128_reset_with_seed
+
+XXH3_128_update :: proc(state: ^XXH3_state, input: []u8) -> (err: Error) {
+	if len(input) < XXH3_MIDSIZE_MAX {
+		return .Error
+	}
+	return XXH3_update(state, input, XXH3_accumulate_512, XXH3_scramble_accumulator)
+}
+XXH3_64_update :: XXH3_128_update
+
+XXH3_128_digest :: proc(state: ^XXH3_state) -> (hash: XXH3_128_hash) {
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	if state.total_length > XXH3_MIDSIZE_MAX {
+		acc: [XXH_ACC_NB]XXH64_hash
+		XXH3_digest_long(acc[:], state, secret)
+
+		assert(state.secret_limit + XXH_STRIPE_LEN >= XXH_ACC_NB + XXH_SECRET_MERGEACCS_START)
+		{
+			h128 := XXH128_hash_t{}
+
+			h128.low = XXH3_mergeAccs(
+				acc[:],
+				secret[XXH_SECRET_MERGEACCS_START:],
+				state.total_length * XXH_PRIME64_1)
+
+			h128.high = XXH3_mergeAccs(
+				acc[:],
+				secret[state.secret_limit + XXH_STRIPE_LEN - size_of(acc) - XXH_SECRET_MERGEACCS_START:],
+				~(u64(state.total_length) * XXH_PRIME64_2))
+
+			return h128.h
+		}
+	}
+	/* len <= XXH3_MIDSIZE_MAX : short code */
+	if state.seed != 0 {
+		return XXH3_128_with_seed(state.buffer[:state.total_length], state.seed)
+	}
+	return XXH3_128_with_secret(state.buffer[:state.total_length], secret[:state.secret_limit + XXH_STRIPE_LEN])
+}
+
+/*====== Canonical representation ======*/
+
+XXH3_128_canonical_from_hash :: proc(hash: XXH128_hash_t) -> (canonical: XXH128_canonical) {
+	#assert(size_of(XXH128_canonical) == size_of(XXH128_hash_t))
+
+	t := hash
+	when ODIN_ENDIAN == "little" {
+		t.high = byte_swap(t.high)
+		t.low  = byte_swap(t.low)
+	}
+	mem_copy(&canonical.digest, &t.high, size_of(u64))
+	mem_copy(&canonical.digest[8], &t.low, size_of(u64))
+	return
+}
+
+XXH3_128_hash_from_canonical :: proc(src: ^XXH128_canonical) -> (hash: u128) {
+	h := XXH128_hash_t{}
+
+	high := (^u64be)(&src.digest[0])^
+	low  := (^u64be)(&src.digest[8])^
+
+	h.high = u64(high)
+	h.low  = u64(low)
+	return h.h
+}
+
+/* === XXH3 streaming === */
+
+XXH3_init_state :: proc(state: ^XXH3_state) {
+	state.seed = 0
+}
+
+XXH3_create_state :: proc(allocator := context.allocator) -> (res: ^XXH3_state, err: Error) {
+	state, mem_error := mem.new_aligned(XXH3_state, 64, allocator)
+	err = nil if mem_error == nil else .Error
+
+	XXH3_init_state(state)
+	return state, nil
+}
+
+XXH3_destroy_state :: proc(state: ^XXH3_state, allocator := context.allocator) -> (err: Error) {
+	free(state)
+	return .None
+}
+
+XXH3_copy_state :: proc(dest, src: ^XXH3_state) {
+	assert(dest != nil && src != nil)
+	mem_copy(dest, src, size_of(XXH3_state))
+}
+
+XXH3_reset_internal :: proc(state: ^XXH3_state, seed: XXH64_hash, secret: []u8, secret_size: uint) {
+	assert(state != nil)
+
+	init_start  := offset_of(XXH3_state, buffered_size)
+	init_length := offset_of(XXH3_state, stripes_per_block) - init_start
+
+	assert(offset_of(XXH3_state, stripes_per_block) > init_start)
+
+	/*
+		Set members from buffered_size to stripes_per_block (excluded) to 0
+	*/
+	offset := rawptr(uintptr(state) + uintptr(init_start))
+	intrinsics.mem_zero(offset, init_length)
+
+	state.acc[0] = XXH_PRIME32_3
+	state.acc[1] = XXH_PRIME64_1
+	state.acc[2] = XXH_PRIME64_2
+	state.acc[3] = XXH_PRIME64_3
+	state.acc[4] = XXH_PRIME64_4
+	state.acc[5] = XXH_PRIME32_2
+	state.acc[6] = XXH_PRIME64_5
+	state.acc[7] = XXH_PRIME32_1
+	state.seed = seed
+	state.external_secret = secret
+
+	assert(secret_size >= XXH3_SECRET_SIZE_MIN)
+
+	state.secret_limit      = secret_size - XXH_STRIPE_LEN
+	state.stripes_per_block = state.secret_limit / XXH_SECRET_CONSUME_RATE
+}
+
+/*
+	Note: when XXH3_consumeStripes() is invoked, there must be a guarantee that at least
+	one more byte must be consumed from input so that the function can blindly consume
+	all stripes using the "normal" secret segment.
+*/
+XXH3_consume_stripes :: #force_inline proc(
+	acc: []xxh_u64, stripes_so_far: ^uint, stripes_per_block: uint, input: []u8,
+	number_of_stripes: uint, secret: []u8, secret_limit: uint,
+	f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) {
+
+	assert(number_of_stripes <= stripes_per_block) /* can handle max 1 scramble per invocation */
+	assert(stripes_so_far^ < stripes_per_block)
+
+	if stripes_per_block - stripes_so_far^ <= number_of_stripes {
+		/* need a scrambling operation */
+		stripes_to_end_of_block := stripes_per_block - stripes_so_far^
+		stripes_after_block     := number_of_stripes - stripes_to_end_of_block
+
+		XXH3_accumulate(acc, input, secret[stripes_so_far^ * XXH_SECRET_CONSUME_RATE:], stripes_to_end_of_block, f_acc512)
+		f_scramble(acc, secret[secret_limit:])
+		XXH3_accumulate(acc, input[stripes_to_end_of_block * XXH_STRIPE_LEN:], secret, stripes_after_block, f_acc512)
+		stripes_so_far^ = stripes_after_block
+	} else {
+		XXH3_accumulate(acc, input, secret[stripes_so_far^ * XXH_SECRET_CONSUME_RATE:], number_of_stripes, f_acc512)
+		stripes_so_far^ += number_of_stripes
+	}
+}
+
+/*
+	Both XXH3_64bits_update and XXH3_128bits_update use this routine.
+*/
+XXH3_update :: #force_inline proc(
+	state: ^XXH3_state, input: []u8,
+	f_acc512: XXH3_accumulate_512_f,
+	f_scramble: XXH3_scramble_accumulator_f) -> (err: Error) {
+
+	input  := input
+	length := len(input)
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	assert(len(input) > 0)
+
+	state.total_length += u64(length)
+	assert(state.buffered_size <= XXH3_INTERNAL_BUFFER_SIZE)
+
+	if int(state.buffered_size) + length <= XXH3_INTERNAL_BUFFER_SIZE { /* fill in tmp buffer */
+		mem_copy(&state.buffer[state.buffered_size], &input[0], length)
+		state.buffered_size += u32(length)
+		return .None
+	}
+
+	/* total input is now > XXH3_INTERNAL_BUFFER_SIZE */
+	XXH3_INTERNAL_BUFFER_STRIPES :: XXH3_INTERNAL_BUFFER_SIZE / XXH_STRIPE_LEN
+	#assert(XXH3_INTERNAL_BUFFER_SIZE % XXH_STRIPE_LEN == 0) /* clean multiple */
+
+	/*
+		Internal buffer is partially filled (always, except at beginning)
+		Complete it, then consume it.
+	*/
+	if state.buffered_size > 0 {
+		load_size := int(XXH3_INTERNAL_BUFFER_SIZE - state.buffered_size)
+		mem_copy(&state.buffer[state.buffered_size], &input[0], load_size)
+		input = input[load_size:]
+
+		XXH3_consume_stripes(
+			state.acc[:], &state.stripes_so_far, state.stripes_per_block,
+			state.buffer[:], XXH3_INTERNAL_BUFFER_STRIPES,
+			secret, state.secret_limit, f_acc512, f_scramble)
+		state.buffered_size = 0
+	}
+	assert(len(input) > 0)
+
+	/* Consume input by a multiple of internal buffer size */
+	if len(input) > XXH3_INTERNAL_BUFFER_SIZE {
+		tail := input[:len(input) - XXH_STRIPE_LEN]
+		for len(input) > XXH3_INTERNAL_BUFFER_SIZE {
+			XXH3_consume_stripes(
+				state.acc[:], &state.stripes_so_far, state.stripes_per_block,
+				input, XXH3_INTERNAL_BUFFER_STRIPES,
+				secret, state.secret_limit, f_acc512, f_scramble)
+
+			input = input[XXH3_INTERNAL_BUFFER_SIZE:]
+		}
+		/* for last partial stripe */
+		mem_copy(&state.buffer[XXH3_INTERNAL_BUFFER_SIZE - XXH_STRIPE_LEN], &tail[0], XXH_STRIPE_LEN)
+	}
+
+	length = len(input)
+	assert(length > 0)
+
+	/* Some remaining input (always) : buffer it */
+	mem_copy(&state.buffer[0], &input[0], length)
+	state.buffered_size = u32(length)
+	return .None
+}
+
+XXH3_digest_long :: #force_inline proc(acc: []u64, state: ^XXH3_state, secret: []u8) {
+	/*
+		Digest on a local copy. This way, the state remains unaltered, and it can
+		continue ingesting more input afterwards.
+	*/
+	mem_copy(&acc[0], &state.acc[0], size_of(state.acc))
+
+	if state.buffered_size >= XXH_STRIPE_LEN {
+		number_of_stripes := uint((state.buffered_size - 1) / XXH_STRIPE_LEN)
+		stripes_so_far    := state.stripes_so_far
+
+		XXH3_consume_stripes(
+			acc[:], &stripes_so_far, state.stripes_per_block, state.buffer[:], number_of_stripes,
+			secret, state.secret_limit, XXH3_accumulate_512, XXH3_scramble_accumulator)
+
+		/* last stripe */
+		XXH3_accumulate_512(
+			acc[:],
+			state.buffer[state.buffered_size - XXH_STRIPE_LEN:],
+			secret[state.secret_limit - XXH_SECRET_LASTACC_START:])
+	} else { /* bufferedSize < XXH_STRIPE_LEN */
+		last_stripe: [XXH_STRIPE_LEN]u8
+		catchup_size := int(XXH_STRIPE_LEN) - int(state.buffered_size)
+		assert(state.buffered_size > 0) /* there is always some input buffered */
+
+		mem_copy(&last_stripe[0], &state.buffer[XXH3_INTERNAL_BUFFER_SIZE - catchup_size], catchup_size)
+		mem_copy(&last_stripe[catchup_size], &state.buffer[0], int(state.buffered_size))
+		XXH3_accumulate_512(acc[:], last_stripe[:], secret[state.secret_limit - XXH_SECRET_LASTACC_START:])
+	}
+}
+
+XXH3_64_digest :: proc(state: ^XXH3_state) -> (hash: XXH64_hash) {
+	secret := state.custom_secret[:] if len(state.external_secret) == 0 else state.external_secret[:]
+
+	if state.total_length > XXH3_MIDSIZE_MAX {
+		acc: [XXH_ACC_NB]xxh_u64
+		XXH3_digest_long(acc[:], state, secret[:])
+
+		return XXH3_mergeAccs(acc[:], secret[XXH_SECRET_MERGEACCS_START:], state.total_length * XXH_PRIME64_1)
+	}
+
+	/* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
+	if state.seed == 0 {
+		return XXH3_64_with_seed(state.buffer[:state.total_length], state.seed)
+	}
+	return XXH3_64_with_secret(state.buffer[:state.total_length], secret[:state.secret_limit + XXH_STRIPE_LEN])
+}
+
+XXH3_generate_secret :: proc(secret_buffer: []u8, custom_seed: []u8) {
+	secret_length := len(secret_buffer)
+	assert(secret_length >= XXH3_SECRET_SIZE_MIN)
+
+	custom_seed_size := len(custom_seed)
+	if custom_seed_size == 0 {
+		k := XXH3_kSecret
+		mem_copy(&secret_buffer[0], &k[0], XXH_SECRET_DEFAULT_SIZE)
+		return
+	}
+
+	{
+		segment_size :: size_of(XXH128_hash_t)
+		number_of_segments := u64(XXH_SECRET_DEFAULT_SIZE / segment_size)
+
+		seeds: [12]u64le
+		assert(number_of_segments == 12)
+		assert(segment_size * number_of_segments == XXH_SECRET_DEFAULT_SIZE) /* exact multiple */
+
+		scrambler := XXH3_128_canonical_from_hash(XXH128_hash_t{h=XXH3_128(custom_seed[:])})
+
+		/*
+			Copy customSeed to seeds[], truncating or repeating as necessary.
+			TODO: Convert `mem_copy` to slice copies.
+		*/
+		{
+			to_fill := min(custom_seed_size, size_of(seeds))
+			filled  := to_fill
+			mem_copy(&seeds[0], &custom_seed[0], to_fill)
+			for filled < size_of(seeds) {
+				to_fill = min(filled, size_of(seeds) - filled)
+				seed_offset := rawptr(uintptr(&seeds[0]) + uintptr(filled))
+				mem_copy(seed_offset, &seeds[0], to_fill)
+				filled += to_fill
+			}
+		}
+
+		/*
+			Generate secret
+		*/
+		mem_copy(&secret_buffer[0], &scrambler, size_of(scrambler))
+
+		for segment_number := u64(1); segment_number < number_of_segments; segment_number += 1 {
+			segment_start := segment_number * segment_size
+
+			this_seed := u64(seeds[segment_number]) + segment_number
+			segment   := XXH3_128_canonical_from_hash(XXH128_hash_t{h=XXH3_128(scrambler.digest[:], this_seed)})
+
+			mem_copy(&secret_buffer[segment_start], &segment, size_of(segment))
+		}
+	}
+}
\ No newline at end of file
diff --git a/core/hash/xxhash/xxhash_3.odin b/core/hash/xxhash/xxhash_3.odin
index 327a4c847..5bd5537b1 100644
--- a/core/hash/xxhash/xxhash_3.odin
+++ b/core/hash/xxhash/xxhash_3.odin
@@ -8,91 +8,29 @@
 		Jeroen van Rijn: Initial implementation.
 */
 package xxhash
+
 import "core:intrinsics"
 
-/* *********************************************************************
-*  XXH3
-*  New generation hash designed for speed on small keys and vectorization
-************************************************************************
+/*
+*************************************************************************
+*  XXH3
+*  New generation hash designed for speed on small keys and vectorization
+*************************************************************************
 * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
 * remaining a true 64-bit/128-bit hash function.
-*
-* This is done by prioritizing a subset of 64-bit operations that can be
-* emulated without too many steps on the average 32-bit machine.
-*
-* For example, these two lines seem similar, and run equally fast on 64-bit:
-*
-*   xxh_u64 x;
-*   x ^= (x >> 47); // good
-*   x ^= (x >> 13); // bad
-*
-* However, to a 32-bit machine, there is a major difference.
-*
-* x ^= (x >> 47) looks like this:
-*
-*   x.lo ^= (x.hi >> (47 - 32));
-*
-* while x ^= (x >> 13) looks like this:
-*
-*   // note: funnel shifts are not usually cheap.
-*   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
-*   x.hi ^= (x.hi >> 13);
-*
-* The first one is significantly faster than the second, simply because the
-* shift is larger than 32. This means:
-*   - All the bits we need are in the upper 32 bits, so we can ignore the lower
-*     32 bits in the shift.
-*   - The shift result will always fit in the lower 32 bits, and therefore,
-*     we can ignore the upper 32 bits in the xor.
-*
-* Thanks to this optimization, XXH3 only requires these features to be efficient:
-*
-*  - Usable unaligned access
-*  - A 32-bit or 64-bit ALU
-*      - If 32-bit, a decent ADC instruction
-*  - A 32 or 64-bit multiply with a 64-bit result
-*  - For the 128-bit variant, a decent byteswap helps short inputs.
-*
-* The first two are already required by XXH32, and almost all 32-bit and 64-bit
-* platforms which can run XXH32 can run XXH3 efficiently.
-*
-* Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
-* notable exception.
-*
-* First of all, Thumb-1 lacks support for the UMULL instruction which
-* performs the important long multiply. This means numerous __aeabi_lmul
-* calls.
-*
-* Second of all, the 8 functional registers are just not enough.
-* Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
-* Lo registers, and this shuffling results in thousands more MOVs than A32.
-*
-* A32 and T32 don't have this limitation. They can access all 14 registers,
-* do a 32->64 multiply with UMULL, and the flexible operand allowing free
-* shifts is helpful, too.
-*
-* Therefore, we do a quick sanity check.
-*
-* If compiling Thumb-1 for a target which supports ARM instructions, we will
-* emit a warning, as it is not a "sane" platform to compile for.
-*
-* Usually, if this happens, it is because of an accident and you probably need
-* to specify -march, as you likely meant to compile for a newer architecture.
-*
-* Credit: large sections of the vectorial and asm source code paths
-*         have been contributed by @easyaspi314
+* ==========================================
+* XXH3 default settings
+* ==========================================
 */
-XXH_ACC_ALIGN :: 8 /* scalar */
-
-/* ==========================================
- * XXH3 default settings
- * ========================================== */
-
-XXH3_SECRET_SIZE_MIN :: 136
+/*
+	Custom secrets have a default length of 192, but can be set to a different size.
+	The minimum secret size is 136 bytes. It must also be a multiple of 64.
+*/
 XXH_SECRET_DEFAULT_SIZE :: max(XXH3_SECRET_SIZE_MIN, #config(XXH_SECRET_DEFAULT_SIZE, 192))
+#assert(XXH_SECRET_DEFAULT_SIZE % 64 == 0)
 
-XXH3_kSecret :: [?]u8{
+XXH3_kSecret := [XXH_SECRET_DEFAULT_SIZE]u8{
 	0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
 	0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
 	0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
@@ -106,8 +44,42 @@ XXH3_kSecret :: [?]u8{
 	0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
 	0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
 }
-#assert(size_of(XXH3_kSecret) == 192)
 
+/*
+	Do not change this constant.
+*/
+XXH3_SECRET_SIZE_MIN :: 136
+#assert(len(XXH3_kSecret) == 192 && len(XXH3_kSecret) > XXH3_SECRET_SIZE_MIN)
+
+XXH_ACC_ALIGN :: 8 /* scalar */
+
+/*
+	This is the optimal update size for incremental hashing.
+*/
+XXH3_INTERNAL_BUFFER_SIZE :: 256
+
+/*
+	Streaming state.
+
+	IMPORTANT: This structure has a strict alignment requirement of 64 bytes!
+	Do not allocate this with `make()` or `new`, it will not be sufficiently aligned.
+	Use `XXH3_create_state` and `XXH3_destroy_state`, or stack allocation.
+*/
+XXH3_state :: struct {
+	acc:               [8]u64,
+	custom_secret:     [XXH_SECRET_DEFAULT_SIZE]u8,
+	buffer:            [XXH3_INTERNAL_BUFFER_SIZE]u8,
+	buffered_size:     u32,
+	reserved32:        u32,
+	stripes_so_far:    uint,
+	total_length:      u64,
+	stripes_per_block: uint,
+	secret_limit:      uint,
+	seed:              u64,
+	reserved64:        u64,
+	external_secret:   []u8,
+}
+#assert(offset_of(XXH3_state, acc) % 64 == 0 && offset_of(XXH3_state, custom_secret) % 64 == 0 &&
+	offset_of(XXH3_state, buffer) % 64 == 0)
 
 /************************************************************************
 *  XXH3 128-bit variant
@@ -118,7 +90,6 @@ XXH3_kSecret :: [?]u8{
 */
 xxh_u128      :: u128
 XXH3_128_hash :: u128
-XXH3_128_DEFAULT_SEED :: xxh_u64(0)
 
 XXH128_hash_t :: struct #raw_union {
 	using raw: struct {
@@ -129,14 +100,8 @@ XXH128_hash_t :: struct #raw_union {
 }
 #assert(size_of(xxh_u128) == size_of(XXH128_hash_t))
 
-@(optimization_mode="speed")
-XXH_mul_32_to_64 :: #force_inline proc(x, y: xxh_u32) -> (res: xxh_u64) {
-	return u64(x) * u64(y)
-}
-
-@(optimization_mode="speed")
-XXH_mul_64_to_128 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u128) {
-	return xxh_u128(lhs) * xxh_u128(rhs)
+XXH128_canonical :: struct {
+	digest: [size_of(XXH128_hash_t)]u8,
 }
 
 /*
@@ -148,9 +113,8 @@ XXH_mul_64_to_128 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u128) {
 */
 @(optimization_mode="speed")
 XXH_mul_64_to_128_fold_64 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u64) {
-	t := XXH128_hash_t{}
-	t.h = #force_inline XXH_mul_64_to_128(lhs, rhs)
-	return t.low ~ t.high
+	t := u128(lhs) * u128(rhs)
+	return u64(t & 0xFFFFFFFFFFFFFFFF) ~ u64(t >> 64)
 }
 
 @(optimization_mode="speed")
@@ -186,12 +150,12 @@ XXH3_rrmxmx :: #force_inline proc(h64, length: xxh_u64) -> (res: xxh_u64) {
 
 /*
 	==========================================
-	XXH3 128 bits (a.k.a XXH128)
+	XXH3 128 bits (a.k.a XXH128)
 	==========================================
 	XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
 	even without counting the significantly larger output size.
 
-	For example, extra steps are taken to avoid the seed-dependent collisions
+	For example, extra steps are taken to avoid the seed-dependent collisions
 	in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
 
 	This strength naturally comes at the cost of some speed, especially on short
@@ -241,7 +205,7 @@ XXH3_len_4to8_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u6
 		/* Shift len to the left to ensure it is even, this avoids even multiplies. */
 		m128 := XXH128_hash_t{
-			h = XXH_mul_64_to_128(keyed, u64(XXH_PRIME64_1) + (u64(length) << 2)),
+			h = u128(keyed) * (XXH_PRIME64_1 + u128(length) << 2),
 		}
 		m128.high += (m128.low << 1)
 		m128.low  ~= (m128.high >> 3)
@@ -265,7 +229,7 @@ XXH3_len_9to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u
 		input_lo := XXH64_read64(input[0:])
 		input_hi := XXH64_read64(input[length - 8:])
 		m128 := XXH128_hash_t{
-			h = XXH_mul_64_to_128(input_lo ~ input_hi ~ bitflipl, XXH_PRIME64_1),
+			h = u128(input_lo ~ input_hi ~ bitflipl) * XXH_PRIME64_1,
 		}
 		/*
 		 * Put len in the middle of m128 to ensure that the length gets mixed to
@@ -277,49 +241,14 @@ XXH3_len_9to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u
 		 * Add the high 32 bits of input_hi to the high 32 bits of m128, then
 		 * add the long product of the low 32 bits of input_hi and XXH_XXH_PRIME32_2 to
 		 * the high 64 bits of m128.
-		 *
-		 * The best approach to this operation is different on 32-bit and 64-bit.
 		 */
-		when size_of(rawptr) == 4 { /* 32-bit */
-			/*
-			 * 32-bit optimized version, which is more readable.
-			 *
-			 * On 32-bit, it removes an ADC and delays a dependency between the two
-			 * halves of m128.high64, but it generates an extra mask on 64-bit.
-			 */
-			m128.high += (input_hi & 0xFFFFFFFF00000000) + XXH_mul_32_to_64(u32(input_hi), XXH_PRIME32_2)
-		} else {
-			/*
-			 * 64-bit optimized (albeit more confusing) version.
-			 *
-			 * Uses some properties of addition and multiplication to remove the mask:
-			 *
-			 * Let:
-			 *    a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
-			 *    b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
-			 *    c = XXH_XXH_PRIME32_2
-			 *
-			 *    a + (b * c)
-			 * Inverse Property: x + y - x == y
-			 *    a + (b * (1 + c - 1))
-			 * Distributive Property: x * (y + z) == (x * y) + (x * z)
-			 *    a + (b * 1) + (b * (c - 1))
-			 * Identity Property: x * 1 == x
-			 *    a + b + (b * (c - 1))
-			 *
-			 * Substitute a, b, and c:
-			 *    input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_XXH_PRIME32_2 - 1))
-			 *
-			 * Since input_hi.hi + input_hi.lo == input_hi, we get this:
-			 *    input_hi + ((xxh_u64)input_hi.lo * (XXH_XXH_PRIME32_2 - 1))
-			 */
-			m128.high += input_hi + XXH_mul_32_to_64(u32(input_hi), XXH_PRIME32_2 - 1)
-		}
+		m128.high += input_hi + u64(u32(input_hi)) * u64(XXH_PRIME32_2 - 1)
+
 		/* m128 ^= XXH_swap64(m128 >> 64); */
 		m128.low ~= byte_swap(m128.high)
 
 		{	/* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
 			h128 := XXH128_hash_t{
-				h = XXH_mul_64_to_128(m128.low, XXH_PRIME64_2),
+				h = u128(m128.low) * XXH_PRIME64_2,
 			}
 			h128.high += m128.high * XXH_PRIME64_2
 			h128.low   = XXH3_avalanche(h128.low)
@@ -364,9 +293,6 @@ XXH128_mix32B :: #force_inline proc(acc: xxh_u128, input_1: []u8, input_2: []u8,
 	}
 }
 
-
-
-
 @(optimization_mode="speed")
 XXH3_len_17to128_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
 	length := len(input)
@@ -410,18 +336,18 @@ XXH3_len_129to240_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xx
 		i: int
 		#no_bounds_check for i = 0; i < 4; i += 1 {
 			acc.h = XXH128_mix32B(acc.h,
-					input[32 * i:],
-					input [32 * i + 16:],
-					secret[32 * i:],
-					seed)
+				input[32 * i:],
+				input [32 * i + 16:],
+				secret[32 * i:],
+				seed)
 		}
 		acc.low  = XXH3_avalanche(acc.low)
 		acc.high = XXH3_avalanche(acc.high)
 
 		#no_bounds_check for i = 4; i < nbRounds; i += 1 {
 			acc.h = XXH128_mix32B(acc.h,
-					input[32 * i:], input[32 * i + 16:],
-					secret[XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)):],
+				input[32 * i:], input[32 * i + 16:],
+				secret[XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)):],
 				seed)
 		}
 		/* last bytes */
@@ -435,9 +361,9 @@ XXH3_len_129to240_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xx
 		h128 := XXH128_hash_t{}
 		h128.low  = acc.low + acc.high
 		h128.high = u64(
-			u128(acc.low   * XXH_PRIME64_1) \
-			+ u128(acc.high * XXH_PRIME64_4) \
-			+ u128((u64(length) - seed) * XXH_PRIME64_2))
+			  u128(acc.low  * XXH_PRIME64_1) \
+			+ u128(acc.high * XXH_PRIME64_4) \
+			+ u128((u64(length) - seed) * XXH_PRIME64_2))
 		h128.low  = XXH3_avalanche(h128.low)
 		h128.high = u64(i64(0) - i64(XXH3_avalanche(h128.high)))
 		return h128.h
@@ -481,18 +407,20 @@ XXH3_hashLong_128b_internal :: #force_inline proc(
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_128b_default :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
-	k_secret := XXH3_kSecret
-	return XXH3_hashLong_128b_internal(input, k_secret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
+	return XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
 }
 
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_128b_withSecret :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
 	return XXH3_hashLong_128b_internal(input, secret, XXH3_accumulate_512, XXH3_scramble_accumulator)
 }
 
+@(optimization_mode="speed")
 XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
 	input: []u8, seed: xxh_u64, secret: []u8,
 	f_acc512: XXH3_accumulate_512_f,
@@ -500,26 +428,27 @@ XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
 	f_initSec: XXH3_init_custom_secret_f) -> (res: XXH3_128_hash) {
 
 	if seed == 0 {
-		k := XXH3_kSecret
-		return XXH3_hashLong_128b_internal(input, k[:], f_acc512, f_scramble)
+		return XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
 	}
 	{
-		secret := [XXH_SECRET_DEFAULT_SIZE]u8{}
-		f_initSec(secret[:], seed)
-		return XXH3_hashLong_128b_internal(input, secret[:], f_acc512, f_scramble)
+		_secret := [XXH_SECRET_DEFAULT_SIZE]u8{}
+		f_initSec(_secret[:], seed)
+		return XXH3_hashLong_128b_internal(input, _secret[:], f_acc512, f_scramble)
	}
 }
 
 /*
  * It's important for performance that XXH3_hashLong is not inlined.
 */
+@(optimization_mode="speed")
 XXH3_hashLong_128b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
 	return XXH3_hashLong_128b_withSeed_internal(input, seed, secret, XXH3_accumulate_512, XXH3_scramble_accumulator , XXH3_init_custom_secret)
 }
 
 XXH3_hashLong128_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash)
 
+@(optimization_mode="speed")
 XXH3_128bits_internal :: #force_inline proc(
 	input: []u8, seed: xxh_u64, secret: []u8, f_hl128: XXH3_hashLong128_f) -> (res: XXH3_128_hash) {
 
@@ -545,13 +474,21 @@ XXH3_128bits_internal :: #force_inline proc(
 }
 
 /* === Public XXH128 API === */
-
-XXH3_128bits :: proc(input: []u8) -> (hash: XXH3_128_hash) {
-	k := XXH3_kSecret
-	return XXH3_128bits_internal(input, XXH3_128_DEFAULT_SEED, k[:], XXH3_hashLong_128b_default)
+@(optimization_mode="speed")
+XXH3_128_default :: proc(input: []u8) -> (hash: XXH3_128_hash) {
+	return XXH3_128bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_128b_withSeed)
 }
 
+@(optimization_mode="speed")
+XXH3_128_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: XXH3_128_hash) {
+	return XXH3_128bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_128b_withSeed)
+}
+
+@(optimization_mode="speed")
+XXH3_128_with_secret :: proc(input: []u8, secret: []u8) -> (hash: XXH3_128_hash) {
+	return XXH3_128bits_internal(input, 0, secret, XXH3_hashLong_128b_withSecret)
+}
+
+XXH3_128 :: proc { XXH3_128_default, XXH3_128_with_seed, XXH3_128_with_secret }
 
 /*
 	==========================================
@@ -613,7 +550,8 @@ XXH3_len_4to8_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64
 	assert(secret != nil)
 
 	seed := seed
-	seed ~= u64(byte_swap(u32(seed) << 32))
+	seed ~= (u64(byte_swap(u32(seed))) << 32)
+
 	#no_bounds_check {
 		input1 := XXH32_read32(input)
 		input2 := XXH32_read32(input[length - 4:])
@@ -756,9 +694,10 @@ XXH3_len_129to240_64b :: proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res:
 
 /* ======= Long Keys ======= */
 
-XXH_STRIPE_LEN          :: 64
-XXH_SECRET_CONSUME_RATE :: 8 /* nb of secret bytes consumed at each accumulation */
-XXH_ACC_NB              :: (XXH_STRIPE_LEN / size_of(xxh_u64))
+XXH_STRIPE_LEN           :: 64
+XXH_SECRET_CONSUME_RATE  :: 8 /* nb of secret bytes consumed at each accumulation */
+XXH_ACC_NB               :: (XXH_STRIPE_LEN / size_of(xxh_u64))
+XXH_SECRET_LASTACC_START :: 7 /* not aligned on 8, last secret is different from acc & scrambler */
 
 @(optimization_mode="speed")
 XXH_writeLE64 :: #force_inline proc(dst: []u8, v64: u64le) {
@@ -808,9 +747,10 @@ XXH3_accumulate_512_scalar :: #force_inline proc(acc: []xxh_u64, input: []u8, se
 
 	#no_bounds_check for i := uint(0); i < XXH_ACC_NB; i += 1 {
 		data_val := XXH64_read64(xinput[8 * i:])
-		data_key := data_val ~ XXH64_read64(xsecret[8 * i:])
+		sec      := XXH64_read64(xsecret[8 * i:])
+		data_key := data_val ~ sec
 		xacc[i ~ 1] += data_val /* swap adjacent lanes */
-		xacc[i    ] += XXH_mul_32_to_64(u32(data_key & 0xFFFFFFFF), u32(data_key >> 32))
+		xacc[i    ] += u64(u128(u32(data_key)) * u128(u64(data_key >> 32)))
 	}
 }
 
@@ -835,12 +775,10 @@ XXH3_scramble_accumulator_scalar :: #force_inline proc(acc: []xxh_u64, secret: [
 XXH3_init_custom_secret_scalar :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64) {
 	#assert((XXH_SECRET_DEFAULT_SIZE & 15) == 0)
 
-	kSecretPtr := XXH3_kSecret
-
 	nbRounds := XXH_SECRET_DEFAULT_SIZE / 16
 	#no_bounds_check for i := 0; i < nbRounds; i += 1 {
-		lo := XXH64_read64(kSecretPtr[16 * i:   ]) + seed64
-		hi := XXH64_read64(kSecretPtr[16 * i + 8:]) - seed64
+		lo := XXH64_read64(XXH3_kSecret[16 * i:   ]) + seed64
+		hi := XXH64_read64(XXH3_kSecret[16 * i + 8:]) - seed64
 		XXH_writeLE64(custom_secret[16 * i:   ], u64le(lo))
 		XXH_writeLE64(custom_secret[16 * i + 8:], u64le(hi))
 	}
@@ -854,8 +792,8 @@ XXH_PREFETCH_DIST :: 320
  * Assumption: nbStripes will not overflow the secret size
 */
 @(optimization_mode="speed")
-XXH3_accumulate :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint,
-	f_acc512: XXH3_accumulate_512_f) {
+XXH3_accumulate :: #force_inline proc(
+	acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint, f_acc512: XXH3_accumulate_512_f) {
 
 	for n := uint(0); n < nbStripes; n += 1 {
 		when !XXH_DISABLE_PREFETCH {
@@ -885,13 +823,11 @@ XXH3_hashLong_internal_loop :: #force_inline proc(acc: []xxh_u64, input: []u8, s
 	/* last partial block */
 	#no_bounds_check {
 		stripes := ((length - 1) - (block_len * blocks)) / XXH_STRIPE_LEN
-
 		XXH3_accumulate(acc, input[blocks * block_len:], secret, stripes, f_acc512)
 
 		/* last stripe */
 		#no_bounds_check {
 			p := input[length - XXH_STRIPE_LEN:]
-			XXH_SECRET_LASTACC_START :: 7 /* not aligned on 8, last secret is different from acc & scrambler */
 			f_acc512(acc, p, secret[secret_size - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START:])
 		}
 	}
@@ -911,4 +847,115 @@ XXH3_mergeAccs :: #force_inline proc(acc: []xxh_u64, secret: []u8, start: xxh_u6
 		result64 += XXH3_mix2Accs(acc[2 * i:], secret[16 * i:])
 	}
 	return XXH3_avalanche(result64)
-}
\ No newline at end of file
+}
+
+@(optimization_mode="speed")
+XXH3_hashLong_64b_internal :: #force_inline proc(input: []u8, secret: []u8,
+	f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) -> (hash: xxh_u64) {
+
+	acc: [XXH_ACC_NB]xxh_u64 = XXH3_INIT_ACC
+
+	XXH3_hashLong_internal_loop(acc[:], input, secret, f_acc512, f_scramble)
+
+	/* converge into final hash */
+	#assert(size_of(acc) == 64)
+	/* do not align on 8, so that the secret is different from the accumulator */
+	XXH_SECRET_MERGEACCS_START :: 11
+	assert(len(secret) >= size_of(acc) + XXH_SECRET_MERGEACCS_START)
+	return XXH3_mergeAccs(acc[:], secret[XXH_SECRET_MERGEACCS_START:], xxh_u64(len(input)) * XXH_PRIME64_1)
+}
+
+/*
+	It's important for performance that XXH3_hashLong is not inlined.
+*/
+@(optimization_mode="speed")
+XXH3_hashLong_64b_withSecret :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
+	return XXH3_hashLong_64b_internal(input, secret, XXH3_accumulate_512, XXH3_scramble_accumulator)
+}
+
+/*
+	It's important for performance that XXH3_hashLong is not inlined.
+	Since the function is not inlined, the compiler may not be able to understand that,
+	in some scenarios, its `secret` argument is actually a compile time constant.
+	This variant enforces that the compiler can detect that,
+	and uses this opportunity to streamline the generated code for better performance.
+*/
+@(optimization_mode="speed")
+XXH3_hashLong_64b_default :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
+	return XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
+}
+
+/*
+	XXH3_hashLong_64b_withSeed():
+	Generate a custom key based on alteration of default XXH3_kSecret with the seed,
+	and then use this key for long mode hashing.
+
+	This operation is decently fast but nonetheless costs a little bit of time.
+	Try to avoid it whenever possible (typically when seed==0).
+
+	It's important for performance that XXH3_hashLong is not inlined. Not sure
+	why (uop cache maybe?), but the difference is large and easily measurable.
+*/
+@(optimization_mode="speed")
+XXH3_hashLong_64b_withSeed_internal :: #force_no_inline proc(input: []u8,
+	seed: xxh_u64,
+	f_acc512: XXH3_accumulate_512_f,
+	f_scramble: XXH3_scramble_accumulator_f,
+	f_init_sec: XXH3_init_custom_secret_f) -> (hash: xxh_u64) {
+
+	if seed == 0 {
+		return XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
+	}
+	{
+		secret: [XXH_SECRET_DEFAULT_SIZE]u8
+		f_init_sec(secret[:], seed)
+		return XXH3_hashLong_64b_internal(input, secret[:], f_acc512, f_scramble)
+	}
+}
+
+/*
+	It's important for performance that XXH3_hashLong is not inlined.
+*/
+@(optimization_mode="speed")
+XXH3_hashLong_64b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
+	return XXH3_hashLong_64b_withSeed_internal(input, seed, XXH3_accumulate_512, XXH3_scramble_accumulator, XXH3_init_custom_secret)
+}
+
+
+XXH3_hashLong64_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: xxh_u64)
+
+@(optimization_mode="speed")
+XXH3_64bits_internal :: proc(input: []u8, seed: xxh_u64, secret: []u8, f_hashLong: XXH3_hashLong64_f) -> (hash: xxh_u64) {
+	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
+	/*
+		If an action is to be taken if the len(secret) condition is not respected, it should be done here.
+		For now, it's a contract pre-condition.
+		Adding a check and a branch here would cost performance at every hash.
+		Also, note that the function signature doesn't offer room to return an error.
+	*/
+	length := len(input)
+	switch {
+	case length <= 16:               return XXH3_len_0to16_64b(input, secret, seed)
+	case length <= 128:              return XXH3_len_17to128_64b(input, secret, seed)
+	case length <= XXH3_MIDSIZE_MAX: return XXH3_len_129to240_64b(input, secret, seed)
+	case:                            return f_hashLong(input, seed, secret)
+	}
+	unreachable()
+}
+
+/* === Public entry point === */
+@(optimization_mode="speed")
+XXH3_64_default :: proc(input: []u8) -> (hash: xxh_u64) {
+	return XXH3_64bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_64b_default)
+}
+
+@(optimization_mode="speed")
+XXH3_64_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: xxh_u64) {
+	return XXH3_64bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_64b_withSeed)
+}
+
+@(optimization_mode="speed")
+XXH3_64_with_secret :: proc(input, secret: []u8) -> (hash: xxh_u64) {
+	return XXH3_64bits_internal(input, 0, secret, XXH3_hashLong_64b_withSecret)
+}
+
+XXH3_64 :: proc { XXH3_64_default, XXH3_64_with_seed, XXH3_64_with_secret }
\ No newline at end of file
diff --git a/core/hash/xxhash/xxhash_32.odin b/core/hash/xxhash/xxhash_32.odin
index f41161133..e63d998dd 100644
--- a/core/hash/xxhash/xxhash_32.odin
+++ b/core/hash/xxhash/xxhash_32.odin
@@ -197,12 +197,12 @@ XXH32 :: proc(input: []u8, seed := XXH32_DEFAULT_SEED) -> (digest: XXH32_hash) {
 */
 XXH32_create_state :: proc(allocator := context.allocator) -> (res: ^XXH32_state, err: Error) {
 	state := new(XXH32_state, allocator)
-	return state, nil if state != nil else .Error
+	return state, .None if state != nil else .Error
 }
 
 XXH32_destroy_state :: proc(state: ^XXH32_state, allocator := context.allocator) -> (err: Error) {
 	free(state, allocator)
-	return nil
+	return .None
 }
 
 XXH32_copy_state :: proc(dest, src: ^XXH32_state) {
@@ -221,7 +221,7 @@ XXH32_reset_state :: proc(state_ptr: ^XXH32_state, seed := XXH32_DEFAULT_SEED) -
 		Do not write into reserved, planned to be removed in a future version.
 	*/
 	mem_copy(state_ptr, &state, size_of(state) - size_of(state.reserved))
-	return nil
+	return .None
 }
 
 XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
@@ -236,7 +236,7 @@ XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
 		ptr := uintptr(raw_data(state.mem32[:])) + uintptr(state.memsize)
 		mem_copy(rawptr(ptr), raw_data(input), int(length))
 		state.memsize += XXH32_hash(length)
-		return nil
+		return .None
 	}
 
 	if state.memsize > 0 {	/* Some data left from previous update */
@@ -276,7 +276,7 @@ XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
 		mem_copy(raw_data(state.mem32[:]), raw_data(buf[:]), int(length))
 		state.memsize = u32(length)
 	}
-	return nil
+	return .None
 }
 
 XXH32_digest :: proc(state: ^XXH32_state) -> (res: XXH32_hash) {
diff --git a/core/hash/xxhash/xxhash_64.odin b/core/hash/xxhash/xxhash_64.odin
index d535a134c..e95842168 100644
--- a/core/hash/xxhash/xxhash_64.odin
+++ b/core/hash/xxhash/xxhash_64.odin
@@ -163,12 +163,12 @@ XXH64 :: proc(input: []u8, seed := XXH64_DEFAULT_SEED) -> (digest: XXH64_hash) {
 */
 XXH64_create_state :: proc(allocator := context.allocator) -> (res: ^XXH64_state, err: Error) {
 	state := new(XXH64_state, allocator)
-	return state, nil if state != nil else .Error
+	return state, .None if state != nil else .Error
 }
 
 XXH64_destroy_state :: proc(state: ^XXH64_state, allocator := context.allocator) -> (err: Error) {
 	free(state, allocator)
-	return nil
+	return .None
 }
 
 XXH64_copy_state :: proc(dest, src: ^XXH64_state) {
@@ -187,7 +187,7 @@ XXH64_reset_state :: proc(state_ptr: ^XXH64_state, seed := XXH64_DEFAULT_SEED) -
 		Do not write into reserved64, might be removed in a future version.
 	*/
 	mem_copy(state_ptr, &state, size_of(state) - size_of(state.reserved64))
-	return nil
+	return .None
 }
 
 @(optimization_mode="speed")
@@ -201,7 +201,7 @@ XXH64_update :: proc(state: ^XXH64_state, input: []u8) -> (err: Error) {
 		ptr := uintptr(raw_data(state.mem64[:])) + uintptr(state.memsize)
 		mem_copy(rawptr(ptr), raw_data(input), int(length))
 		state.memsize += u32(length)
-		return nil
+		return .None
 	}
 
 	if state.memsize > 0 {	/* tmp buffer is full */
@@ -241,7 +241,7 @@ XXH64_update :: proc(state: ^XXH64_state, input: []u8) -> (err: Error) {
 		mem_copy(raw_data(state.mem64[:]), raw_data(buf[:]), int(length))
 		state.memsize = u32(length)
 	}
-	return nil
+	return .None
 }
 
 @(optimization_mode="speed")
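
To make the API this merge brings in concrete, here is a minimal usage sketch of the one-shot and streaming entry points (`XXH3_64`, `XXH3_create_state`, `XXH3_64_update`, `XXH3_64_digest`). It assumes the package is imported from `core:hash/xxhash`; it is an illustration written against the diff above, not code from the commit.

```odin
package xxhash_example

import "core:fmt"
import "core:hash/xxhash"

main :: proc() {
	// 1 KiB of sample data. Note that in this version, XXH3_64_update
	// returns .Error for inputs shorter than XXH3_MIDSIZE_MAX (240 bytes).
	data := make([]u8, 1024)
	defer delete(data)
	for _, i in data {
		data[i] = u8(i)
	}

	// One-shot hashing: XXH3_64 is a proc group, so passing a seed or a
	// secret selects XXH3_64_with_seed or XXH3_64_with_secret respectively.
	fmt.printf("one-shot: %16x\n", xxhash.XXH3_64(data))
	fmt.printf("seeded:   %16x\n", xxhash.XXH3_64(data, 0xdead_beef))

	// Streaming: XXH3_state has a strict 64-byte alignment requirement,
	// so it must come from XXH3_create_state, not a plain `new`.
	state, _ := xxhash.XXH3_create_state()
	defer xxhash.XXH3_destroy_state(state)

	xxhash.XXH3_64_reset(state)
	xxhash.XXH3_64_update(state, data)
	fmt.printf("streamed: %16x\n", xxhash.XXH3_64_digest(state))
}
```

For the same input, the one-shot and streamed digests are intended to agree; the streaming path simply buffers input in `XXH3_INTERNAL_BUFFER_SIZE` chunks and merges the accumulators at digest time.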