Diffstat (limited to 'base/runtime/internal.odin')
-rw-r--r--  base/runtime/internal.odin  68
1 file changed, 68 insertions, 0 deletions
diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin
index a35dbff8a..907b187f1 100644
--- a/base/runtime/internal.odin
+++ b/base/runtime/internal.odin
@@ -405,6 +405,74 @@ memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_
 	return 0
 }
 
+memory_prefix_length :: proc "contextless" (x, y: rawptr, n: int) -> (idx: int) #no_bounds_check {
+	switch {
+	case x == y:   return n
+	case x == nil: return 0
+	case y == nil: return 0
+	}
+	a, b := cast([^]byte)x, cast([^]byte)y
+
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
+
+	when HAS_HARDWARE_SIMD {
+		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+			m = n / 32 * 32
+			for /**/; i < m; i += 32 {
+				load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
+				load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
+				comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+				if intrinsics.simd_reduce_or(comparison) != 0 {
+					sentinel: #simd[32]u8 = u8(0xFF)
+					indices := intrinsics.simd_indices(#simd[32]u8)
+					index_select := intrinsics.simd_select(comparison, indices, sentinel)
+					index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+					return int(i + index_reduce)
+				}
+			}
+		}
+	}
+
+	m = (n-i) / 16 * 16
+	for /**/; i < m; i += 16 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[16]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[16]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return int(i + index_reduce)
+		}
+	}
+
+	// 64-bit SIMD is faster than using a `uintptr` to detect a difference then
+	// re-iterating with the byte-by-byte loop, at least on AMD64.
+	m = (n-i) / 8 * 8
+	for /**/; i < m; i += 8 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[8]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[8]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return int(i + index_reduce)
+		}
+	}
+
+	for /**/; i < n; i += 1 {
+		if a[i] ~ b[i] != 0 {
+			return int(i)
+		}
+	}
+	return int(n)
+}
+
 string_eq :: proc "contextless" (lhs, rhs: string) -> bool {
 	x := transmute(Raw_String)lhs
 	y := transmute(Raw_String)rhs
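
The patch's core trick is how it turns a SIMD comparison into the index of the first differing byte: intrinsics.simd_lanes_ne marks the mismatching lanes, intrinsics.simd_select keeps each marked lane's index and fills the remaining lanes with a 0xFF sentinel, and intrinsics.simd_reduce_min then yields the lowest mismatching lane index. Below is a minimal standalone sketch of that per-chunk step for a single 16-byte block, using the same base:intrinsics calls as the patch; the procedure first_mismatch_16 and the driver in main are illustrative names, not part of the commit, and the sketch assumes both pointers address at least 16 readable bytes.

    package prefix_sketch

    import "base:intrinsics"
    import "core:fmt"

    // Index of the first byte that differs between two 16-byte chunks, or 16 if
    // the chunks are equal. Mirrors the per-chunk step used by memory_prefix_length.
    first_mismatch_16 :: proc "contextless" (a, b: [^]u8) -> int {
    	load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[0])
    	load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[0])
    	comparison := intrinsics.simd_lanes_ne(load_a, load_b) // non-zero lanes mark differing bytes
    	if intrinsics.simd_reduce_or(comparison) == 0 {
    		return 16 // chunks are identical
    	}
    	sentinel: #simd[16]u8 = u8(0xFF)
    	indices := intrinsics.simd_indices(#simd[16]u8) // lanes 0, 1, ..., 15
    	index_select := intrinsics.simd_select(comparison, indices, sentinel)
    	return int(intrinsics.simd_reduce_min(index_select)) // lowest differing lane index
    }

    main :: proc() {
    	x := "Hello, world!!!!" // 16 bytes
    	y := "Hello, World!!!!" // differs at byte 7 ('w' vs 'W')
    	fmt.println(first_mismatch_16(raw_data(x), raw_data(y))) // prints 7
    }

memory_prefix_length applies the same per-chunk step over 32-byte (AVX2), 16-byte, and 8-byte chunks, then finishes any remaining tail with the byte-by-byte loop.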