Diffstat (limited to 'base/runtime/internal.odin')
-rw-r--r--  base/runtime/internal.odin | 68
1 file changed, 68 insertions, 0 deletions
diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin
index a35dbff8a..907b187f1 100644
--- a/base/runtime/internal.odin
+++ b/base/runtime/internal.odin
@@ -405,6 +405,74 @@ memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_
	return 0
}
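+// memory_prefix_length returns the length of the common prefix of the first n
+// bytes at x and y: the index of the first byte at which they differ, or n if
+// the ranges are identical. If only one of the pointers is nil, the result is 0.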
+memory_prefix_length :: proc "contextless" (x, y: rawptr, n: int) -> (idx: int) #no_bounds_check {
+	switch {
+	case x == y: return n
+	case x == nil: return 0
+	case y == nil: return 0
+	}
+	a, b := cast([^]byte)x, cast([^]byte)y
+
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
+
+	// With hardware SIMD on amd64 and AVX2 available, scan 32 bytes per iteration.
+	when HAS_HARDWARE_SIMD {
+		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+			m = n / 32 * 32
+			for /**/; i < m; i += 32 {
+				load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
+				load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
+				comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+				if intrinsics.simd_reduce_or(comparison) != 0 {
+					sentinel: #simd[32]u8 = u8(0xFF)
+					indices := intrinsics.simd_indices(#simd[32]u8)
+					index_select := intrinsics.simd_select(comparison, indices, sentinel)
+					index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+					return int(i + index_reduce)
+				}
+			}
+		}
+	}
+
+	// Scan the remaining bytes 16 at a time.
+	m = (n-i) / 16 * 16
+	for /**/; i < m; i += 16 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[16]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[16]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return int(i + index_reduce)
+		}
+	}
+
+	// 64-bit SIMD is faster than using a `uintptr` to detect a difference then
+	// re-iterating with the byte-by-byte loop, at least on AMD64.
+	m = (n-i) / 8 * 8
+	for /**/; i < m; i += 8 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[8]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[8]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return int(i + index_reduce)
+		}
+	}
+
+	// Compare any remaining bytes individually.
+	for /**/; i < n; i += 1 {
+		if a[i] ~ b[i] != 0 {
+			return int(i)
+		}
+	}
+	return int(n)
+}
+
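For a sense of what the new helper computes, here is a minimal, hypothetical usage sketch (not part of this commit); the procedure name and test values are invented, and it assumes the call site lives in, or imports, the runtime package:

prefix_length_example :: proc "contextless" () -> int {
	a := [5]byte{1, 2, 3, 4, 5}
	b := [5]byte{1, 2, 3, 9, 9}
	// The buffers agree on their first three bytes, so this returns 3;
	// identical inputs of length n would return n.
	return memory_prefix_length(rawptr(&a[0]), rawptr(&b[0]), len(a))
}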
string_eq :: proc "contextless" (lhs, rhs: string) -> bool {
	x := transmute(Raw_String)lhs
	y := transmute(Raw_String)rhs