From 12dd0cb72a586a99129280c78697089caab0500a Mon Sep 17 00:00:00 2001 From: Feoramund <161657516+Feoramund@users.noreply.github.com> Date: Fri, 9 Aug 2024 17:39:19 -0400 Subject: Simplify and make `simd_util` cross-platform This new algorithm uses a Scalar->Vector->Scalar iteration loop which requires no masking of incomplete data chunks. Also, the width was reduced to 32 bytes instead of 64, as I found this to be about as fast as the previous 64-byte x86 version. --- core/strings/strings.odin | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'core/strings') diff --git a/core/strings/strings.odin b/core/strings/strings.odin index 9d3e88165..b8e43f90d 100644 --- a/core/strings/strings.odin +++ b/core/strings/strings.odin @@ -1438,14 +1438,8 @@ index_byte :: proc(s: string, c: byte) -> (res: int) { // NOTE(Feoramund): On my Alder Lake CPU, I have only witnessed a // significant speedup when compiling in either Size or Speed mode. // The SIMD version is usually 2-3x slower without optimizations on. - when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") { - // SIMD's benefits are noticeable only past a certain threshold of data. - // For small data, use the plain old algorithm. 
- if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE { - return simd_util.index_byte(transmute([]u8)s, c) - } else { - return _index_byte(s, c) - } + when ODIN_OPTIMIZATION_MODE > .Minimal { + return #force_inline simd_util.index_byte(transmute([]u8)s, c) } else { return _index_byte(s, c) } @@ -1492,12 +1486,8 @@ last_index_byte :: proc(s: string, c: byte) -> (res: int) { return -1 } - when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") { - if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE { - return simd_util.last_index_byte(transmute([]u8)s, c) - } else { - return _last_index_byte(s, c) - } + when ODIN_OPTIMIZATION_MODE > .Minimal { + return #force_inline simd_util.last_index_byte(transmute([]u8)s, c) } else { return _last_index_byte(s, c) } -- cgit v1.2.3