diff options
| author | Feoramund <161657516+Feoramund@users.noreply.github.com> | 2024-08-09 17:39:19 -0400 |
|---|---|---|
| committer | Feoramund <161657516+Feoramund@users.noreply.github.com> | 2024-08-09 18:54:04 -0400 |
| commit | 12dd0cb72a586a99129280c78697089caab0500a (patch) | |
| tree | 80bbcb47b01bcefff2595c61147dd15318054b00 /core/strings/strings.odin | |
| parent | 793811b219e77b21a1c765323957e4b74ce13e64 (diff) | |
Simplify and make `simd_util` cross-platform

This new algorithm uses a Scalar->Vector->Scalar iteration loop, which
does not require masking off any incomplete data chunks.

Also, the vector width was reduced from 64 bytes to 32, as I found this
to be about as fast as the previous 64-byte x86 version.
Diffstat (limited to 'core/strings/strings.odin')
| -rw-r--r-- | core/strings/strings.odin | 18 |
1 file changed, 4 insertions, 14 deletions
```diff
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index 9d3e88165..b8e43f90d 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -1438,14 +1438,8 @@ index_byte :: proc(s: string, c: byte) -> (res: int) {
 	// NOTE(Feoramund): On my Alder Lake CPU, I have only witnessed a
 	// significant speedup when compiling in either Size or Speed mode.
 	// The SIMD version is usually 2-3x slower without optimizations on.
-	when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
-		// SIMD's benefits are noticeable only past a certain threshold of data.
-		// For small data, use the plain old algorithm.
-		if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
-			return simd_util.index_byte(transmute([]u8)s, c)
-		} else {
-			return _index_byte(s, c)
-		}
+	when ODIN_OPTIMIZATION_MODE > .Minimal {
+		return #force_inline simd_util.index_byte(transmute([]u8)s, c)
 	} else {
 		return _index_byte(s, c)
 	}
@@ -1492,12 +1486,8 @@ last_index_byte :: proc(s: string, c: byte) -> (res: int) {
 		return -1
 	}
 
-	when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
-		if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
-			return simd_util.last_index_byte(transmute([]u8)s, c)
-		} else {
-			return _last_index_byte(s, c)
-		}
+	when ODIN_OPTIMIZATION_MODE > .Minimal {
+		return #force_inline simd_util.last_index_byte(transmute([]u8)s, c)
 	} else {
 		return _last_index_byte(s, c)
 	}
```