aboutsummaryrefslogtreecommitdiff
path: root/core/strings/strings.odin
diff options
context:
space:
mode:
author: Feoramund <161657516+Feoramund@users.noreply.github.com> 2024-08-09 17:39:19 -0400
committer: Feoramund <161657516+Feoramund@users.noreply.github.com> 2024-08-09 18:54:04 -0400
commit: 12dd0cb72a586a99129280c78697089caab0500a (patch)
tree: 80bbcb47b01bcefff2595c61147dd15318054b00 /core/strings/strings.odin
parent: 793811b219e77b21a1c765323957e4b74ce13e64 (diff)
Simplify and make `simd_util` cross-platform
This new algorithm uses a Scalar->Vector->Scalar iteration loop, which requires no masking of incomplete data chunks. Also, the vector width was reduced from 64 bytes to 32, as I found this to be about as fast as the previous 64-byte x86 version.
Diffstat (limited to 'core/strings/strings.odin')
-rw-r--r--core/strings/strings.odin18
1 files changed, 4 insertions, 14 deletions
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index 9d3e88165..b8e43f90d 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -1438,14 +1438,8 @@ index_byte :: proc(s: string, c: byte) -> (res: int) {
// NOTE(Feoramund): On my Alder Lake CPU, I have only witnessed a
// significant speedup when compiling in either Size or Speed mode.
// The SIMD version is usually 2-3x slower without optimizations on.
- when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
- // SIMD's benefits are noticeable only past a certain threshold of data.
- // For small data, use the plain old algorithm.
- if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
- return simd_util.index_byte(transmute([]u8)s, c)
- } else {
- return _index_byte(s, c)
- }
+ when ODIN_OPTIMIZATION_MODE > .Minimal {
+ return #force_inline simd_util.index_byte(transmute([]u8)s, c)
} else {
return _index_byte(s, c)
}
@@ -1492,12 +1486,8 @@ last_index_byte :: proc(s: string, c: byte) -> (res: int) {
return -1
}
- when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
- if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
- return simd_util.last_index_byte(transmute([]u8)s, c)
- } else {
- return _last_index_byte(s, c)
- }
+ when ODIN_OPTIMIZATION_MODE > .Minimal {
+ return #force_inline simd_util.last_index_byte(transmute([]u8)s, c)
} else {
return _last_index_byte(s, c)
}