about | summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author Feoramund <161657516+Feoramund@users.noreply.github.com> 2024-08-04 15:58:56 -0400
committer Feoramund <161657516+Feoramund@users.noreply.github.com> 2024-08-06 15:19:05 -0400
commit f66fcd9acb390b199452a125ed09899dffefde5d (patch)
tree d9078dfb5942567a627bb9cffa10e08e4591109e /core
parent 8deeb40e5de1d33b571f0b1faf7b8dea678cd91b (diff)
Use vectorized `index_*` procs in `core`
Diffstat (limited to 'core')
-rw-r--r-- core/bytes/bytes.odin 47
-rw-r--r-- core/strings/strings.odin 47
2 files changed, 78 insertions, 16 deletions
diff --git a/core/bytes/bytes.odin b/core/bytes/bytes.odin
index 7cbf092ac..dcd4931e2 100644
--- a/core/bytes/bytes.odin
+++ b/core/bytes/bytes.odin
@@ -1,6 +1,8 @@
package bytes
+import "base:intrinsics"
import "core:mem"
+@require import simd_util "core:simd/util"
import "core:unicode"
import "core:unicode/utf8"
@@ -295,22 +297,51 @@ split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
index_byte :: proc(s: []byte, c: byte) -> int {
- for i := 0; i < len(s); i += 1 {
- if s[i] == c {
- return i
+ _index_byte :: #force_inline proc(s: []byte, c: byte) -> int {
+ for i := 0; i < len(s); i += 1 {
+ if s[i] == c {
+ return i
+ }
+ }
+ return -1
+ }
+
+ // NOTE(Feoramund): On my Alder Lake CPU, I have only witnessed a
+ // significant speedup when compiling in either Size or Speed mode.
+ // The SIMD version is usually 2-3x slower without optimizations on.
+ when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
+ // SIMD's benefits are noticeable only past a certain threshold of data.
+ // For small data, use the plain old algorithm.
+ if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
+ return simd_util.index_byte(s, c)
+ } else {
+ return _index_byte(s, c)
}
+ } else {
+ return _index_byte(s, c)
}
- return -1
}
// Returns -1 if c is not present
last_index_byte :: proc(s: []byte, c: byte) -> int {
- for i := len(s)-1; i >= 0; i -= 1 {
- if s[i] == c {
- return i
+ _last_index_byte :: #force_inline proc(s: []byte, c: byte) -> int {
+ for i := len(s)-1; i >= 0; i -= 1 {
+ if s[i] == c {
+ return i
+ }
}
+ return -1
+ }
+
+ when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
+ if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
+ return simd_util.last_index_byte(s, c)
+ } else {
+ return _last_index_byte(s, c)
+ }
+ } else {
+ return _last_index_byte(s, c)
}
- return -1
}
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index e9b50bab0..9d3e88165 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -1,7 +1,9 @@
// Procedures to manipulate UTF-8 encoded strings
package strings
+import "base:intrinsics"
import "core:io"
+@require import simd_util "core:simd/util"
import "core:mem"
import "core:unicode"
import "core:unicode/utf8"
@@ -1424,12 +1426,29 @@ Output:
*/
index_byte :: proc(s: string, c: byte) -> (res: int) {
- for i := 0; i < len(s); i += 1 {
- if s[i] == c {
- return i
+ _index_byte :: #force_inline proc(s: string, c: byte) -> int {
+ for i := 0; i < len(s); i += 1 {
+ if s[i] == c {
+ return i
+ }
+ }
+ return -1
+ }
+
+ // NOTE(Feoramund): On my Alder Lake CPU, I have only witnessed a
+ // significant speedup when compiling in either Size or Speed mode.
+ // The SIMD version is usually 2-3x slower without optimizations on.
+ when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
+ // SIMD's benefits are noticeable only past a certain threshold of data.
+ // For small data, use the plain old algorithm.
+ if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
+ return simd_util.index_byte(transmute([]u8)s, c)
+ } else {
+ return _index_byte(s, c)
}
+ } else {
+ return _index_byte(s, c)
}
- return -1
}
/*
Returns the byte offset of the last byte `c` in the string `s`, -1 when not found.
@@ -1464,12 +1483,24 @@ Output:
*/
last_index_byte :: proc(s: string, c: byte) -> (res: int) {
- for i := len(s)-1; i >= 0; i -= 1 {
- if s[i] == c {
- return i
+ _last_index_byte :: #force_inline proc(s: string, c: byte) -> int {
+ for i := len(s)-1; i >= 0; i -= 1 {
+ if s[i] == c {
+ return i
+ }
}
+ return -1
+ }
+
+ when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
+ if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
+ return simd_util.last_index_byte(transmute([]u8)s, c)
+ } else {
+ return _last_index_byte(s, c)
+ }
+ } else {
+ return _last_index_byte(s, c)
}
- return -1
}
/*
Returns the byte offset of the first rune `r` in the string `s` it finds, -1 when not found.