diff options
| author | Feoramund <161657516+Feoramund@users.noreply.github.com> | 2024-08-04 15:58:56 -0400 |
|---|---|---|
| committer | Feoramund <161657516+Feoramund@users.noreply.github.com> | 2024-08-06 15:19:05 -0400 |
| commit | f66fcd9acb390b199452a125ed09899dffefde5d (patch) | |
| tree | d9078dfb5942567a627bb9cffa10e08e4591109e /core/bytes/bytes.odin | |
| parent | 8deeb40e5de1d33b571f0b1faf7b8dea678cd91b (diff) | |
Use vectorized `index_*` procs in `core`
Diffstat (limited to 'core/bytes/bytes.odin')
| -rw-r--r-- | core/bytes/bytes.odin | 47 |
1 files changed, 39 insertions, 8 deletions
```diff
diff --git a/core/bytes/bytes.odin b/core/bytes/bytes.odin
index 7cbf092ac..dcd4931e2 100644
--- a/core/bytes/bytes.odin
+++ b/core/bytes/bytes.odin
@@ -1,6 +1,8 @@
 package bytes
 
+import "base:intrinsics"
 import "core:mem"
+@require import simd_util "core:simd/util"
 import "core:unicode"
 import "core:unicode/utf8"
 
@@ -295,22 +297,51 @@ split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
 
 
 index_byte :: proc(s: []byte, c: byte) -> int {
-	for i := 0; i < len(s); i += 1 {
-		if s[i] == c {
-			return i
+	_index_byte :: #force_inline proc(s: []byte, c: byte) -> int {
+		for i := 0; i < len(s); i += 1 {
+			if s[i] == c {
+				return i
+			}
+		}
+		return -1
+	}
+
+	// NOTE(Feoramund): On my Alder Lake CPU, I have only witnessed a
+	// significant speedup when compiling in either Size or Speed mode.
+	// The SIMD version is usually 2-3x slower without optimizations on.
+	when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
+		// SIMD's benefits are noticeable only past a certain threshold of data.
+		// For small data, use the plain old algorithm.
+		if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
+			return simd_util.index_byte(s, c)
+		} else {
+			return _index_byte(s, c)
 		}
+	} else {
+		return _index_byte(s, c)
 	}
-	return -1
 }
 
 // Returns -1 if c is not present
 last_index_byte :: proc(s: []byte, c: byte) -> int {
-	for i := len(s)-1; i >= 0; i -= 1 {
-		if s[i] == c {
-			return i
+	_last_index_byte :: #force_inline proc(s: []byte, c: byte) -> int {
+		for i := len(s)-1; i >= 0; i -= 1 {
+			if s[i] == c {
+				return i
+			}
 		}
+		return -1
+	}
+
+	when ODIN_OPTIMIZATION_MODE > .Minimal && intrinsics.has_target_feature("sse2") {
+		if len(s) >= simd_util.RECOMMENDED_SCAN_SIZE {
+			return simd_util.last_index_byte(s, c)
+		} else {
+			return _last_index_byte(s, c)
+		}
+	} else {
+		return _last_index_byte(s, c)
 	}
-	return -1
 }
 
 
```