| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2025-05-31 20:24:21 +0200 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2025-05-31 20:24:21 +0200 |
| commit | 890e923051a90362b6c4a5e7aaaeafc00d533424 (patch) | |
| tree | 90b32ba2e69372594edd111946933ee8d342d994 /tests | |
| parent | d52aa3f2c22362f417e440e988e186f683449261 (diff) | |
Vectorize `strings.prefix_length`.
Also add `strings.common_prefix`.
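Note: the new `strings.common_prefix` itself is not visible in this tests-only diff. As a rough, hypothetical sketch of how it relates to `prefix_length` (not the actual `core:strings` source), a common-prefix helper can be expressed by slicing the first argument with the prefix length, which already stops at whole-codepoint boundaries:

```odin
package common_prefix_sketch

import "core:strings"

// Hypothetical sketch only: `common_prefix` expressed in terms of
// `prefix_length`. Because `prefix_length` ignores partial codepoints,
// the returned slice is always valid UTF-8 up to the divergence point.
common_prefix_sketch :: proc(a, b: string) -> string {
	return a[:strings.prefix_length(a, b)]
}
```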
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/benchmark/all.odin | 1 |
| -rw-r--r-- | tests/benchmark/strings/benchmark_strings.odin | 132 |
| -rw-r--r-- | tests/core/strings/test_core_strings.odin | 51 |
3 files changed, 183 insertions, 1 deletion
diff --git a/tests/benchmark/all.odin b/tests/benchmark/all.odin
index a48872cc6..30640ac87 100644
--- a/tests/benchmark/all.odin
+++ b/tests/benchmark/all.odin
@@ -4,3 +4,4 @@ package benchmarks
 @(require) import "crypto"
 @(require) import "hash"
 @(require) import "text/regex"
+@(require) import "strings"
\ No newline at end of file
diff --git a/tests/benchmark/strings/benchmark_strings.odin b/tests/benchmark/strings/benchmark_strings.odin
new file mode 100644
index 000000000..9f5a76f0f
--- /dev/null
+++ b/tests/benchmark/strings/benchmark_strings.odin
@@ -0,0 +1,132 @@
+package benchmark_strings
+
+import "base:intrinsics"
+import "base:runtime"
+import "core:fmt"
+import "core:log"
+import "core:testing"
+import "core:strings"
+import "core:text/table"
+import "core:time"
+import "core:unicode/utf8"
+
+RUNS_PER_SIZE :: 2500
+
+sizes := [?]int {
+	7, 8, 9,
+	15, 16, 17,
+	31, 32, 33,
+	63, 64, 65,
+	95, 96, 97,
+	128,
+	256,
+	512,
+	1024,
+	4096,
+}
+
+// These are the normal, unoptimized algorithms.
+
+plain_prefix_length :: proc "contextless" (a, b: string) -> (n: int) {
+	_len := min(len(a), len(b))
+
+	// Scan for matches including partial codepoints.
+	#no_bounds_check for n < _len && a[n] == b[n] {
+		n += 1
+	}
+
+	// Now scan to ignore partial codepoints.
+	if n > 0 {
+		s := a[:n]
+		n = 0
+		for {
+			r0, w := utf8.decode_rune(s[n:])
+			if r0 != utf8.RUNE_ERROR {
+				n += w
+			} else {
+				break
+			}
+		}
+	}
+	return
+}
+
+run_trial_size_prefix :: proc(p: proc "contextless" (string, string) -> $R, suffix: string, size: int, idx: int, runs: int, loc := #caller_location) -> (timing: time.Duration) {
+	left := make([]u8, size)
+	right := make([]u8, size)
+	defer {
+		delete(left)
+		delete(right)
+	}
+
+	if len(suffix) > 0 {
+		copy(left [idx:], suffix)
+		copy(right[idx:], suffix)
+
+	} else {
+		right[idx] = 'A'
+	}
+
+	accumulator: int
+
+	watch: time.Stopwatch
+
+	time.stopwatch_start(&watch)
+	for _ in 0..<runs {
+		result := p(string(left[:size]), string(right[:size]))
+		accumulator += result
+	}
+	time.stopwatch_stop(&watch)
+	timing = time.stopwatch_duration(watch)
+
+	log.debug(accumulator)
+	return
+}
+
+run_trial_size :: proc {
+	run_trial_size_prefix,
+}
+
+bench_table_size :: proc(algo_name: string, plain, simd: $P, suffix := "") {
+	string_buffer := strings.builder_make()
+	defer strings.builder_destroy(&string_buffer)
+
+	tbl: table.Table
+	table.init(&tbl)
+	defer table.destroy(&tbl)
+
+	table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Iterations", "Scalar", "SIMD", "SIMD Relative (%)", "SIMD Relative (x)")
+
+	for size in sizes {
+		// Place the non-zero byte somewhere in the middle.
+		needle_index := size / 2
+
+		plain_timing := run_trial_size(plain, suffix, size, needle_index, RUNS_PER_SIZE)
+		simd_timing := run_trial_size(simd, suffix, size, needle_index, RUNS_PER_SIZE)
+
+		_plain := fmt.tprintf("%8M", plain_timing)
+		_simd := fmt.tprintf("%8M", simd_timing)
+		_relp := fmt.tprintf("%.3f %%", f64(simd_timing) / f64(plain_timing) * 100.0)
+		_relx := fmt.tprintf("%.3f x", 1 / (f64(simd_timing) / f64(plain_timing)))
+
+		table.aligned_row_of_values(
+			&tbl,
+			.Right,
+			algo_name,
+			size, RUNS_PER_SIZE, _plain, _simd, _relp, _relx)
+	}
+
+	builder_writer := strings.to_writer(&string_buffer)
+
+	fmt.sbprintln(&string_buffer)
+	table.write_plain_table(builder_writer, &tbl)
+
+	my_table_string := strings.to_string(string_buffer)
+	log.info(my_table_string)
+}
+
+@test
+benchmark_memory_procs :: proc(t: ^testing.T) {
+	bench_table_size("prefix_length ascii", plain_prefix_length, strings.prefix_length)
+	bench_table_size("prefix_length unicode", plain_prefix_length, strings.prefix_length, "🦉")
+}
\ No newline at end of file
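The vectorized `strings.prefix_length` itself lives in `core:strings` and is not part of this tests-only diff. The speedups this benchmark measures come from comparing many bytes per iteration rather than one; a minimal word-at-a-time sketch of that idea (an illustration only, not the committed SIMD implementation) might look like the following, assuming a little-endian target and omitting the partial-codepoint trimming that `plain_prefix_length` performs:

```odin
package prefix_word_sketch

import "base:intrinsics"

// Illustration of the chunked-comparison idea behind vectorizing a prefix
// scan: compare 8 bytes per step, then locate the first mismatching byte in
// the differing chunk with a trailing-zero count of the XOR of both words.
// Assumes little-endian byte order; codepoint-boundary trimming is omitted.
word_prefix_length :: proc(a, b: string) -> (n: int) {
	_len := min(len(a), len(b))

	// Compare 8 bytes at a time.
	for n + 8 <= _len {
		x := intrinsics.unaligned_load((^u64)(rawptr(raw_data(a[n:]))))
		y := intrinsics.unaligned_load((^u64)(rawptr(raw_data(b[n:]))))
		if diff := x ~ y; diff != 0 {
			// The lowest set bit of the XOR marks the first differing byte.
			return n + int(intrinsics.count_trailing_zeros(diff)) / 8
		}
		n += 8
	}

	// Scalar tail for the remaining bytes.
	#no_bounds_check for n < _len && a[n] == b[n] {
		n += 1
	}
	return
}
```

A real SIMD version typically widens the chunk to 16, 32, or 64 bytes, but the loop shape (wide compare, then pinpoint the first mismatch) stays the same.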
diff --git a/tests/core/strings/test_core_strings.odin b/tests/core/strings/test_core_strings.odin
index 7b5f7fbb4..140689468 100644
--- a/tests/core/strings/test_core_strings.odin
+++ b/tests/core/strings/test_core_strings.odin
@@ -1,9 +1,10 @@
 package test_core_strings
 
+import "base:runtime"
 import "core:mem"
 import "core:strings"
 import "core:testing"
-import "base:runtime"
+import "core:unicode/utf8"
 
 @test
 test_index_any_small_string_not_found :: proc(t: ^testing.T) {
@@ -218,3 +219,51 @@ test_builder_to_cstring :: proc(t: ^testing.T) {
 		testing.expect(t, err == .Out_Of_Memory)
 	}
 }
+
+@test
+test_prefix_length :: proc(t: ^testing.T) {
+	prefix_length :: proc "contextless" (a, b: string) -> (n: int) {
+		_len := min(len(a), len(b))
+
+		// Scan for matches including partial codepoints.
+		#no_bounds_check for n < _len && a[n] == b[n] {
+			n += 1
+		}
+
+		// Now scan to ignore partial codepoints.
+		if n > 0 {
+			s := a[:n]
+			n = 0
+			for {
+				r0, w := utf8.decode_rune(s[n:])
+				if r0 != utf8.RUNE_ERROR {
+					n += w
+				} else {
+					break
+				}
+			}
+		}
+		return
+	}
+
+	cases := [][2]string{
+		{"Hellope, there!", "Hellope, world!"},
+		{"Hellope, there!", "Foozle"},
+		{"Hellope, there!", "Hell"},
+		{"Hellope! 🦉", "Hellope! 🦉"},
+	}
+
+	for v in cases {
+		p_scalar := prefix_length(v[0], v[1])
+		p_simd := strings.prefix_length(v[0], v[1])
+		testing.expect_value(t, p_simd, p_scalar)
+
+		s := v[0]
+		for len(s) > 0 {
+			p_scalar = prefix_length(v[0], s)
+			p_simd = strings.prefix_length(v[0], s)
+			testing.expect_value(t, p_simd, p_scalar)
+			s = s[:len(s) - 1]
+		}
+	}
+}
\ No newline at end of file
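As a usage note on the `"Hellope! 🦉"` cases above: the scalar reference trims partial codepoints, so when two strings diverge in the middle of a multi-byte rune, none of that rune's bytes count toward the prefix. A small, hypothetical example (the 🦆 variant is not part of the test data) of the expected behaviour:

```odin
package prefix_length_usage

import "core:fmt"
import "core:strings"

main :: proc() {
	// Identical strings: 9 ASCII bytes ("Hellope! ") plus the 4-byte 🦉.
	fmt.println(strings.prefix_length("Hellope! 🦉", "Hellope! 🦉")) // expected: 13

	// 🦉 (F0 9F A6 89) and 🦆 (F0 9F A6 86) share their first three bytes,
	// but the partial codepoint is ignored, so only "Hellope! " is counted.
	fmt.println(strings.prefix_length("Hellope! 🦉", "Hellope! 🦆")) // expected: 9
}
```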