aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2022-06-16 16:12:15 +0200
committer: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2022-06-16 16:12:15 +0200
commit: 6f1222e9bfa76fdb45668465d62c2a454c07eca8 (patch)
tree: d118efe4966ded2fdf5208474c74d5c00f5f4b9c
parent: 84a424f21e60f56415c7f1d3336ced9eac02ad81 (diff)
Update `strings.prefix_length` to handle partial UTF-8 runes.
-rw-r--r-- core/strings/strings.odin 21
1 file changed, 15 insertions, 6 deletions
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index 678cc94cd..6bdafbba4 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -225,14 +225,23 @@ equal_fold :: proc(u, v: string) -> bool {
*/
prefix_length :: proc(a, b: string) -> (n: int) {
_len := min(len(a), len(b))
- idx := 0
- #no_bounds_check for idx < _len && a[idx] == b[idx] {
- idx += 1
+ // Scan for matches including partial codepoints.
+ #no_bounds_check for n < _len && a[n] == b[n] {
+ n += 1
+ }
- if a[idx] & 128 != 128 {
- // new codepoint or end of multi-byte codepoint, update match length
- n = idx
+ // Now scan to ignore partial codepoints.
+ if n > 0 {
+ s := a[:n]
+ n = 0
+ for {
+ r0, w := utf8.decode_rune(s[n:])
+ if r0 != utf8.RUNE_ERROR {
+ n += w
+ } else {
+ break
+ }
}
}
return