diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2022-06-16 16:12:15 +0200 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2022-06-16 16:12:15 +0200 |
| commit | 6f1222e9bfa76fdb45668465d62c2a454c07eca8 (patch) | |
| tree | d118efe4966ded2fdf5208474c74d5c00f5f4b9c | |
| parent | 84a424f21e60f56415c7f1d3336ced9eac02ad81 (diff) | |
Update `strings.prefix_length` to handle partial UTF-8 runes.
| -rw-r--r-- | core/strings/strings.odin | 21 |
1 file changed, 15 insertions, 6 deletions
diff --git a/core/strings/strings.odin b/core/strings/strings.odin index 678cc94cd..6bdafbba4 100644 --- a/core/strings/strings.odin +++ b/core/strings/strings.odin @@ -225,14 +225,23 @@ equal_fold :: proc(u, v: string) -> bool { */ prefix_length :: proc(a, b: string) -> (n: int) { _len := min(len(a), len(b)) - idx := 0 - #no_bounds_check for idx < _len && a[idx] == b[idx] { - idx += 1 + // Scan for matches including partial codepoints. + #no_bounds_check for n < _len && a[n] == b[n] { + n += 1 + } - if a[idx] & 128 != 128 { - // new codepoint or end of multi-byte codepoint, update match length - n = idx + // Now scan to ignore partial codepoints. + if n > 0 { + s := a[:n] + n = 0 + for { + r0, w := utf8.decode_rune(s[n:]) + if r0 != utf8.RUNE_ERROR { + n += w + } else { + break + } } } return |