diff options
| author | gingerBill <bill@gingerbill.org> | 2021-08-27 12:07:57 +0100 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2021-08-27 12:07:57 +0100 |
| commit | bf56e3ea8dab1da71d54de320007147ae969b78f (patch) | |
| tree | 84c5e58dc04897b5dc8a662a396782737926615f | |
| parent | 284acc37f935ec89d6ebf7d017741c3001181502 (diff) | |
Improve `strings.index_any` and `strings.last_index_any`
| -rw-r--r-- | core/strings/ascii_set.odin | 22 | ||||
| -rw-r--r-- | core/strings/strings.odin | 71 |
2 files changed, 82 insertions, 11 deletions
//+private
// File: core/strings/ascii_set.odin (added by this commit)
package strings

import "core:unicode/utf8"

// Ascii_Set is a 256-bit bitmap stored as eight 32-bit words.
// Bit (c & 31) of word (c >> 5) is set when byte `c` is a member.
// `ascii_set_make` only ever sets bits for bytes below utf8.RUNE_SELF,
// so the upper words stay zero for sets it builds.
Ascii_Set :: distinct [8]u32;

// ascii_set_make builds a set from the bytes of `chars`.
//
// Returns `ok = true` when every byte of `chars` is below utf8.RUNE_SELF.
// On the first byte at or above utf8.RUNE_SELF it stops and returns `ok = false`;
// in that case `as` holds whatever bits were set before that byte.
ascii_set_make :: proc(chars: string) -> (as: Ascii_Set, ok: bool) #no_bounds_check {
	for idx := 0; idx < len(chars); idx += 1 {
		b := chars[idx];
		if b >= utf8.RUNE_SELF {
			// Not representable in the set: bail out with ok == false.
			return;
		}
		word := b >> 5;          // which u32 holds this byte's bit
		bit  := uint(b & 31);    // bit position inside that u32
		as[word] |= 1 << bit;
	}
	ok = true;
	return;
}

// ascii_set_contains reports whether the bit for byte `c` is set in `as`.
ascii_set_contains :: proc(as: Ascii_Set, c: byte) -> bool #no_bounds_check {
	mask := u32(1) << (c & 31);
	return as[c >> 5] & mask != 0;
}
// File: core/strings/strings.odin (excerpt: `index_any` and `last_index_any` as changed by this commit;
// the rest of the file is not shown here)

// index_any returns the byte offset in `s` of the first rune that also occurs in `chars`,
// or -1 when no rune of `s` occurs in `chars` or when `chars` is empty.
index_any :: proc(s, chars: string) -> int {
	if chars == "" {
		return -1;
	}

	if len(chars) == 1 {
		// Single-byte set: search for that one rune directly instead of scanning a set.
		// A lone byte >= utf8.RUNE_SELF is not a complete rune, so it is searched for as utf8.RUNE_ERROR.
		r := rune(chars[0]);
		if r >= utf8.RUNE_SELF {
			r = utf8.RUNE_ERROR;
		}
		return index_rune(s, r);
	}

	if len(s) > 8 {
		// Fast path: when `chars` is pure ASCII, test each byte of `s` against a bitmap.
		if as, ok := ascii_set_make(chars); ok {
			for i in 0..<len(s) {
				if ascii_set_contains(as, s[i]) {
					return i;
				}
			}
			return -1;
		}
	}

	// General path: walk the runes of `s` (not `chars`) and report the offset of the
	// first one found in `chars`. Iterating `chars` here would always match at offset 0.
	for c, i in s {
		if index_rune(chars, c) >= 0 {
			return i;
		}
	}
	return -1;
}

// last_index_any returns the byte offset in `s` of the last rune that also occurs in `chars`,
// or -1 when no rune of `s` occurs in `chars` or when `chars` is empty.
last_index_any :: proc(s, chars: string) -> int {
	if chars == "" {
		return -1;
	}

	if len(s) == 1 {
		// Single-byte haystack: the only possible answer is offset 0.
		// A lone byte >= utf8.RUNE_SELF is looked up as utf8.RUNE_ERROR.
		r := rune(s[0]);
		if r >= utf8.RUNE_SELF {
			r = utf8.RUNE_ERROR;
		}
		// `index_rune(chars, r)` is an offset into `chars`; only use it as a membership test.
		if index_rune(chars, r) >= 0 {
			return 0;
		}
		return -1;
	}

	if len(s) > 8 {
		// Fast path: when `chars` is pure ASCII, scan the bytes of `s` backwards against a bitmap.
		if as, ok := ascii_set_make(chars); ok {
			for i := len(s)-1; i >= 0; i -= 1 {
				if ascii_set_contains(as, s[i]) {
					return i;
				}
			}
			return -1;
		}
	}

	if len(chars) == 1 {
		// Single-byte set: compare each rune of `s`, decoded from the end, against that one rune.
		r := rune(chars[0]);
		if r >= utf8.RUNE_SELF {
			r = utf8.RUNE_ERROR;
		}
		for i := len(s); i > 0; /**/ {
			c, w := utf8.decode_last_rune_in_string(s[:i]);
			i -= w; // `i` is now the start offset of the rune just decoded
			if c == r {
				return i;
			}
		}
		return -1;
	}

	// General path: decode runes of `s` from the end and test each for membership in `chars`.
	for i := len(s); i > 0; /**/ {
		r, w := utf8.decode_last_rune_in_string(s[:i]);
		i -= w; // `i` is now the start offset of the rune just decoded
		if index_rune(chars, r) >= 0 {
			return i;
		}
	}
	return -1;
}