From bf56e3ea8dab1da71d54de320007147ae969b78f Mon Sep 17 00:00:00 2001
From: gingerBill
Date: Fri, 27 Aug 2021 12:07:57 +0100
Subject: Improve `strings.index_any` and `strings.last_index_any`

---
 core/strings/ascii_set.odin | 26 +++++++++++++
 core/strings/strings.odin   | 84 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 99 insertions(+), 11 deletions(-)
 create mode 100644 core/strings/ascii_set.odin

diff --git a/core/strings/ascii_set.odin b/core/strings/ascii_set.odin
new file mode 100644
index 000000000..06aea982f
--- /dev/null
+++ b/core/strings/ascii_set.odin
@@ -0,0 +1,26 @@
+//+private
+package strings
+
+import "core:unicode/utf8"
+
+// Ascii_Set is a 256-bit bitmap (8 x 32 bits) with one bit per possible byte value.
+Ascii_Set :: distinct [8]u32;
+
+// ascii_set_make builds the set of the bytes in `chars`.
+// `ok` is false if `chars` contains a byte >= utf8.RUNE_SELF (non-ASCII); the set is then incomplete.
+ascii_set_make :: proc(chars: string) -> (as: Ascii_Set, ok: bool) #no_bounds_check {
+	for i in 0..<len(chars) {
+		c := chars[i];
+		if c >= utf8.RUNE_SELF {
+			return;
+		}
+		as[c>>5] |= 1 << uint(c&31);
+	}
+	ok = true;
+	return;
+}
+
+// ascii_set_contains reports whether the byte `c` is a member of `as`.
+ascii_set_contains :: proc(as: Ascii_Set, c: byte) -> bool #no_bounds_check {
+	return as[c>>5] & (1<<(c&31)) != 0;
+}
\ No newline at end of file
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index fac0879a1..70ceca26a 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -479,17 +479,39 @@ last_index :: proc(s, substr: string) -> int {
 	return -1;
 }
 
+
+// index_any returns the byte index of the first rune in `s` that also occurs in `chars`,
+// or -1 if there is no such rune or `chars` is empty.
 index_any :: proc(s, chars: string) -> int {
 	if chars == "" {
 		return -1;
 	}
-
-	// TODO(bill): Optimize
-	for r, i in s {
-		for c in chars {
-			if r == c {
-				return i;
+
+	// A one-byte `chars` is either an ASCII rune or an invalid encoding (searched for as RUNE_ERROR).
+	if len(chars) == 1 {
+		r := rune(chars[0]);
+		if r >= utf8.RUNE_SELF {
+			r = utf8.RUNE_ERROR;
+		}
+		return index_rune(s, r);
+	}
+
+	// For longer inputs with ASCII-only `chars`, test each byte of `s` against a bitmap.
+	if len(s) > 8 {
+		if as, ok := ascii_set_make(chars); ok {
+			for i in 0..<len(s) {
+				if ascii_set_contains(as, s[i]) {
+					return i;
+				}
 			}
+			return -1;
+		}
+	}
+
+	// General case: look each rune of `s` up in `chars`.
+	for c, i in s {
+		if index_rune(chars, c) >= 0 {
+			return i;
 		}
 	}
 	return -1;
@@ -499,14 +521,54 @@ last_index_any :: proc(s, chars: string) -> int {
 	if chars == "" {
 		return -1;
 	}
+
+	// `s` is a single byte, so the only possible match position is 0.
+	// index_rune yields a position within `chars`, which must not be returned as an index into `s`.
+	if len(s) == 1 {
+		r := rune(s[0]);
+		if r >= utf8.RUNE_SELF {
+			r = utf8.RUNE_ERROR;
+		}
+		if index_rune(chars, r) >= 0 {
+			return 0;
+		}
+		return -1;
+	}
+
+	// For longer inputs with ASCII-only `chars`, test each byte of `s` against a bitmap, back to front.
+	if len(s) > 8 {
+		if as, ok := ascii_set_make(chars); ok {
+			for i := len(s)-1; i >= 0; i -= 1 {
+				if ascii_set_contains(as, s[i]) {
+					return i;
+				}
+			}
+			return -1;
+		}
+	}
+
+	// A one-byte `chars`: compare that single rune against each rune of `s`, decoded back to front.
+	if len(chars) == 1 {
+		r := rune(chars[0]);
+		if r >= utf8.RUNE_SELF {
+			r = utf8.RUNE_ERROR;
+		}
+		for i := len(s); i > 0; /**/ {
+			c, w := utf8.decode_last_rune_in_string(s[:i]);
+			i -= w;
+			if c == r {
+				return i;
+			}
+		}
+		return -1;
+	}
 
-	for i := len(s); i > 0; {
+	// General case: decode `s` back to front and look each rune up in `chars`.
+	for i := len(s); i > 0; /**/ {
 		r, w := utf8.decode_last_rune_in_string(s[:i]);
 		i -= w;
-		for c in chars {
-			if r == c {
-				return i;
-			}
+		if index_rune(chars, r) >= 0 {
+			return i;
 		}
 	}
 	return -1;
-- 
cgit v1.2.3