diff options
| author | gingerBill <bill@gingerbill.org> | 2021-03-18 13:26:33 +0000 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2021-03-18 13:26:33 +0000 |
| commit | 333741222827db990ef2bce11873816e66bc1633 (patch) | |
| tree | ed9d6f3b4c7f6ce6b5d2be996e3dbdb9c734f613 /core/bytes | |
| parent | e3f9d99a3b3c2e8222856a0c1dc56837de26a417 (diff) | |
`split*_iterator` procedures for package bytes and strings
Diffstat (limited to 'core/bytes')
| -rw-r--r-- | core/bytes/bytes.odin | 110 |
1 files changed, 110 insertions, 0 deletions
// _split_iterator returns the next piece of `s^` and advances `s^` past it.
//
// Inputs:
//   s        - pointer to the remaining input; it is overwritten with whatever
//              follows the returned piece (and its separator)
//   sep      - separator to search for
//   sep_save - number of separator bytes kept at the end of the returned piece
//              (0 drops the separator, len(sep) keeps it)
//   n        - 0 yields nothing, 1 yields the whole remainder as one piece,
//              any other value searches for the next separator
//
// Returns the piece and `ok == true` while there is something to yield.
// Once the input is used up it returns `ok == false`, so
// `for piece in split_iterator(&data, sep) { ... }` terminates.
//
// NOTE: `n` is passed by value and no counter is kept between calls, so this
// procedure cannot count pieces across calls; only n == 0 and n == 1 behave
// differently from the unbounded case.
@private
_split_iterator :: proc(s: ^[]byte, sep: []byte, sep_save, n: int) -> (res: []byte, ok: bool) {
	if s == nil || n == 0 {
		return;
	}

	rest := s^;

	// An empty separator can never be located, and n == 1 asks for a single
	// piece; in both cases the whole remainder is the piece.
	if n != 1 && len(sep) > 0 {
		if m := index(rest, sep); m >= 0 {
			res = rest[:m+sep_save];
			ok = true;
			s^ = rest[m+len(sep):];
			return;
		}
	}

	// No (further) separator: hand out what is left exactly once.
	// Exhaustion is decided by length rather than by comparing against nil,
	// because the zero-length remainder produced by slicing still carries
	// a data pointer.
	res = rest;
	ok = len(res) > 0;
	s^ = rest[len(rest):];
	return;
}


// split_iterator yields successive pieces of `s^` separated by `sep`,
// without the separator, consuming `s^` as it goes.
split_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
	return _split_iterator(s, sep, 0, -1);
}

// split_n_iterator is split_iterator with a limit argument `n`:
// n == 0 yields nothing and n == 1 yields the remainder as a single piece.
// See the NOTE on _split_iterator: the limit is not tracked between calls.
split_n_iterator :: proc(s: ^[]byte, sep: []byte, n: int) -> ([]byte, bool) {
	return _split_iterator(s, sep, 0, n);
}

// split_after_iterator yields successive pieces of `s^`, each one including
// the trailing `sep` that terminated it, consuming `s^` as it goes.
split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
	return _split_iterator(s, sep, len(sep), -1);
}

// split_after_n_iterator is split_after_iterator with a limit argument `n`:
// n == 0 yields nothing and n == 1 yields the remainder as a single piece.
// See the NOTE on _split_iterator: the limit is not tracked between calls.
split_after_n_iterator :: proc(s: ^[]byte, sep: []byte, n: int) -> ([]byte, bool) {
	return _split_iterator(s, sep, len(sep), n);
}


// split_multi_iterator yields the next piece of `s^` delimited by any of the
// separators in `substrs`, and advances `s^` past that piece and its separator.
//
// Inputs:
//   s          - pointer to the remaining input; overwritten on every call
//   substrs    - candidate separators; at each position they are tried in
//                list order and the first one that matches wins.
//                Empty separators are ignored.
//   skip_empty - when true, empty pieces (adjacent or leading separators)
//                are not yielded
//
// Returns the piece and true, or nil and false once the input is used up.
// Positions are advanced one UTF-8 encoded rune at a time, so separators are
// only recognised at rune boundaries.
split_multi_iterator :: proc(s: ^[]byte, substrs: [][]byte, skip_empty := false) -> ([]byte, bool) #no_bounds_check {
	if s == nil || len(s^) == 0 || len(substrs) <= 0 {
		return nil, false;
	}

	data := s^;

	// start of the current piece, current scan position
	start, i := 0, 0;

	scan: for i < len(data) {
		for substr in substrs {
			w := len(substr);

			// A zero-width separator would match everywhere without consuming
			// input; a separator longer than what is left cannot match.
			// The explicit length test matters because bounds checks are off.
			if w == 0 || i+w > len(data) {
				continue;
			}

			// Compare against this separator's own length, not the shortest one.
			if string(data[i:i+w]) == string(substr) {
				if !skip_empty || i > start {
					// Resume after the separator so the next call makes progress.
					s^ = data[i+w:];
					return data[start:i], true;
				}

				// Empty piece and the caller does not want it: drop it and go on.
				i += w;
				start = i;
				continue scan;
			}
		}

		_, skip := utf8.decode_rune(data[i:]);
		i += max(skip, 1); // always make progress
	}

	// No separator left: the tail is the final piece (if there is one).
	rest := data[start:];
	s^ = data[len(data):];
	return rest, len(rest) > 0;
}