aboutsummaryrefslogtreecommitdiff
path: root/core/strings
diff options
context:
space:
mode:
authorMichael Kutowski <skytrias@protonmail.com>2022-03-27 11:39:17 +0200
committerGitHub <noreply@github.com>2022-03-27 11:39:17 +0200
commit58f4d533b72d199848e4ebb291b7737312b4957a (patch)
tree631e9f68467baf8073b1ad41bf7f2acad80a3542 /core/strings
parent92f985abd5c4e5017a644266816fb2b8326157be (diff)
add string documentation & examples, fix & cleanup string_multi
Diffstat (limited to 'core/strings')
-rw-r--r--core/strings/strings.odin659
1 files changed, 525 insertions, 134 deletions
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index e5bd60d33..452c0ca0c 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -1,16 +1,21 @@
+// simple procedures to manipulate UTF-8 encoded strings
package strings
import "core:io"
import "core:mem"
+import "core:slice"
import "core:unicode"
import "core:unicode/utf8"
+// returns a clone of the string `s` allocated using the `allocator`
clone :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> string {
c := make([]byte, len(s), allocator, loc)
copy(c, s)
return string(c[:len(s)])
}
+// returns a clone of the string `s` allocated using the `allocator` as a cstring
+// a nul byte is appended to the clone, to make the cstring safe
clone_to_cstring :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> cstring {
c := make([]byte, len(s)+1, allocator, loc)
copy(c, s)
@@ -18,27 +23,35 @@ clone_to_cstring :: proc(s: string, allocator := context.allocator, loc := #call
return cstring(&c[0])
}
+// returns a string from a byte pointer `ptr` and byte length `len`
+// the string is valid as long as the parameters stay alive
string_from_ptr :: proc(ptr: ^byte, len: int) -> string {
return transmute(string)mem.Raw_String{ptr, len}
}
+// returns a string from a byte pointer `ptr` and byte length `len`
+// searches for a nul byte from 0..<len, otherwise `len` will be the end size
string_from_nul_terminated_ptr :: proc(ptr: ^byte, len: int) -> string {
s := transmute(string)mem.Raw_String{ptr, len}
s = truncate_to_byte(s, 0)
return s
}
-
+// returns the raw ^byte start of the string `str`
ptr_from_string :: proc(str: string) -> ^byte {
d := transmute(mem.Raw_String)str
return d.data
}
+// returns the transmute of string `str` to a cstring
+// not safe since the origin string may not contain a nul byte
unsafe_string_to_cstring :: proc(str: string) -> cstring {
d := transmute(mem.Raw_String)str
return cstring(d.data)
}
+// returns a string truncated to the first time it finds the byte `b`
+// uses the `len` of the string `str` when it couldn't find the input
truncate_to_byte :: proc(str: string, b: byte) -> string {
n := index_byte(str, b)
if n < 0 {
@@ -46,6 +59,9 @@ truncate_to_byte :: proc(str: string, b: byte) -> string {
}
return str[:n]
}
+
+// returns a string truncated to the first time it finds the rune `r`
+// uses the `len` of the string `str` when it couldn't find the input
truncate_to_rune :: proc(str: string, r: rune) -> string {
n := index_rune(str, r)
if n < 0 {
@@ -54,20 +70,28 @@ truncate_to_rune :: proc(str: string, r: rune) -> string {
return str[:n]
}
+// returns a cloned string of the byte array `s` using the `allocator`
+// appends a trailing nul byte
clone_from_bytes :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> string {
c := make([]byte, len(s)+1, allocator, loc)
copy(c, s)
c[len(s)] = 0
return string(c[:len(s)])
}
+
+// returns a clone of the cstring `s` using the `allocator` as a string
clone_from_cstring :: proc(s: cstring, allocator := context.allocator, loc := #caller_location) -> string {
return clone(string(s), allocator, loc)
}
+
+// returns a cloned string from the pointer `ptr` and a byte length `len` using the `allocator`
+// same to `string_from_ptr` but allocates
clone_from_ptr :: proc(ptr: ^byte, len: int, allocator := context.allocator, loc := #caller_location) -> string {
s := string_from_ptr(ptr, len)
return clone(s, allocator, loc)
}
+// overload to clone from a `string`, `[]byte`, `cstring` or a `^byte + length` to a string
clone_from :: proc{
clone,
clone_from_bytes,
@@ -75,6 +99,8 @@ clone_from :: proc{
clone_from_ptr,
}
+// returns a cloned string from the cstring `ptr` and a byte length `len` using the `allocator`
+// truncates till the first nul byte it finds or the byte len
clone_from_cstring_bounded :: proc(ptr: cstring, len: int, allocator := context.allocator, loc := #caller_location) -> string {
s := string_from_ptr((^u8)(ptr), len)
s = truncate_to_byte(s, 0)
@@ -82,11 +108,12 @@ clone_from_cstring_bounded :: proc(ptr: cstring, len: int, allocator := context.
}
// Compares two strings, returning a value representing which one comes first lexiographically.
-// -1 for `a`; 1 for `b`, or 0 if they are equal.
+// -1 for `lhs`; 1 for `rhs`, or 0 if they are equal.
compare :: proc(lhs, rhs: string) -> int {
return mem.compare(transmute([]byte)lhs, transmute([]byte)rhs)
}
+// returns the byte offset of the rune `r` in the string `s`, -1 when not found
contains_rune :: proc(s: string, r: rune) -> int {
for c, offset in s {
if c == r {
@@ -96,20 +123,48 @@ contains_rune :: proc(s: string, r: rune) -> int {
return -1
}
+/*
+ returns true when the string `substr` is contained inside the string `s`
+
+ strings.contains("testing", "test") -> true
+ strings.contains("testing", "ing") -> true
+ strings.contains("testing", "text") -> false
+*/
contains :: proc(s, substr: string) -> bool {
return index(s, substr) >= 0
}
+/*
+ returns true when the string `s` contains any of the characters inside the string `chars`
+
+ strings.contains_any("test", "test") -> true
+ strings.contains_any("test", "ts") -> true
+ strings.contains_any("test", "et") -> true
+ strings.contains_any("test", "a") -> false
+*/
contains_any :: proc(s, chars: string) -> bool {
return index_any(s, chars) >= 0
}
+/*
+ returns the utf8 rune count of the string `s`
+ strings.rune_count("test") -> 4
+ strings.rune_count("testö") -> 5, where len("testö") -> 6
+*/
rune_count :: proc(s: string) -> int {
return utf8.rune_count_in_string(s)
}
+/*
+ returns whether the strings `u` and `v` are the same alpha characters
+ works with utf8 string content and ignores different casings
+ strings.equal_fold("test", "test") -> true
+ strings.equal_fold("Test", "test") -> true
+ strings.equal_fold("Test", "tEsT") -> true
+ strings.equal_fold("test", "tes") -> false
+*/
equal_fold :: proc(u, v: string) -> bool {
s, t := u, v
loop: for s != "" && t != "" {
@@ -153,15 +208,39 @@ equal_fold :: proc(u, v: string) -> bool {
return s == t
}
+/*
+ return true when the string `prefix` is contained at the start of the string `s`
+
+ strings.has_prefix("testing", "test") -> true
+ strings.has_prefix("testing", "te") -> true
+ strings.has_prefix("telephone", "te") -> true
+ strings.has_prefix("testing", "est") -> false
+*/
has_prefix :: proc(s, prefix: string) -> bool {
return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
}
+/*
+ returns true when the string `suffix` is contained at the end of the string `s`
+ good example to use this is for file extensions
+
+ strings.has_suffix("todo.txt", ".txt") -> true
+ strings.has_suffix("todo.doc", ".txt") -> false
+ strings.has_suffix("todo.doc.txt", ".txt") -> true
+*/
has_suffix :: proc(s, suffix: string) -> bool {
return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
}
+/*
+ returns a combined string from the slice of strings `a` separated with the `sep` string
+ allocates the string using the `allocator`
+ a := [?]string { "a", "b", "c" }
+ b := strings.join(a[:], " ") -> "a b c"
+ c := strings.join(a[:], "-") -> "a-b-c"
+ d := strings.join(a[:], "...") -> "a...b...c"
+*/
join :: proc(a: []string, sep: string, allocator := context.allocator) -> string {
if len(a) == 0 {
return ""
@@ -181,6 +260,14 @@ join :: proc(a: []string, sep: string, allocator := context.allocator) -> string
return string(b)
}
+/*
+ returns a combined string from the slice of strings `a` without a separator
+ allocates the string using the `allocator`
+
+
+ a := [?]string { "a", "b", "c" }
+ b := strings.concatenate(a[:]) -> "abc"
+*/
concatenate :: proc(a: []string, allocator := context.allocator) -> string {
if len(a) == 0 {
return ""
@@ -199,8 +286,13 @@ concatenate :: proc(a: []string, allocator := context.allocator) -> string {
}
/*
+
`rune_offset` and `rune_length` are in runes, not bytes.
If `rune_length` <= 0, then it'll return the remainder of the string starting with `rune_offset`.
+
+ strings.cut("some example text", 0, 4) -> "some"
+ strings.cut("some example text", 2, 2) -> "me"
+ strings.cut("some example text", 5, 7) -> "example"
*/
cut :: proc(s: string, rune_offset := int(0), rune_length := int(0), allocator := context.allocator) -> (res: string) {
s := s; rune_length := rune_length
@@ -307,17 +399,37 @@ split_n :: proc(s, sep: string, n: int, allocator := context.allocator) -> []str
return _split(s, sep, 0, n, allocator)
}
+/*
+ splits the string `s` after the separator string `sep` appears
+ returns the slice of split strings allocated using `allocator`
+
+ a := "aaa.bbb.ccc.ddd.eee"
+ aa := strings.split_after(a, ".")
+ fmt.eprintln(aa) // [aaa., bbb., ccc., ddd., eee]
+*/
split_after :: proc(s, sep: string, allocator := context.allocator) -> []string {
return _split(s, sep, len(sep), -1, allocator)
}
+/*
+ splits the string `s` after the separator string `sep` appears into a total of `n` parts
+ returns the slice of split strings allocated using `allocator`
+
+ a := "aaa.bbb.ccc.ddd.eee"
+ aa := strings.split_after_n(a, ".", 3)
+ fmt.eprintln(aa) // [aaa., bbb., ccc.ddd.eee]
+*/
split_after_n :: proc(s, sep: string, n: int, allocator := context.allocator) -> []string {
return _split(s, sep, len(sep), n, allocator)
}
-
@private
_split_iterator :: proc(s: ^string, sep: string, sep_save: int) -> (res: string, ok: bool) {
+ // stop once the string is empty or nil
+ if s == nil || len(s^) == 0 {
+ return
+ }
+
if sep == "" {
res = s[:]
ok = true
@@ -339,8 +451,16 @@ _split_iterator :: proc(s: ^string, sep: string, sep_save: int) -> (res: string,
return
}
-@private
-_split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (res: string, ok: bool) {
+/*
+ split the ^string `s` by the byte separator `sep` in an iterator fashion
+ consumes the original string till the end, leaving the string `s` with len == 0
+
+ text := "a.b.c.d.e"
+ for str in strings.split_by_byte_iterator(&text, '.') {
+ fmt.eprintln(str) // every loop -> a b c d e
+ }
+*/
+split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (res: string, ok: bool) {
m := index_byte(s^, sep)
if m < 0 {
// not found
@@ -355,14 +475,28 @@ _split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (res: string, ok: bool)
return
}
-split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (string, bool) {
- return _split_by_byte_iterator(s, sep)
-}
+/*
+ split the ^string `s` by the separator string `sep` in an iterator fashion
+ consumes the original string till the end
+ text := "a.b.c.d.e"
+ for str in strings.split_iterator(&text, ".") {
+ fmt.eprintln(str) // every loop -> a b c d e
+ }
+*/
split_iterator :: proc(s: ^string, sep: string) -> (string, bool) {
return _split_iterator(s, sep, 0)
}
+/*
+ split the ^string `s` after every separator string `sep` in an iterator fashion
+ consumes the original string till the end
+
+ text := "a.b.c.d.e"
+ for str in strings.split_after_iterator(&text, ".") {
+ fmt.eprintln(str) // every loop -> a. b. c. d. e
+ }
+*/
split_after_iterator :: proc(s: ^string, sep: string) -> (string, bool) {
return _split_iterator(s, sep, len(sep))
}
@@ -379,6 +513,14 @@ _trim_cr :: proc(s: string) -> string {
return s
}
+/*
+ split the string `s` at every line break '\n'
+ return an allocated slice of strings
+
+ a := "a\nb\nc\nd\ne"
+ b := strings.split_lines(a)
+ fmt.eprintln(b) // [a, b, c, d, e]
+*/
split_lines :: proc(s: string, allocator := context.allocator) -> []string {
sep :: "\n"
lines := _split(s, sep, 0, -1, allocator)
@@ -388,6 +530,14 @@ split_lines :: proc(s: string, allocator := context.allocator) -> []string {
return lines
}
+/*
+ split the string `s` at every line break '\n' for `n` parts
+ return an allocated slice of strings
+
+ a := "a\nb\nc\nd\ne"
+ b := strings.split_lines_n(a, 3)
+ fmt.eprintln(b) // [a, b, c\nd\ne]
+*/
split_lines_n :: proc(s: string, n: int, allocator := context.allocator) -> []string {
sep :: "\n"
lines := _split(s, sep, 0, n, allocator)
@@ -397,6 +547,14 @@ split_lines_n :: proc(s: string, n: int, allocator := context.allocator) -> []st
return lines
}
+/*
+ split the string `s` at every line break '\n' leaving the '\n' in the resulting strings
+ return an allocated slice of strings
+
+ a := "a\nb\nc\nd\ne"
+ b := strings.split_lines_after(a)
+ fmt.eprintln(b) // [a\n, b\n, c\n, d\n, e]
+*/
split_lines_after :: proc(s: string, allocator := context.allocator) -> []string {
sep :: "\n"
lines := _split(s, sep, len(sep), -1, allocator)
@@ -406,6 +564,15 @@ split_lines_after :: proc(s: string, allocator := context.allocator) -> []string
return lines
}
+/*
+ split the string `s` at every line break '\n' leaving the '\n' in the resulting strings
+ only runs for `n` parts
+ return an allocated slice of strings
+
+ a := "a\nb\nc\nd\ne"
+ b := strings.split_lines_after_n(a, 3)
+ fmt.eprintln(b) // [a\n, b\n, c\nd\ne]
+*/
split_lines_after_n :: proc(s: string, n: int, allocator := context.allocator) -> []string {
sep :: "\n"
lines := _split(s, sep, len(sep), n, allocator)
@@ -415,21 +582,45 @@ split_lines_after_n :: proc(s: string, n: int, allocator := context.allocator) -
return lines
}
+/*
+ split the string `s` at every line break '\n'
+ returns the current split string every iteration till the string is consumed
+
+ text := "a\nb\nc\nd\ne"
+ for str in strings.split_lines_iterator(&text) {
+ fmt.eprintln(str) // every loop -> a b c d e
+ }
+*/
split_lines_iterator :: proc(s: ^string) -> (line: string, ok: bool) {
sep :: "\n"
line = _split_iterator(s, sep, 0) or_return
return _trim_cr(line), true
}
+/*
+ split the string `s` at every line break '\n'
+ returns the current split string every iteration till the string is consumed
+
+ text := "a\nb\nc\nd\ne"
+ for str in strings.split_lines_after_iterator(&text) {
+ fmt.eprintln(str) // every loop -> a\n b\n c\n d\n e
+ }
+*/
split_lines_after_iterator :: proc(s: ^string) -> (line: string, ok: bool) {
sep :: "\n"
line = _split_iterator(s, sep, len(sep)) or_return
return _trim_cr(line), true
}
+/*
+ returns the byte offset of the first byte `c` in the string `s` it finds, -1 when not found
+ can't find utf8 based runes
-
-
+ strings.index_byte("test", 't') -> 0
+ strings.index_byte("test", 'e') -> 1
+ strings.index_byte("test", 'x') -> -1
+ strings.index_byte("teäst", 'ä') -> -1
+*/
index_byte :: proc(s: string, c: byte) -> int {
for i := 0; i < len(s); i += 1 {
if s[i] == c {
@@ -439,7 +630,15 @@ index_byte :: proc(s: string, c: byte) -> int {
return -1
}
-// Returns -1 if c is not present
+/*
+ returns the byte offset of the last byte `c` in the string `s` it finds, -1 when not found
+ can't find utf8 based runes
+
+ strings.last_index_byte("test", 't') -> 3
+ strings.last_index_byte("test", 'e') -> 1
+ strings.last_index_byte("test", 'x') -> -1
+ strings.last_index_byte("teäst", 'ä') -> -1
+*/
last_index_byte :: proc(s: string, c: byte) -> int {
for i := len(s)-1; i >= 0; i -= 1 {
if s[i] == c {
@@ -450,9 +649,50 @@ last_index_byte :: proc(s: string, c: byte) -> int {
}
+/*
+ returns the byte offset of the first rune `r` in the string `s` it finds, -1 when not found
+ avoids invalid runes
+
+ strings.index_rune("abcädef", 'x') -> -1
+ strings.index_rune("abcädef", 'a') -> 0
+ strings.index_rune("abcädef", 'b') -> 1
+ strings.index_rune("abcädef", 'c') -> 2
+ strings.index_rune("abcädef", 'ä') -> 3
+ strings.index_rune("abcädef", 'd') -> 5
+ strings.index_rune("abcädef", 'e') -> 6
+ strings.index_rune("abcädef", 'f') -> 7
+*/
+index_rune :: proc(s: string, r: rune) -> int {
+ switch {
+ case 0 <= r && r < utf8.RUNE_SELF:
+ return index_byte(s, byte(r))
+
+ case r == utf8.RUNE_ERROR:
+ for c, i in s {
+ if c == utf8.RUNE_ERROR {
+ return i
+ }
+ }
+ return -1
+
+ case !utf8.valid_rune(r):
+ return -1
+ }
+
+ b, w := utf8.encode_rune(r)
+ return index(s, string(b[:w]))
+}
@private PRIME_RABIN_KARP :: 16777619
+/*
+ returns the byte offset of the string `substr` in the string `s`, -1 when not found
+
+ strings.index("test", "t") -> 0
+ strings.index("test", "te") -> 0
+ strings.index("test", "st") -> 2
+ strings.index("test", "tt") -> -1
+*/
index :: proc(s, substr: string) -> int {
hash_str_rabin_karp :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) {
for i := 0; i < len(s); i += 1 {
@@ -503,6 +743,14 @@ index :: proc(s, substr: string) -> int {
return -1
}
+/*
+ returns the last byte offset of the string `substr` in the string `s`, -1 when not found
+
+ strings.last_index("test", "t") -> 3
+ strings.last_index("test", "te") -> 0
+ strings.last_index("test", "st") -> 2
+ strings.last_index("test", "tt") -> -1
+*/
last_index :: proc(s, substr: string) -> int {
hash_str_rabin_karp_reverse :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) {
for i := len(s) - 1; i >= 0; i -= 1 {
@@ -551,7 +799,15 @@ last_index :: proc(s, substr: string) -> int {
return -1
}
-// index_any returns the index of the first char of `chars` found in `s`. -1 if not found.
+/*
+ returns the index of any first char of `chars` found in `s`, -1 if not found
+
+ strings.index_any("test", "s") -> 2
+ strings.index_any("test", "se") -> 1
+ strings.index_any("test", "et") -> 0
+ strings.index_any("test", "set") -> 0
+ strings.index_any("test", "x") -> -1
+*/
index_any :: proc(s, chars: string) -> int {
if chars == "" {
return -1
@@ -584,6 +840,16 @@ index_any :: proc(s, chars: string) -> int {
return -1
}
+/*
+ returns the index of the last char in `s` that is contained in `chars`, -1 if not found
+ iterates the string in reverse
+
+ strings.last_index_any("test", "s") -> 2
+ strings.last_index_any("test", "se") -> 2
+ strings.last_index_any("test", "et") -> 3
+ strings.last_index_any("test", "set") -> 3
+ strings.last_index_any("test", "x") -> -1
+*/
last_index_any :: proc(s, chars: string) -> int {
if chars == "" {
return -1
@@ -633,6 +899,16 @@ last_index_any :: proc(s, chars: string) -> int {
return -1
}
+/*
+ returns the count of the string `substr` found in the string `s`
+ returns the rune_count + 1 of the string `s` on empty `substr`
+
+ strings.count("abbccc", "a") -> 1
+ strings.count("abbccc", "b") -> 2
+ strings.count("abbccc", "c") -> 3
+ strings.count("abbccc", "ab") -> 1
+ strings.count("abbccc", " ") -> 0
+*/
count :: proc(s, substr: string) -> int {
if len(substr) == 0 { // special case
return rune_count(s) + 1
@@ -668,7 +944,12 @@ count :: proc(s, substr: string) -> int {
return n
}
+/*
+ repeats the string `s` multiple `count` times and returns the allocated string
+ panics when `count` is below 0
+ strings.repeat("abc", 2) -> "abcabc"
+*/
repeat :: proc(s: string, count: int, allocator := context.allocator) -> string {
if count < 0 {
panic("strings: negative repeat count")
@@ -685,11 +966,28 @@ repeat :: proc(s: string, count: int, allocator := context.allocator) -> string
return string(b)
}
+/*
+ replaces all instances of `old` in the string `s` with the `new` string
+ returns the `output` string and true when an allocation through a replace happened
+
+ strings.replace_all("xyzxyz", "xyz", "abc") -> "abcabc", true
+ strings.replace_all("xyzxyz", "abc", "xyz") -> "xyzxyz", false
+ strings.replace_all("xyzxyz", "xy", "z") -> "zzzz", true
+*/
replace_all :: proc(s, old, new: string, allocator := context.allocator) -> (output: string, was_allocation: bool) {
return replace(s, old, new, -1, allocator)
}
-// if n < 0, no limit on the number of replacements
+/*
+ replaces `n` instances of `old` in the string `s` with the `new` string
+ if n < 0, no limit on the number of replacements
+ returns the `output` string and true when an allocation through a replace happened
+
+ strings.replace("xyzxyz", "xyz", "abc", 2) -> "abcabc", true
+ strings.replace("xyzxyz", "xyz", "abc", 1) -> "abcxyz", true
+ strings.replace("xyzxyz", "abc", "xyz", -1) -> "xyzxyz", false
+ strings.replace("xyzxyz", "xy", "z", -1) -> "zzzz", true
+*/
replace :: proc(s, old, new: string, n: int, allocator := context.allocator) -> (output: string, was_allocation: bool) {
if old == new || n == 0 {
was_allocation = false
@@ -730,17 +1028,35 @@ replace :: proc(s, old, new: string, n: int, allocator := context.allocator) ->
return
}
+/*
+ removes the `key` string `n` times from the `s` string
+ if n < 0, no limit on the number of removes
+ returns the `output` string and true when an allocation through a remove happened
+
+ strings.remove("abcabc", "abc", 1) -> "abc", true
+ strings.remove("abcabc", "abc", -1) -> "", true
+ strings.remove("abcabc", "a", -1) -> "bcbc", true
+ strings.remove("abcabc", "x", -1) -> "abcabc", false
+*/
remove :: proc(s, key: string, n: int, allocator := context.allocator) -> (output: string, was_allocation: bool) {
return replace(s, key, "", n, allocator)
}
+/*
+ removes all the `key` string instances from the `s` string
+ returns the `output` string and true when an allocation through a remove happened
+
+ strings.remove_all("abcabc", "abc") -> "", true
+ strings.remove_all("abcabc", "a") -> "bcbc", true
+ strings.remove_all("abcabc", "x") -> "abcabc", false
+*/
remove_all :: proc(s, key: string, allocator := context.allocator) -> (output: string, was_allocation: bool) {
return remove(s, key, -1, allocator)
}
@(private) _ascii_space := [256]bool{'\t' = true, '\n' = true, '\v' = true, '\f' = true, '\r' = true, ' ' = true}
-
+// return true when the `r` rune is '\t', '\n', '\v', '\f', '\r' or ' '
is_ascii_space :: proc(r: rune) -> bool {
if r < utf8.RUNE_SELF {
return _ascii_space[u8(r)]
@@ -748,6 +1064,7 @@ is_ascii_space :: proc(r: rune) -> bool {
return false
}
+// returns true when the `r` rune is any ascii or utf8 based whitespace
is_space :: proc(r: rune) -> bool {
if r < 0x2000 {
switch r {
@@ -766,10 +1083,24 @@ is_space :: proc(r: rune) -> bool {
return false
}
+// returns true when the `r` rune is a nul byte
is_null :: proc(r: rune) -> bool {
return r == 0x0000
}
+/*
+ runs through the `s` string linearly and watches whether the `p` procedure matches the `truth` bool
+ returns the byte offset of the first matching rune or -1 when no match was found
+
+ call :: proc(r: rune) -> bool {
+ return r == 'a'
+ }
+ strings.index_proc("abcabc", call) -> 0
+ strings.index_proc("cbacba", call) -> 2
+ strings.index_proc("cbacba", call, false) -> 0
+ strings.index_proc("abcabc", call, false) -> 1
+ strings.index_proc("xyz", call) -> -1
+*/
index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int {
for r, i in s {
if p(r) == truth {
@@ -779,6 +1110,7 @@ index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int {
return -1
}
+// same as `index_proc` but with a `p` procedure taking a rawptr for state
index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
for r, i in s {
if p(state, r) == truth {
@@ -788,6 +1120,7 @@ index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: r
return -1
}
+// same as `index_proc` but runs through the string in reverse
last_index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int {
// TODO(bill): Probably use Rabin-Karp Search
for i := len(s); i > 0; {
@@ -800,6 +1133,7 @@ last_index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int
return -1
}
+// same as `index_proc_with_state` but runs through the string in reverse
last_index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
// TODO(bill): Probably use Rabin-Karp Search
for i := len(s); i > 0; {
@@ -811,7 +1145,17 @@ last_index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, sta
}
return -1
}
+
+/*
+ trims the input string `s` until the procedure `p` returns false
+ does not allocate - only returns a cut variant of the input string
+ returns an empty string when no match was found at all
+ find :: proc(r: rune) -> bool {
+ return r != 'i'
+ }
+ strings.trim_left_proc("testing", find) -> "ing"
+*/
trim_left_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
i := index_proc(s, p, false)
if i == -1 {
@@ -820,29 +1164,10 @@ trim_left_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
return s[i:]
}
-
-index_rune :: proc(s: string, r: rune) -> int {
- switch {
- case 0 <= r && r < utf8.RUNE_SELF:
- return index_byte(s, byte(r))
-
- case r == utf8.RUNE_ERROR:
- for c, i in s {
- if c == utf8.RUNE_ERROR {
- return i
- }
- }
- return -1
-
- case !utf8.valid_rune(r):
- return -1
- }
-
- b, w := utf8.encode_rune(r)
- return index(s, string(b[:w]))
-}
-
-
+/*
+ trims the input string `s` until the procedure `p` with state returns false
+ returns an empty string when no match was found at all
+*/
trim_left_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr) -> string {
i := index_proc_with_state(s, p, state, false)
if i == -1 {
@@ -851,6 +1176,16 @@ trim_left_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, stat
return s[i:]
}
+/*
+ trims the input string `s` from the right until the procedure `p` returns false
+ does not allocate - only returns a cut variant of the input string
+ returns an empty string when no match was found at all
+
+ find :: proc(r: rune) -> bool {
+ return r != 't'
+ }
+ strings.trim_right_proc("testing", find) -> "test"
+*/
trim_right_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
i := last_index_proc(s, p, false)
if i >= 0 && s[i] >= utf8.RUNE_SELF {
@@ -862,6 +1197,10 @@ trim_right_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
return s[0:i]
}
+/*
+ trims the input string `s` from the right until the procedure `p` with state returns false
+ returns an empty string when no match was found at all
+*/
trim_right_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr) -> string {
i := last_index_proc_with_state(s, p, state, false)
if i >= 0 && s[i] >= utf8.RUNE_SELF {
@@ -873,7 +1212,7 @@ trim_right_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, sta
return s[0:i]
}
-
+// procedure for `trim_*_proc` variants, which has a string rawptr cast + rune comparison
is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
if state == nil {
return false
@@ -887,7 +1226,7 @@ is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
return false
}
-
+// trims the `cutset` string from the `s` string
trim_left :: proc(s: string, cutset: string) -> string {
if s == "" || cutset == "" {
return s
@@ -896,6 +1235,7 @@ trim_left :: proc(s: string, cutset: string) -> string {
return trim_left_proc_with_state(s, is_in_cutset, &state)
}
+// trims the `cutset` string from the `s` string from the right
trim_right :: proc(s: string, cutset: string) -> string {
if s == "" || cutset == "" {
return s
@@ -904,35 +1244,48 @@ trim_right :: proc(s: string, cutset: string) -> string {
return trim_right_proc_with_state(s, is_in_cutset, &state)
}
+// trims the `cutset` string from the `s` string, both from left and right
trim :: proc(s: string, cutset: string) -> string {
return trim_right(trim_left(s, cutset), cutset)
}
+// trims until a valid non space rune: "\t\txyz\t\t" -> "xyz\t\t"
trim_left_space :: proc(s: string) -> string {
return trim_left_proc(s, is_space)
}
+// trims from the right until a valid non space rune: "\t\txyz\t\t" -> "\t\txyz"
trim_right_space :: proc(s: string) -> string {
return trim_right_proc(s, is_space)
}
+// trims from both sides until a valid non space rune: "\t\txyz\t\t" -> "xyz"
trim_space :: proc(s: string) -> string {
return trim_right_space(trim_left_space(s))
}
-
+// trims nul runes from the left: "\x00\x00testing\x00\x00" -> "testing\x00\x00"
trim_left_null :: proc(s: string) -> string {
return trim_left_proc(s, is_null)
}
+// trims nul runes from the right: "\x00\x00testing\x00\x00" -> "\x00\x00testing"
trim_right_null :: proc(s: string) -> string {
return trim_right_proc(s, is_null)
}
+// trims nul runes from both sides: "\x00\x00testing\x00\x00" -> "testing"
trim_null :: proc(s: string) -> string {
return trim_right_null(trim_left_null(s))
}
+/*
+ trims a `prefix` string from the start of the `s` string and returns the trimmed string
+ returns the input string `s` when no prefix was found
+
+ strings.trim_prefix("testing", "test") -> "ing"
+ strings.trim_prefix("testing", "abc") -> "testing"
+*/
trim_prefix :: proc(s, prefix: string) -> string {
if has_prefix(s, prefix) {
return s[len(prefix):]
@@ -940,6 +1293,13 @@ trim_prefix :: proc(s, prefix: string) -> string {
return s
}
+/*
+ trims a `suffix` string from the end of the `s` string and returns the trimmed string
+ returns the input string `s` when no suffix was found
+
+ strings.trim_suffix("todo.txt", ".txt") -> "todo"
+ strings.trim_suffix("todo.doc", ".txt") -> "todo.doc"
+*/
trim_suffix :: proc(s, suffix: string) -> string {
if has_suffix(s, suffix) {
return s[:len(s)-len(suffix)]
@@ -947,142 +1307,151 @@ trim_suffix :: proc(s, suffix: string) -> string {
return s
}
-split_multi :: proc(s: string, substrs: []string, skip_empty := false, allocator := context.allocator) -> []string #no_bounds_check {
+/*
+ splits the input string `s` by all possible `substrs` []string
+ returns the allocated []string, nil on empty input, any empty substring or no matches
+
+ splits := [?]string { "---", "~~~", ".", "_", "," }
+ res := strings.split_multi("testing,this.out_nice---done~~~last", splits[:])
+ fmt.eprintln(res) // -> [testing, this, out, nice, done, last]
+*/
+split_multi :: proc(s: string, substrs: []string, allocator := context.allocator) -> (buf: []string) #no_bounds_check {
if s == "" || len(substrs) <= 0 {
- return nil
+ return
}
- sublen := len(substrs[0])
-
- for substr in substrs[1:] {
- sublen = min(sublen, len(substr))
+ // disallow "" substr
+ for substr in substrs {
+ if len(substr) == 0 {
+ return
+ }
}
- shared := len(s) - sublen
+ // TODO maybe remove duplicate substrs
+ // sort substrings by string size, largest to smallest
+ temp_substrs := slice.clone(substrs, context.temp_allocator)
+ slice.sort_by(temp_substrs, proc(a, b: string) -> bool {
+ return len(a) > len(b)
+ })
- if shared <= 0 {
- return nil
- }
+ substrings_found: int
+ temp := s
- // number, index, last
- n, i, l := 0, 0, 0
-
- // count results
- first_pass: for i <= shared {
- for substr in substrs {
- if s[i:i+sublen] == substr {
- if !skip_empty || i - l > 0 {
- n += 1
- }
-
- i += sublen
- l = i
+ // count substr results found in string
+ first_pass: for len(temp) > 0 {
+ for substr in temp_substrs {
+ size := len(substr)
+ // check range and compare string to substr
+ if size <= len(temp) && temp[:size] == substr {
+ substrings_found += 1
+ temp = temp[size:]
continue first_pass
}
}
- _, skip := utf8.decode_rune_in_string(s[i:])
- i += skip
+ // step through string
+ _, skip := utf8.decode_rune_in_string(temp[:])
+ temp = temp[skip:]
}
- if !skip_empty || len(s) - l > 0 {
- n += 1
+ // skip when no results
+ if substrings_found < 1 {
+ return
}
- if n < 1 {
- // no results
- return nil
- }
-
- buf := make([]string, n, allocator)
-
- n, i, l = 0, 0, 0
-
- // slice results
- second_pass: for i <= shared {
- for substr in substrs {
- if s[i:i+sublen] == substr {
- if !skip_empty || i - l > 0 {
- buf[n] = s[l:i]
- n += 1
- }
+ buf = make([]string, substrings_found + 1, allocator)
+ buf_index: int
+ temp = s
+ temp_old := temp
- i += sublen
- l = i
+ // gather results in the same fashion
+ second_pass: for len(temp) > 0 {
+ for substr in temp_substrs {
+ size := len(substr)
+ // check range and compare string to substr
+ if size <= len(temp) && temp[:size] == substr {
+ buf[buf_index] = temp_old[:len(temp_old) - len(temp)]
+ buf_index += 1
+ temp = temp[size:]
+ temp_old = temp
continue second_pass
}
}
- _, skip := utf8.decode_rune_in_string(s[i:])
- i += skip
+ // step through string
+ _, skip := utf8.decode_rune_in_string(temp[:])
+ temp = temp[skip:]
}
- if !skip_empty || len(s) - l > 0 {
- buf[n] = s[l:]
- }
+ buf[buf_index] = temp_old[:]
return buf
}
+// state for the split multi iterator
+Split_Multi :: struct {
+ temp: string,
+ temp_old: string,
+ substrs: []string,
+}
+// returns split multi state holding a copy of `substrs` sorted longest first, allocated with `context.temp_allocator`
+split_multi_init :: proc(s: string, substrs: []string) -> Split_Multi {
+ // sort substrings, largest to smallest
+ temp_substrs := slice.clone(substrs, context.temp_allocator)
+ slice.sort_by(temp_substrs, proc(a, b: string) -> bool {
+ return len(a) > len(b)
+ })
-
-split_multi_iterator :: proc(s: ^string, substrs: []string, skip_empty := false) -> (string, bool) #no_bounds_check {
- if s == nil || s^ == "" || len(substrs) <= 0 {
- return "", false
- }
-
- sublen := len(substrs[0])
-
- for substr in substrs[1:] {
- sublen = min(sublen, len(substr))
+ return {
+ temp = s,
+ temp_old = s,
+ substrs = temp_substrs,
}
+}
- shared := len(s) - sublen
+/*
+ splits the input string `s` by all possible `substrs` []string in an iterator fashion
+ returns the split string every iteration, the full string on no match
- if shared <= 0 {
- return "", false
+ splits := [?]string { "---", "~~~", ".", "_", "," }
+ state := strings.split_multi_init("testing,this.out_nice---done~~~last", splits[:])
+ for str in strings.split_multi_iterate(&state) {
+ fmt.eprintln(str) // every iteration -> [testing, this, out, nice, done, last]
}
-
- // index, last
- i, l := 0, 0
-
- loop: for i <= shared {
+*/
+split_multi_iterate :: proc(using sm: ^Split_Multi) -> (res: string, ok: bool) #no_bounds_check {
+ pass: for len(temp) > 0 {
for substr in substrs {
- if s[i:i+sublen] == substr {
- if !skip_empty || i - l > 0 {
- res := s[l:i]
- s^ = s[i:]
- return res, true
- }
+ size := len(substr)
- i += sublen
- l = i
-
- continue loop
+ // check range and compare string to substr
+ if size <= len(temp) && temp[:size] == substr {
+ res = temp_old[:len(temp_old) - len(temp)]
+ temp = temp[size:]
+ temp_old = temp
+ ok = true
+ return
}
}
- _, skip := utf8.decode_rune_in_string(s[i:])
- i += skip
+ // step through string
+ _, skip := utf8.decode_rune_in_string(temp[:])
+ temp = temp[skip:]
}
- if !skip_empty || len(s) - l > 0 {
- res := s[l:]
- s^ = s[len(s):]
- return res, true
+ // allow last iteration
+ if temp_old != "" {
+ res = temp_old[:]
+ ok = true
+ temp_old = ""
}
- return "", false
+ return
}
-
-
-
-
-
// scrub scrubs invalid utf-8 characters and replaces them with the replacement string
// Adjacent invalid bytes are only replaced once
scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> string {
@@ -1117,7 +1486,13 @@ scrub :: proc(s: string, replacement: string, allocator := context.allocator) ->
return to_string(b)
}
+/*
+ returns a reversed version of the `s` string
+ a := "abcxyz"
+ b := strings.reverse(a)
+ fmt.eprintln(a, b) // abcxyz zyxcba
+*/
reverse :: proc(s: string, allocator := context.allocator) -> string {
str := s
n := len(str)
@@ -1133,12 +1508,19 @@ reverse :: proc(s: string, allocator := context.allocator) -> string {
return string(buf)
}
+/*
+ expands the string to a grid spaced by `tab_size` whenever a `\t` character appears
+ returns the tabbed string, panics on tab_size <= 0
+
+ strings.expand_tabs("abc1\tabc2\tabc3", 4) -> "abc1    abc2    abc3"
+ strings.expand_tabs("abc1\tabc2\tabc3", 5) -> "abc1 abc2 abc3"
+ strings.expand_tabs("abc1\tabc2\tabc3", 6) -> "abc1  abc2  abc3"
+*/
expand_tabs :: proc(s: string, tab_size: int, allocator := context.allocator) -> string {
if tab_size <= 0 {
panic("tab size must be positive")
}
-
if s == "" {
return ""
}
@@ -1176,7 +1558,16 @@ expand_tabs :: proc(s: string, tab_size: int, allocator := context.allocator) ->
return to_string(b)
}
-
+/*
+ splits the `str` string by the separator `sep` string and returns 3 parts
+ `head`: before the split, `match`: the separator, `tail`: the end of the split
+ returns the input string as `head`, with empty `match` and `tail`, when the `sep` was not found
+
+ text := "testing this out"
+ strings.partition(text, " this ") -> head: "testing", match: " this ", tail: "out"
+ strings.partition(text, "hi") -> head: "testing t", match: "hi", tail: "s out"
+ strings.partition(text, "xyz") -> head: "testing this out", match: "", tail: ""
+*/
partition :: proc(str, sep: string) -> (head, match, tail: string) {
i := index(str, sep)
if i == -1 {
@@ -1392,7 +1783,7 @@ fields_iterator :: proc(s: ^string) -> (field: string, ok: bool) {
return "", false
}
- field = s[start:]
+ field = s[:len(s)]
ok = true
s^ = s[len(s):]
return