add string documentation & examples, fix & cleanup string_multi

author: Michael Kutowski <skytrias@protonmail.com> 2022-03-27 11:39:17 +0200
committer: GitHub <noreply@github.com> 2022-03-27 11:39:17 +0200
commit: 58f4d533b72d199848e4ebb291b7737312b4957a (patch)
tree: 631e9f68467baf8073b1ad41bf7f2acad80a3542 /core/strings
parent: 92f985abd5c4e5017a644266816fb2b8326157be (diff)
1 files changed, 525 insertions, 134 deletions
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index e5bd60d33..452c0ca0c 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -1,16 +1,21 @@
+// simple procedures to manipulate UTF-8 encoded strings
 package strings
 
 import "core:io"
 import "core:mem"
+import "core:slice"
 import "core:unicode"
 import "core:unicode/utf8"
 
+// returns a clone of the string `s` allocated using the `allocator`
 clone :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> string {
 	c := make([]byte, len(s), allocator, loc)
 	copy(c, s)
 	return string(c[:len(s)])
 }
 
+// returns a clone of the string `s` allocated using the `allocator` as a cstring
+// a nul byte is appended to the clone, to make the cstring safe 
 clone_to_cstring :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> cstring {
 	c := make([]byte, len(s)+1, allocator, loc)
 	copy(c, s)
@@ -18,27 +23,35 @@ clone_to_cstring :: proc(s: string, allocator := context.allocator, loc := #call
 	return cstring(&c[0])
 }
 
+// returns a string from a byte pointer `ptr` and byte length `len`
+// the string is valid as long as the memory `ptr` points to stays alive (no copy is made)
 string_from_ptr :: proc(ptr: ^byte, len: int) -> string {
 	return transmute(string)mem.Raw_String{ptr, len}
 }
 
+// returns a string from a byte pointer `ptr` and byte length `len`
+// searches for a nul byte from 0..<len, otherwise `len` will be the end size
 string_from_nul_terminated_ptr :: proc(ptr: ^byte, len: int) -> string {
 	s := transmute(string)mem.Raw_String{ptr, len}
 	s = truncate_to_byte(s, 0)
 	return s
 }
 
-
+// returns the raw ^byte start of the string `str` 
 ptr_from_string :: proc(str: string) -> ^byte {
 	d := transmute(mem.Raw_String)str
 	return d.data
 }
 
+// returns the transmute of string `str` to a cstring
+// not safe since the origin string may not contain a nul byte
 unsafe_string_to_cstring :: proc(str: string) -> cstring {
 	d := transmute(mem.Raw_String)str
 	return cstring(d.data)
 }
 
+// returns a string truncated to the first time it finds the byte `b`
+// uses the `len` of the string `str` when it couldn't find the input
 truncate_to_byte :: proc(str: string, b: byte) -> string {
 	n := index_byte(str, b)
 	if n < 0 {
@@ -46,6 +59,9 @@ truncate_to_byte :: proc(str: string, b: byte) -> string {
 	}
 	return str[:n]
 }
+
+// returns a string truncated to the first time it finds the rune `r`
+// uses the `len` of the string `str` when it couldn't find the input
 truncate_to_rune :: proc(str: string, r: rune) -> string {
 	n := index_rune(str, r)
 	if n < 0 {
@@ -54,20 +70,28 @@ truncate_to_rune :: proc(str: string, r: rune) -> string {
 	return str[:n]
 }
 
+// returns a cloned string of the byte array `s` using the `allocator`
+// appends a trailing nul byte after the copied data (not counted in the length of the returned string)
 clone_from_bytes :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> string {
 	c := make([]byte, len(s)+1, allocator, loc)
 	copy(c, s)
 	c[len(s)] = 0
 	return string(c[:len(s)])
 }
+
+// returns a clone of the cstring `s` using the `allocator` as a string
 clone_from_cstring :: proc(s: cstring, allocator := context.allocator, loc := #caller_location) -> string {
 	return clone(string(s), allocator, loc)
 }
+
+// returns a cloned string from the pointer `ptr` and a byte length `len` using the `allocator`
+// same as `string_from_ptr` but allocates
 clone_from_ptr :: proc(ptr: ^byte, len: int, allocator := context.allocator, loc := #caller_location) -> string {
 	s := string_from_ptr(ptr, len)
 	return clone(s, allocator, loc)
 }
 
+// overload to clone from a `string`, `[]byte`, `cstring` or a `^byte + length` to a string
 clone_from :: proc{
 	clone,
 	clone_from_bytes,
@@ -75,6 +99,8 @@ clone_from :: proc{
 	clone_from_ptr,
 }
 
+// returns a cloned string from the cstring `ptr` and a byte length `len` using the `allocator`
+// truncates till the first nul byte it finds or the byte len
 clone_from_cstring_bounded :: proc(ptr: cstring, len: int, allocator := context.allocator, loc := #caller_location) -> string {
 	s := string_from_ptr((^u8)(ptr), len)
 	s = truncate_to_byte(s, 0)
@@ -82,11 +108,12 @@ clone_from_cstring_bounded :: proc(ptr: cstring, len: int, allocator := context.
 }
 
 // Compares two strings, returning a value representing which one comes first lexiographically.
-// -1 for `a`; 1 for `b`, or 0 if they are equal.
+// -1 for `lhs`; 1 for `rhs`, or 0 if they are equal.
 compare :: proc(lhs, rhs: string) -> int {
 	return mem.compare(transmute([]byte)lhs, transmute([]byte)rhs)
 }
 
+// returns the byte offset of the rune `r` in the string `s`, -1 when not found
 contains_rune :: proc(s: string, r: rune) -> int {
 	for c, offset in s {
 		if c == r {
@@ -96,20 +123,48 @@ contains_rune :: proc(s: string, r: rune) -> int {
 	return -1
 }
 
+/*
+	returns true when the string `substr` is contained inside the string `s`
+
+	strings.contains("testing", "test") -> true
+	strings.contains("testing", "ing") -> true
+	strings.contains("testing", "text") -> false
+*/
 contains :: proc(s, substr: string) -> bool {
 	return index(s, substr) >= 0
 }
 
+/*
+	returns true when the string `s` contains any of the characters inside the string `chars`
+	
+	strings.contains_any("test", "test") -> true
+	strings.contains_any("test", "ts") -> true
+	strings.contains_any("test", "et") -> true
+	strings.contains_any("test", "a") -> false
+*/
 contains_any :: proc(s, chars: string) -> bool {
 	return index_any(s, chars) >= 0
 }
 
+/*
+	returns the utf8 rune count of the string `s`
 
+	strings.rune_count("test") -> 4
+	strings.rune_count("testö") -> 5, where len("testö") -> 6
+*/
 rune_count :: proc(s: string) -> int {
 	return utf8.rune_count_in_string(s)
 }
 
+/*
+	returns whether the strings `u` and `v` are the same alpha characters
+	works with utf8 string content and ignores different casings
 
+	strings.equal_fold("test", "test") -> true
+	strings.equal_fold("Test", "test") -> true
+	strings.equal_fold("Test", "tEsT") -> true
+	strings.equal_fold("test", "tes") -> false
+*/
 equal_fold :: proc(u, v: string) -> bool {
 	s, t := u, v
 	loop: for s != "" && t != "" {
@@ -153,15 +208,39 @@ equal_fold :: proc(u, v: string) -> bool {
 	return s == t
 }
 
+/*
+	return true when the string `prefix` is contained at the start of the string `s`
+
+	strings.has_prefix("testing", "test") -> true
+	strings.has_prefix("testing", "te") -> true
+	strings.has_prefix("telephone", "te") -> true
+	strings.has_prefix("testing", "est") -> false
+*/
 has_prefix :: proc(s, prefix: string) -> bool {
 	return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
 }
 
+/*
+	returns true when the string `suffix` is contained at the end of the string `s`
+	good example to use this is for file extensions
+
+	strings.has_suffix("todo.txt", ".txt") -> true
+	strings.has_suffix("todo.doc", ".txt") -> false
+	strings.has_suffix("todo.doc.txt", ".txt") -> true
+*/
 has_suffix :: proc(s, suffix: string) -> bool {
 	return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
 }
 
+/*
+	returns a combined string from the slice of strings `a` separated with the `sep` string
+	allocates the string using the `allocator`
 
+	a := [?]string { "a", "b", "c" }
+	b := strings.join(a[:], " ") -> "a b c"
+	c := strings.join(a[:], "-") -> "a-b-c"
+	d := strings.join(a[:], "...") -> "a...b...c"
+*/
 join :: proc(a: []string, sep: string, allocator := context.allocator) -> string {
 	if len(a) == 0 {
 		return ""
@@ -181,6 +260,14 @@ join :: proc(a: []string, sep: string, allocator := context.allocator) -> string
 	return string(b)
 }
 
+/*
+	returns a combined string from the slice of strings `a` without a separator
+	allocates the string using the `allocator`
+	
+
+	a := [?]string { "a", "b", "c" }
+	b := strings.concatenate(a[:]) -> "abc"
+*/
 concatenate :: proc(a: []string, allocator := context.allocator) -> string {
 	if len(a) == 0 {
 		return ""
@@ -199,8 +286,13 @@ concatenate :: proc(a: []string, allocator := context.allocator) -> string {
 }
 
 /*
+
 	`rune_offset` and `rune_length` are in runes, not bytes.
 	If `rune_length` <= 0, then it'll return the remainder of the string starting with `rune_offset`.
+
+	strings.cut("some example text", 0, 4) -> "some"
+	strings.cut("some example text", 2, 2) -> "me"
+	strings.cut("some example text", 5, 7) -> "example"
 */
 cut :: proc(s: string, rune_offset := int(0), rune_length := int(0), allocator := context.allocator) -> (res: string) {
 	s := s; rune_length := rune_length
@@ -307,17 +399,37 @@ split_n :: proc(s, sep: string, n: int, allocator := context.allocator) -> []str
 	return _split(s, sep, 0, n, allocator)
 }
 
+/*
+	splits the string `s` after the separator string `sep` appears
+	returns the slice of split strings allocated using `allocator`
+
+	a := "aaa.bbb.ccc.ddd.eee"
+	aa := strings.split_after(a, ".")
+	fmt.eprintln(aa) // [aaa., bbb., ccc., ddd., eee]
+*/
 split_after :: proc(s, sep: string, allocator := context.allocator) -> []string {
 	return _split(s, sep, len(sep), -1, allocator)
 }
 
+/*
+	splits the string `s` after the separator string `sep` appears into a total of `n` parts
+	returns the slice of split strings allocated using `allocator`
+
+	a := "aaa.bbb.ccc.ddd.eee"
+	aa := strings.split_after_n(a, ".", 3)
+	fmt.eprintln(aa) // [aaa., bbb., ccc.ddd.eee]
+*/
 split_after_n :: proc(s, sep: string, n: int, allocator := context.allocator) -> []string {
 	return _split(s, sep, len(sep), n, allocator)
 }
 
-
 @private
 _split_iterator :: proc(s: ^string, sep: string, sep_save: int) -> (res: string, ok: bool) {
+	// stop once the string is empty or nil
+	if s == nil || len(s^) == 0 {
+		return
+	}
+
 	if sep == "" {
 		res = s[:]
 		ok = true
@@ -339,8 +451,16 @@ _split_iterator :: proc(s: ^string, sep: string, sep_save: int) -> (res: string,
 	return
 }
 
-@private
-_split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (res: string, ok: bool) {
+/*
+	split the ^string `s` by the byte separator `sep` in an iterator fashion
+	consumes the original string till the end, leaving the string `s` with len == 0
+
+	text := "a.b.c.d.e"
+	for str in strings.split_by_byte_iterator(&text, '.') {
+		fmt.eprintln(str) // every loop -> a b c d e
+	}
+*/
+split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (res: string, ok: bool) {
 	m := index_byte(s^, sep)
 	if m < 0 {
 		// not found
@@ -355,14 +475,28 @@ _split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (res: string, ok: bool)
 	return
 }
 
-split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (string, bool) {
-	return _split_by_byte_iterator(s, sep)
-}
+/*
+	split the ^string `s` by the separator string `sep` in an iterator fashion
+	consumes the original string till the end
 
+	text := "a.b.c.d.e"
+	for str in strings.split_iterator(&text, ".") {
+		fmt.eprintln(str) // every loop -> a b c d e
+	}
+*/
 split_iterator :: proc(s: ^string, sep: string) -> (string, bool) {
 	return _split_iterator(s, sep, 0)
 }
 
+/*
+	split the ^string `s` after every separator string `sep` in an iterator fashion
+	consumes the original string till the end
+
+	text := "a.b.c.d.e"
+	for str in strings.split_after_iterator(&text, ".") {
+		fmt.eprintln(str) // every loop -> a. b. c. d. e
+	}
+*/
 split_after_iterator :: proc(s: ^string, sep: string) -> (string, bool) {
 	return _split_iterator(s, sep, len(sep))
 }
@@ -379,6 +513,14 @@ _trim_cr :: proc(s: string) -> string {
 	return s
 }
 
+/*
+	split the string `s` at every line break '\n'
+	return an allocated slice of strings
+
+	a := "a\nb\nc\nd\ne"
+	b := strings.split_lines(a)
+	fmt.eprintln(b) // [a, b, c, d, e]
+*/
 split_lines :: proc(s: string, allocator := context.allocator) -> []string {
 	sep :: "\n"
 	lines := _split(s, sep, 0, -1, allocator)
@@ -388,6 +530,14 @@ split_lines :: proc(s: string, allocator := context.allocator) -> []string {
 	return lines
 }
 
+/*
+	split the string `s` at every line break '\n' for `n` parts
+	return an allocated slice of strings
+
+	a := "a\nb\nc\nd\ne"
+	b := strings.split_lines_n(a, 3)
+	fmt.eprintln(b) // [a, b, c\nd\ne]
+*/
 split_lines_n :: proc(s: string, n: int, allocator := context.allocator) -> []string {
 	sep :: "\n"
 	lines := _split(s, sep, 0, n, allocator)
@@ -397,6 +547,14 @@ split_lines_n :: proc(s: string, n: int, allocator := context.allocator) -> []st
 	return lines
 }
 
+/*
+	split the string `s` at every line break '\n' leaving the '\n' in the resulting strings
+	return an allocated slice of strings
+
+	a := "a\nb\nc\nd\ne"
+	b := strings.split_lines_after(a)
+	fmt.eprintln(b) // [a\n, b\n, c\n, d\n, e]
+*/
 split_lines_after :: proc(s: string, allocator := context.allocator) -> []string {
 	sep :: "\n"
 	lines := _split(s, sep, len(sep), -1, allocator)
@@ -406,6 +564,15 @@ split_lines_after :: proc(s: string, allocator := context.allocator) -> []string
 	return lines
 }
 
+/*
+	split the string `s` at every line break '\n' leaving the '\n' in the resulting strings
+	only runs for `n` parts
+	return an allocated slice of strings
+
+	a := "a\nb\nc\nd\ne"
+	b := strings.split_lines_after_n(a, 3)
+	fmt.eprintln(b) // [a\n, b\n, c\nd\ne]
+*/
 split_lines_after_n :: proc(s: string, n: int, allocator := context.allocator) -> []string {
 	sep :: "\n"
 	lines := _split(s, sep, len(sep), n, allocator)
@@ -415,21 +582,45 @@ split_lines_after_n :: proc(s: string, n: int, allocator := context.allocator) -
 	return lines
 }
 
+/*
+	split the string `s` at every line break '\n'
+	returns the current split string every iteration till the string is consumed
+
+	text := "a\nb\nc\nd\ne"
+	for str in strings.split_lines_iterator(&text) {
+		fmt.eprintln(str) // every loop -> a b c d e
+	}
+*/
 split_lines_iterator :: proc(s: ^string) -> (line: string, ok: bool) {
 	sep :: "\n"
 	line = _split_iterator(s, sep, 0) or_return
 	return _trim_cr(line), true
 }
 
+/*
+	split the string `s` at every line break '\n'
+	returns the current split string every iteration till the string is consumed
+
+	text := "a\nb\nc\nd\ne"
+	for str in strings.split_lines_after_iterator(&text) {
+		fmt.eprintln(str) // every loop -> a\n b\n c\n d\n e
+	}
+*/
 split_lines_after_iterator :: proc(s: ^string) -> (line: string, ok: bool) {
 	sep :: "\n"
 	line = _split_iterator(s, sep, len(sep)) or_return
 	return _trim_cr(line), true
 }
 
+/*
+	returns the byte offset of the first byte `c` in the string `s` it finds, -1 when not found
+	can't find utf8 based runes
 
-
-
+	strings.index_byte("test", 't') -> 0
+	strings.index_byte("test", 'e') -> 1
+	strings.index_byte("test", 'x') -> -1
+	strings.index_byte("teäst", 'ä') -> -1
+*/
 index_byte :: proc(s: string, c: byte) -> int {
 	for i := 0; i < len(s); i += 1 {
 		if s[i] == c {
@@ -439,7 +630,15 @@ index_byte :: proc(s: string, c: byte) -> int {
 	return -1
 }
 
-// Returns -1 if c is not present
+/*
+	returns the byte offset of the last byte `c` in the string `s` it finds, -1 when not found
+	can't find utf8 based runes
+
+	strings.last_index_byte("test", 't') -> 3
+	strings.last_index_byte("test", 'e') -> 1
+	strings.last_index_byte("test", 'x') -> -1
+	strings.last_index_byte("teäst", 'ä') -> -1
+*/
 last_index_byte :: proc(s: string, c: byte) -> int {
 	for i := len(s)-1; i >= 0; i -= 1 {
 		if s[i] == c {
@@ -450,9 +649,50 @@ last_index_byte :: proc(s: string, c: byte) -> int {
 }
 
 
+/*
+	returns the byte offset of the first rune `r` in the string `s` it finds, -1 when not found
+	avoids invalid runes
+
+	strings.index_rune("abcädef", 'x') -> -1
+	strings.index_rune("abcädef", 'a') -> 0
+	strings.index_rune("abcädef", 'b') -> 1	
+	strings.index_rune("abcädef", 'c') -> 2	
+	strings.index_rune("abcädef", 'ä') -> 3	
+	strings.index_rune("abcädef", 'd') -> 5	
+	strings.index_rune("abcädef", 'e') -> 6
+	strings.index_rune("abcädef", 'f') -> 7	
+*/
+index_rune :: proc(s: string, r: rune) -> int {
+	switch {
+	case 0 <= r && r < utf8.RUNE_SELF:
+		return index_byte(s, byte(r))
+
+	case r == utf8.RUNE_ERROR:
+		for c, i in s {
+			if c == utf8.RUNE_ERROR {
+				return i
+			}
+		}
+		return -1
+
+	case !utf8.valid_rune(r):
+		return -1
+	}
+
+	b, w := utf8.encode_rune(r)
+	return index(s, string(b[:w]))
+}
 
 @private PRIME_RABIN_KARP :: 16777619
 
+/*
+	returns the byte offset of the string `substr` in the string `s`, -1 when not found
+	
+	strings.index("test", "t") -> 0
+	strings.index("test", "te") -> 0
+	strings.index("test", "st") -> 2
+	strings.index("test", "tt") -> -1
+*/
 index :: proc(s, substr: string) -> int {
 	hash_str_rabin_karp :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) {
 		for i := 0; i < len(s); i += 1 {
@@ -503,6 +743,14 @@ index :: proc(s, substr: string) -> int {
 	return -1
 }
 
+/*
+	returns the last byte offset of the string `substr` in the string `s`, -1 when not found
+	
+	strings.last_index("test", "t") -> 3
+	strings.last_index("test", "te") -> 0
+	strings.last_index("test", "st") -> 2
+	strings.last_index("test", "tt") -> -1
+*/
 last_index :: proc(s, substr: string) -> int {
 	hash_str_rabin_karp_reverse :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) {
 		for i := len(s) - 1; i >= 0; i -= 1 {
@@ -551,7 +799,15 @@ last_index :: proc(s, substr: string) -> int {
 	return -1
 }
 
-// index_any returns the index of the first char of `chars` found in `s`. -1 if not found.
+/*
+	returns the index of any first char of `chars` found in `s`, -1 if not found
+	
+	strings.index_any("test", "s") -> 2
+	strings.index_any("test", "se") -> 1
+	strings.index_any("test", "et") -> 0
+	strings.index_any("test", "set") -> 0
+	strings.index_any("test", "x") -> -1
+*/
 index_any :: proc(s, chars: string) -> int {
 	if chars == "" {
 		return -1
@@ -584,6 +840,16 @@ index_any :: proc(s, chars: string) -> int {
 	return -1
 }
 
+/*
+	returns the index of the last char in `s` that is contained in `chars`, -1 if not found
+	iterates the string in reverse
+
+	strings.last_index_any("test", "s") -> 2
+	strings.last_index_any("test", "se") -> 2
+	strings.last_index_any("test", "et") -> 3
+	strings.last_index_any("test", "set") -> 3
+	strings.last_index_any("test", "x") -> -1
+*/
 last_index_any :: proc(s, chars: string) -> int {
 	if chars == "" {
 		return -1
@@ -633,6 +899,16 @@ last_index_any :: proc(s, chars: string) -> int {
 	return -1
 }
 
+/*
+	returns the count of the string `substr` found in the string `s`
+	returns the rune_count + 1 of the string `s` on empty `substr`
+
+	strings.count("abbccc", "a") -> 1
+	strings.count("abbccc", "b") -> 2
+	strings.count("abbccc", "c") -> 3
+	strings.count("abbccc", "ab") -> 1
+	strings.count("abbccc", " ") -> 0
+*/
 count :: proc(s, substr: string) -> int {
 	if len(substr) == 0 { // special case
 		return rune_count(s) + 1
@@ -668,7 +944,12 @@ count :: proc(s, substr: string) -> int {
 	return n
 }
 
+/*
+	repeats the string `s` multiple `count` times and returns the allocated string
+	panics when `count` is below 0
 
+	strings.repeat("abc", 2) -> "abcabc" 
+*/
 repeat :: proc(s: string, count: int, allocator := context.allocator) -> string {
 	if count < 0 {
 		panic("strings: negative repeat count")
@@ -685,11 +966,28 @@ repeat :: proc(s: string, count: int, allocator := context.allocator) -> string
 	return string(b)
 }
 
+/*
+	replaces all instances of `old` in the string `s` with the `new` string
+	returns the `output` string and true when an allocation through a replace happened
+
+	strings.replace_all("xyzxyz", "xyz", "abc") -> "abcabc", true
+	strings.replace_all("xyzxyz", "abc", "xyz") -> "xyzxyz", false
+	strings.replace_all("xyzxyz", "xy", "z") -> "zzzz", true
+*/
 replace_all :: proc(s, old, new: string, allocator := context.allocator) -> (output: string, was_allocation: bool) {
 	return replace(s, old, new, -1, allocator)
 }
 
-// if n < 0, no limit on the number of replacements
+/*
+	replaces `n` instances of `old` in the string `s` with the `new` string
+	if n < 0, no limit on the number of replacements
+	returns the `output` string and true when an allocation through a replace happened
+
+	strings.replace("xyzxyz", "xyz", "abc", 2) -> "abcabc", true
+	strings.replace("xyzxyz", "xyz", "abc", 1) -> "abcxyz", true
+	strings.replace("xyzxyz", "abc", "xyz", -1) -> "xyzxyz", false
+	strings.replace("xyzxyz", "xy", "z", -1) -> "zzzz", true
+*/
 replace :: proc(s, old, new: string, n: int, allocator := context.allocator) -> (output: string, was_allocation: bool) {
 	if old == new || n == 0 {
 		was_allocation = false
@@ -730,17 +1028,35 @@ replace :: proc(s, old, new: string, n: int, allocator := context.allocator) ->
 	return
 }
 
+/*
+	removes the `key` string `n` times from the `s` string
+	if n < 0, no limit on the number of removes
+	returns the `output` string and true when an allocation through a remove happened
+
+	strings.remove("abcabc", "abc", 1) -> "abc", true
+	strings.remove("abcabc", "abc", -1) -> "", true
+	strings.remove("abcabc", "a", -1) -> "bcbc", true
+	strings.remove("abcabc", "x", -1) -> "abcabc", false
+*/
 remove :: proc(s, key: string, n: int, allocator := context.allocator) -> (output: string, was_allocation: bool) {
 	return replace(s, key, "", n, allocator)
 }
 
+/*
+	removes all the `key` string instances from the `s` string
+	returns the `output` string and true when an allocation through a remove happened
+
+	strings.remove_all("abcabc", "abc") -> "", true
+	strings.remove_all("abcabc", "a") -> "bcbc", true
+	strings.remove_all("abcabc", "x") -> "abcabc", false
+*/
 remove_all :: proc(s, key: string, allocator := context.allocator) -> (output: string, was_allocation: bool) {
 	return remove(s, key, -1, allocator)
 }
 
 @(private) _ascii_space := [256]bool{'\t' = true, '\n' = true, '\v' = true, '\f' = true, '\r' = true, ' ' = true}
 
-
+// return true when the `r` rune is '\t', '\n', '\v', '\f', '\r' or ' '
 is_ascii_space :: proc(r: rune) -> bool {
 	if r < utf8.RUNE_SELF {
 		return _ascii_space[u8(r)]
@@ -748,6 +1064,7 @@ is_ascii_space :: proc(r: rune) -> bool {
 	return false
 }
 
+// returns true when the `r` rune is any ASCII or utf8 based whitespace
 is_space :: proc(r: rune) -> bool {
 	if r < 0x2000 {
 		switch r {
@@ -766,10 +1083,24 @@ is_space :: proc(r: rune) -> bool {
 	return false
 }
 
+// returns true when the `r` rune is a nul byte
 is_null :: proc(r: rune) -> bool {
 	return r == 0x0000
 }
 
+/*
+	runs through the `s` string linearly and checks whether the `p` procedure result matches the `truth` bool
+	returns the byte offset of the first matching rune or -1 when no match was found
+
+	call :: proc(r: rune) -> bool {
+		return r == 'a'
+	}
+	strings.index_proc("abcabc", call) -> 0
+	strings.index_proc("cbacba", call) -> 2
+	strings.index_proc("cbacba", call, false) -> 0
+	strings.index_proc("abcabc", call, false) -> 1
+	strings.index_proc("xyz", call) -> -1
+*/
 index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int {
 	for r, i in s {
 		if p(r) == truth {
@@ -779,6 +1110,7 @@ index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int {
 	return -1
 }
 
+// same as `index_proc` but with a `p` procedure taking a rawptr for state
 index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
 	for r, i in s {
 		if p(state, r) == truth {
@@ -788,6 +1120,7 @@ index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: r
 	return -1
 }
 
+// same as `index_proc` but runs through the string in reverse
 last_index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int {
 	// TODO(bill): Probably use Rabin-Karp Search
 	for i := len(s); i > 0; {
@@ -800,6 +1133,7 @@ last_index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int
 	return -1
 }
 
+// same as `index_proc_with_state` but runs through the string in reverse
 last_index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int {
 	// TODO(bill): Probably use Rabin-Karp Search
 	for i := len(s); i > 0; {
@@ -811,7 +1145,17 @@ last_index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, sta
 	}
 	return -1
 }
+	
+/*
+	trims the input string `s` until the procedure `p` returns false
+	does not allocate - only returns a cut variant of the input string
+	returns an empty string when no match was found at all
 
+	find :: proc(r: rune) -> bool {
+		return r != 'i'
+	}
+	strings.trim_left_proc("testing", find) -> "ing"
+*/
 trim_left_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
 	i := index_proc(s, p, false)
 	if i == -1 {
@@ -820,29 +1164,10 @@ trim_left_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
 	return s[i:]
 }
 
-
-index_rune :: proc(s: string, r: rune) -> int {
-	switch {
-	case 0 <= r && r < utf8.RUNE_SELF:
-		return index_byte(s, byte(r))
-
-	case r == utf8.RUNE_ERROR:
-		for c, i in s {
-			if c == utf8.RUNE_ERROR {
-				return i
-			}
-		}
-		return -1
-
-	case !utf8.valid_rune(r):
-		return -1
-	}
-
-	b, w := utf8.encode_rune(r)
-	return index(s, string(b[:w]))
-}
-
-
+/*
+	trims the input string `s` until the procedure `p` with state returns false
+	returns an empty string when no match was found at all
+*/
 trim_left_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr) -> string {
 	i := index_proc_with_state(s, p, state, false)
 	if i == -1 {
@@ -851,6 +1176,16 @@ trim_left_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, stat
 	return s[i:]
 }
 
+/*
+	trims the input string `s` from the right until the procedure `p` returns false
+	does not allocate - only returns a cut variant of the input string
+	returns an empty string when no match was found at all
+
+	find :: proc(r: rune) -> bool {
+		return r != 't'
+	}
+	strings.trim_right_proc("testing", find) -> "test"
+*/
 trim_right_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
 	i := last_index_proc(s, p, false)
 	if i >= 0 && s[i] >= utf8.RUNE_SELF {
@@ -862,6 +1197,10 @@ trim_right_proc :: proc(s: string, p: proc(rune) -> bool) -> string {
 	return s[0:i]
 }
 
+/*
+	trims the input string `s` from the right until the procedure `p` with state returns false
+	returns an empty string when no match was found at all
+*/
 trim_right_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr) -> string {
 	i := last_index_proc_with_state(s, p, state, false)
 	if i >= 0 && s[i] >= utf8.RUNE_SELF {
@@ -873,7 +1212,7 @@ trim_right_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, sta
 	return s[0:i]
 }
 
-
+// procedure for `trim_*_proc` variants, which has a string rawptr cast + rune comparison
 is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
 	if state == nil {
 		return false
@@ -887,7 +1226,7 @@ is_in_cutset :: proc(state: rawptr, r: rune) -> bool {
 	return false
 }
 
-
+// trims the `cutset` string from the `s` string
 trim_left :: proc(s: string, cutset: string) -> string {
 	if s == "" || cutset == "" {
 		return s
@@ -896,6 +1235,7 @@ trim_left :: proc(s: string, cutset: string) -> string {
 	return trim_left_proc_with_state(s, is_in_cutset, &state)
 }
 
+// trims the `cutset` string from the `s` string from the right
 trim_right :: proc(s: string, cutset: string) -> string {
 	if s == "" || cutset == "" {
 		return s
@@ -904,35 +1244,48 @@ trim_right :: proc(s: string, cutset: string) -> string {
 	return trim_right_proc_with_state(s, is_in_cutset, &state)
 }
 
+// trims the `cutset` string from the `s` string, both from left and right
 trim :: proc(s: string, cutset: string) -> string {
 	return trim_right(trim_left(s, cutset), cutset)
 }
 
+// trims until a valid non space rune: "\t\txyz\t\t" -> "xyz\t\t"
 trim_left_space :: proc(s: string) -> string {
 	return trim_left_proc(s, is_space)
 }
 
+// trims from the right until a valid non space rune: "\t\txyz\t\t" -> "\t\txyz"
 trim_right_space :: proc(s: string) -> string {
 	return trim_right_proc(s, is_space)
 }
 
+// trims from both sides until a valid non space rune: "\t\txyz\t\t" -> "xyz"
 trim_space :: proc(s: string) -> string {
 	return trim_right_space(trim_left_space(s))
 }
 
-
+// trims nul runes from the left: "\x00\x00testing\x00\x00" -> "testing\x00\x00"
 trim_left_null :: proc(s: string) -> string {
 	return trim_left_proc(s, is_null)
 }
 
+// trims nul runes from the right: "\x00\x00testing\x00\x00" -> "\x00\x00testing"
 trim_right_null :: proc(s: string) -> string {
 	return trim_right_proc(s, is_null)
 }
 
+// trims nul runes from both sides: "\x00\x00testing\x00\x00" -> "testing"
 trim_null :: proc(s: string) -> string {
 	return trim_right_null(trim_left_null(s))
 }
 
+/*
+	trims a `prefix` string from the start of the `s` string and returns the trimmed string
+	returns the input string `s` when no prefix was found
+
+	strings.trim_prefix("testing", "test") -> "ing"
+	strings.trim_prefix("testing", "abc") -> "testing"
+*/
 trim_prefix :: proc(s, prefix: string) -> string {
 	if has_prefix(s, prefix) {
 		return s[len(prefix):]
@@ -940,6 +1293,13 @@ trim_prefix :: proc(s, prefix: string) -> string {
 	return s
 }
 
+/*
+	trims a `suffix` string from the end of the `s` string and returns the trimmed string
+	returns the input string `s` when no suffix was found
+
+	strings.trim_suffix("todo.txt", ".txt") -> "todo"
+	strings.trim_suffix("todo.doc", ".txt") -> "todo.doc"
+*/
 trim_suffix :: proc(s, suffix: string) -> string {
 	if has_suffix(s, suffix) {
 		return s[:len(s)-len(suffix)]
@@ -947,142 +1307,151 @@ trim_suffix :: proc(s, suffix: string) -> string {
 	return s
 }
 
-split_multi :: proc(s: string, substrs: []string, skip_empty := false, allocator := context.allocator) -> []string #no_bounds_check {
+/*
+	splits the input string `s` by all possible `substrs` []string, longer substrings are matched first
+	returns the allocated []string of slices into `s`, nil on empty `s`/`substrs`, any empty substring or no matches
+
+	splits := [?]string { "---", "~~~", ".", "_", "," }
+	res := strings.split_multi("testing,this.out_nice---done~~~last", splits[:])
+	fmt.eprintln(res) // -> [testing, this, out, nice, done, last]
+*/
+split_multi :: proc(s: string, substrs: []string, allocator := context.allocator) -> (buf: []string) #no_bounds_check {
 	if s == "" || len(substrs) <= 0 {
-		return nil
+		return
 	}
 
-	sublen := len(substrs[0])
-
-	for substr in substrs[1:] {
-		sublen = min(sublen, len(substr))
+	// disallow "" substr, it would match at every position without consuming input
+	for substr in substrs {
+		if len(substr) == 0 {
+			return
+		}
 	}
 
-	shared := len(s) - sublen
+	// TODO maybe remove duplicate substrs
+	// sort a temp allocated copy of the substrings by size, largest to smallest, so the longest match wins
+	temp_substrs := slice.clone(substrs, context.temp_allocator)
+	slice.sort_by(temp_substrs, proc(a, b: string) -> bool {
+		return len(a) > len(b)	
+	})
 
-	if shared <= 0 {
-		return nil
-	}
+	substrings_found: int
+	temp := s
 
-	// number, index, last
-	n, i, l := 0, 0, 0
-
-	// count results
-	first_pass: for i <= shared {
-		for substr in substrs {
-			if s[i:i+sublen] == substr {
-				if !skip_empty || i - l > 0 {
-					n += 1
-				}
-
-				i += sublen
-				l  = i
+	// first pass: count the substr matches found in the string to size the result
+	first_pass: for len(temp) > 0 {
+		for substr in temp_substrs {
+			size := len(substr)
 
+			// check range and compare the start of the remaining string to substr
+			if size <= len(temp) && temp[:size] == substr {
+				substrings_found += 1
+				temp = temp[size:]
 				continue first_pass
 			}
 		}
 
-		_, skip := utf8.decode_rune_in_string(s[i:])
-		i += skip
+		// no substr matched at this position, step over one rune
+		_, skip := utf8.decode_rune_in_string(temp[:])
+		temp = temp[skip:]
 	}
 
-	if !skip_empty || len(s) - l > 0 {
-		n += 1
+	// skip when no results, `buf` stays nil
+	if substrings_found < 1 {
+		return 
 	}
 
-	if n < 1 {
-		// no results
-		return nil
-	}
-
-	buf := make([]string, n, allocator)
-
-	n, i, l = 0, 0, 0
-
-	// slice results
-	second_pass: for i <= shared {
-		for substr in substrs {
-			if s[i:i+sublen] == substr {
-				if !skip_empty || i - l > 0 {
-					buf[n] = s[l:i]
-					n += 1
-				}
+	buf = make([]string, substrings_found + 1, allocator)
+	buf_index: int
+	temp = s
+	temp_old := temp
 
-				i += sublen
-				l  = i
+	// second pass: gather results in the same fashion, `temp_old` marks the start of the current piece
+	second_pass: for len(temp) > 0 {
+		for substr in temp_substrs {
+			size := len(substr)
 
+			// check range and compare the start of the remaining string to substr
+			if size <= len(temp) && temp[:size] == substr {
+				buf[buf_index] = temp_old[:len(temp_old) - len(temp)]
+				buf_index += 1
+				temp = temp[size:]
+				temp_old = temp
 				continue second_pass
 			}
 		}
 
-		_, skip := utf8.decode_rune_in_string(s[i:])
-		i += skip
+		// no substr matched at this position, step over one rune
+		_, skip := utf8.decode_rune_in_string(temp[:])
+		temp = temp[skip:]
 	}
 
-	if !skip_empty || len(s) - l > 0 {
-		buf[n] = s[l:]
-	}
+	buf[buf_index] = temp_old[:] // remainder after the last match, may be empty
 
 	return buf
 }
 
+// state for the split multi iterator, created by split_multi_init and advanced by split_multi_iterate
+Split_Multi :: struct {
+	temp: string, // remaining input that has not been scanned yet
+	temp_old: string, // input starting at the piece that is currently being scanned
+	substrs: []string, // separators, sorted largest to smallest by split_multi_init
+}
 
+// returns split multi state for `s`, `substrs` are cloned with context.temp_allocator and sorted
+split_multi_init :: proc(s: string, substrs: []string) -> Split_Multi {
+	// sort a copy of the substrings, largest to smallest; NOTE(review): unlike split_multi, "" substrs are not rejected here
+	temp_substrs := slice.clone(substrs, context.temp_allocator)
+	slice.sort_by(temp_substrs, proc(a, b: string) -> bool {
+		return len(a) > len(b)	
+	})	
 
-
-split_multi_iterator :: proc(s: ^string, substrs: []string, skip_empty := false) -> (string, bool) #no_bounds_check {
-	if s == nil || s^ == "" || len(substrs) <= 0 {
-		return "", false
-	}
-
-	sublen := len(substrs[0])
-
-	for substr in substrs[1:] {
-		sublen = min(sublen, len(substr))
+	return {
+		temp = s,
+		temp_old = s,
+		substrs = temp_substrs,
 	}
+}
 
-	shared := len(s) - sublen
+/*
+	splits the string held by the `sm` state (see split_multi_init) by all of its `substrs` in an iterator fashion
+	returns the next split string and true every iteration, the full string on no match, "" and false when done
 
-	if shared <= 0 {
-		return "", false
+	splits := [?]string { "---", "~~~", ".", "_", "," }
+	state := strings.split_multi_init("testing,this.out_nice---done~~~last", splits[:])
+	for str in strings.split_multi_iterate(&state) {
+		fmt.eprintln(str) // prints one piece per iteration: testing, this, out, nice, done, last
 	}
-
-	// index, last
-	i, l := 0, 0
-
-	loop: for i <= shared {
+*/
+split_multi_iterate :: proc(using sm: ^Split_Multi) -> (res: string, ok: bool) #no_bounds_check {
+	pass: for len(temp) > 0 {
 		for substr in substrs {
-			if s[i:i+sublen] == substr {
-				if !skip_empty || i - l > 0 {
-					res := s[l:i]
-					s^ = s[i:]
-					return res, true
-				}
+			size := len(substr)
 
-				i += sublen
-				l  = i
-
-				continue loop
+			// check range and compare the start of the remaining string to substr
+			if size <= len(temp) && temp[:size] == substr {
+				res = temp_old[:len(temp_old) - len(temp)]
+				temp = temp[size:]
+				temp_old = temp
+				ok = true
+				return 	
 			}
 		}
 
-		_, skip := utf8.decode_rune_in_string(s[i:])
-		i += skip
+		// no substr matched at this position, step over one rune
+		_, skip := utf8.decode_rune_in_string(temp[:])
+		temp = temp[skip:]
 	}
 
-	if !skip_empty || len(s) - l > 0 {
-		res := s[l:]
-		s^ = s[len(s):]
-		return res, true
+	// allow last iteration: the remainder after the final match, skipped when it is empty
+	if temp_old != "" {
+		res = temp_old[:]	
+		ok = true
+		temp_old = ""
 	}
 
-	return "", false
+	return
 }
 
-
-
-
-
-
 // scrub scruvs invalid utf-8 characters and replaces them with the replacement string
 // Adjacent invalid bytes are only replaced once
 scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> string {
@@ -1117,7 +1486,13 @@ scrub :: proc(s: string, replacement: string, allocator := context.allocator) ->
 	return to_string(b)
 }
 
+/*
+	returns a reversed version of the `s` string
 
+	a := "abcxyz"
+	b := strings.reverse(a)
+	fmt.eprintln(a, b) // abcxyz zyxcba
+*/
 reverse :: proc(s: string, allocator := context.allocator) -> string {
 	str := s
 	n := len(str)
@@ -1133,12 +1508,19 @@ reverse :: proc(s: string, allocator := context.allocator) -> string {
 	return string(buf)
 }
 
+/*
+	expands the string to a grid spaced by `tab_size` whenever a `\t` character appears
+	returns the tabbed string, panics on tab_size <= 0
+
+	strings.expand_tabs("abc1\tabc2\tabc3", 4) -> abc1    abc2    abc3
+	strings.expand_tabs("abc1\tabc2\tabc3", 5) -> abc1 abc2 abc3
+	strings.expand_tabs("abc1\tabc2\tabc3", 6) -> abc1  abc2  abc3
+*/
 expand_tabs :: proc(s: string, tab_size: int, allocator := context.allocator) -> string {
 	if tab_size <= 0 {
 		panic("tab size must be positive")
 	}
 
-
 	if s == "" {
 		return ""
 	}
@@ -1176,7 +1558,16 @@ expand_tabs :: proc(s: string, tab_size: int, allocator := context.allocator) ->
 	return to_string(b)
 }
 
-
+/*
+	splits the `str` string by the separator `sep` string and returns 3 parts
+	`head`: before the split, `match`: the separator, `tail`: the end of the split
+	returns the input string as `head` when the `sep` was not found
+
+	text := "testing this out"
+	strings.partition(text, " this ") -> head: "testing", match: " this ", tail: "out"
+	strings.partition(text, "hi") -> head: "testing t", match: "hi", tail: "s out"
+	strings.partition(text, "xyz") -> head: "testing this out", match: "", tail: ""
+*/
 partition :: proc(str, sep: string) -> (head, match, tail: string) {
 	i := index(str, sep)
 	if i == -1 {
@@ -1392,7 +1783,7 @@ fields_iterator :: proc(s: ^string) -> (field: string, ok: bool) {
 		return "", false
 	}
 
-	field = s[start:]
+	field = s[:len(s)]
 	ok = true
 	s^ = s[len(s):]
 	return
author	Michael Kutowski <skytrias@protonmail.com>	2022-03-27 11:39:17 +0200
committer	GitHub <noreply@github.com>	2022-03-27 11:39:17 +0200
commit	58f4d533b72d199848e4ebb291b7737312b4957a (patch)
tree	631e9f68467baf8073b1ad41bf7f2acad80a3542 /core/strings
parent	92f985abd5c4e5017a644266816fb2b8326157be (diff)