Update strings case converters to be Unicode compliant

author: gingerBill <bill@gingerbill.org> 2020-05-24 17:50:27 +0100
committer: gingerBill <bill@gingerbill.org> 2020-05-24 17:50:27 +0100
commit: f06efffe22136e204d85596da74bcb8c398a312d (patch)
tree: e74cd8dc27fe545f2d30666d6c287ed21af7926d /core/strings
parent: e42f7008fc66ec0eebfcf810c55152a588afb1e2 (diff)
2 files changed, 185 insertions, 158 deletions
diff --git a/core/strings/builder.odin b/core/strings/builder.odin
index 21f50d823..a772af10e 100644
--- a/core/strings/builder.odin
+++ b/core/strings/builder.odin
@@ -8,10 +8,27 @@ Builder :: struct {
 	buf: [dynamic]byte,
 }
 
-make_builder :: proc(allocator := context.allocator) -> Builder {
+// make_builder_none returns a Builder whose byte buffer is created by
+// `make([dynamic]byte, allocator)`, i.e. with no explicit length or capacity.
+// It is the overload of the `make_builder` procedure group that takes no sizes.
+make_builder_none :: proc(allocator := context.allocator) -> Builder {
 	return Builder{make([dynamic]byte, allocator)};
 }
 
+// make_builder_len returns a Builder whose byte buffer is created with an
+// initial length of `len`, using `allocator` for the backing storage.
+make_builder_len :: proc(len: int, allocator := context.allocator) -> Builder {
+	storage := make([dynamic]byte, len, allocator);
+	return Builder{buf = storage};
+}
+
+// make_builder_len_cap returns a Builder whose byte buffer is created with an
+// initial length of `len` and a capacity of `cap`, using `allocator` for the
+// backing storage.
+make_builder_len_cap :: proc(len, cap: int, allocator := context.allocator) -> Builder {
+	storage := make([dynamic]byte, len, cap, allocator);
+	return Builder{buf = storage};
+}
+
+// make_builder is a procedure group over the three Builder constructors, so a
+// Builder can be created with no sizes, with a length, or with a length and a
+// capacity:
+//     make_builder(allocator)            -> make_builder_none
+//     make_builder(len, allocator)       -> make_builder_len
+//     make_builder(len, cap, allocator)  -> make_builder_len_cap
+make_builder :: proc{
+	make_builder_none,
+	make_builder_len,
+	make_builder_len_cap,
+};
+
+
+
+
 destroy_builder :: proc(b: ^Builder) {
 	delete(b.buf);
 	clear(&b.buf);
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index 2c1c769df..f89438c0a 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -678,8 +678,7 @@ trim_null :: proc(s: string) -> string {
 // Adjacent invalid bytes are only replaced once
 scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> string {
 	str := s;
-	b := make_builder(allocator);;
-	grow_builder(&b, len(str));
+	b := make_builder(0, len(str), allocator);
 
 	has_error := false;
 	cursor := 0;
@@ -708,193 +707,204 @@ scrub :: proc(s: string, replacement: string, allocator := context.allocator) ->
 	return to_string(b);
 }
 
-to_snake_case :: proc(str: string, allocator := context.allocator) -> string {
-	buf := make_builder(allocator);
 
-	last_chars: [2]rune;
-	for char, _ in str {
-		switch char {
-		case 'A'..'Z':
-			switch last_chars[1] {
-			case 'a'..'z', '0'..'9':
-				write_rune(&buf, '_');
-			case 'A'..'Z':
-				write_rune(&buf, last_chars[1] + ('a'-'A'));
-			}
-		case 'a'..'z':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				switch last_chars[0] {
-				case 'A'..'Z':
-					write_rune(&buf, '_');
-				}
-				write_rune(&buf, last_chars[1] + ('a'-'A'));
-			case '0'..'9':
-				write_rune(&buf, '_');
-			}
-			write_rune(&buf, char);
-		case '0'..'9':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				write_rune(&buf, last_chars[1] + ('a'-'A'));
-				write_rune(&buf, '_');
-			case 'a'..'z':
-				write_rune(&buf, '_');
-			}
-			write_rune(&buf, char);
-		case '_':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				write_rune(&buf, last_chars[1] + ('a'-'A'));
-			}
-			write_rune(&buf, char);
-		case:
-			unimplemented();
-		}
+// to_lower returns a newly built string in which every rune of `s` has been
+// passed through `unicode.to_lower`. The output Builder is created from
+// `allocator` with length 0 and capacity len(s).
+to_lower :: proc(s: string, allocator := context.allocator) -> string {
+	out := make_builder(0, len(s), allocator);
+	for ch in s {
+		lowered := unicode.to_lower(ch);
+		write_rune(&out, lowered);
+	}
+	return to_string(out);
+}
+// to_upper returns a newly built string in which every rune of `s` has been
+// passed through `unicode.to_upper`. The output Builder is created from
+// `allocator` with length 0 and capacity len(s).
+to_upper :: proc(s: string, allocator := context.allocator) -> string {
+	out := make_builder(0, len(s), allocator);
+	for ch in s {
+		raised := unicode.to_upper(ch);
+		write_rune(&out, raised);
+	}
+	return to_string(out);
+}
+
+
+
+
+// is_delimiter reports whether `c` is treated as a word delimiter by the case
+// converters in this file: '-', '_', or any rune for which `is_space` is true.
+is_delimiter :: proc(c: rune) -> bool {
+	switch c {
+	case '-', '_':
+		return true;
+	}
+	return is_space(c);
+}
 
-		last_chars[0] = last_chars[1];
-		last_chars[1] = char;
+// is_separator reports whether `r` is considered a separator.
+// For ASCII input (r <= 0x7f) every rune except the digits, the letters and
+// '_' is a separator. For any other rune the answer is `unicode.is_space(r)`;
+// letter and digit categories are not consulted yet (see the TODO in the body).
+is_separator :: proc(r: rune) -> bool {
+	if r <= 0x7f {
+		switch r {
+		case '0'..'9': return false;
+		case 'a'..'z': return false;
+		case 'A'..'Z': return false;
+		case '_': return false;
+		}
+		return true;
 	}
 
-	switch last_chars[1] {
-	case 'A'..'Z':
-		write_rune(&buf, last_chars[1] + ('a'-'A'));
+	// TODO(bill): unicode categories
+	// if unicode.is_letter(r) || unicode.is_digit(r) {
+	// 	return false;
+	// }
+
+	return unicode.is_space(r);
+}
+
+
+// string_case_iterator walks the runes of `s` and calls `callback` with a
+// sliding window (prev, curr, next) plus the Builder `b`, which it only
+// forwards and never writes to itself. `prev` is 0 for the first reported rune
+// and `next` is 0 for the last one. Nothing is called for an empty string.
+// NOTE(review): 0 doubles as the "no rune yet" marker, so a rune equal to 0
+// inside `s` is never reported as `curr` and resets `prev` to 0.
+string_case_iterator :: proc(b: ^Builder, s: string, callback: proc(b: ^Builder, prev, curr, next: rune)) {
+	prev, curr: rune;
+	for next in s {
+		// Nothing to report until `curr` holds a rune; just shift the window.
+		if curr == 0 {
+			prev = curr;
+			curr = next;
+			continue;
+		}
+
+		callback(b, prev, curr, next);
+
+		prev = curr;
+		curr = next;
 	}
 
-	return to_string(buf);
+	// The loop reports each rune one step late, so the last rune is flushed here.
+	if len(s) > 0 {
+		callback(b, prev, curr, 0);
+	}
 }
 
-to_ada_case :: proc(str: string, allocator := context.allocator) -> string {
-	buf := make_builder(allocator);
 
-	last_chars: [2]rune;
-	for char, _ in str {
-		switch char {
-		case 'A'..'Z':
-			switch last_chars[1] {
-			case 'a'..'z', '0'..'9':
-				write_rune(&buf, '_');
-			case 'A'..'Z':
-				switch last_chars[0] {
-				case '_', '\x00':
-					write_rune(&buf, last_chars[1]);
-				case:
-					write_rune(&buf, last_chars[1] + ('a'-'A'));
-				}
+// to_camel_case converts `s` to lower camel case and returns the result built
+// with `allocator`; `to_lower_camel_case` is an alias for it.
+// `s` is trimmed with `trim_space` first. Delimiter runes (see `is_delimiter`)
+// are dropped. For every other rune:
+//   - if the previous rune is a delimiter, it is written upper-cased;
+//   - else if the previous rune is lower case, it is written unchanged;
+//   - otherwise (this includes the first rune, whose `prev` is 0) it is
+//     written lower-cased.
+to_lower_camel_case :: to_camel_case;
+to_camel_case :: proc(s: string, allocator := context.allocator) -> string {
+	s := trim_space(s);
+	b := make_builder(0, len(s), allocator);
+
+	string_case_iterator(&b, s, proc(b: ^Builder, prev, curr, next: rune) {
+		if !is_delimiter(curr) {
+			if is_delimiter(prev) {
+				write_rune(b, unicode.to_upper(curr));
+			} else if unicode.is_lower(prev) {
+				write_rune(b, curr);
+			} else {
+				write_rune(b, unicode.to_lower(curr));
 			}
-		case 'a'..'z':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				switch last_chars[0] {
-				case 'A'..'Z':
-					write_rune(&buf, '_');
-					write_rune(&buf, last_chars[1]);
-				case:
-					write_rune(&buf, last_chars[1]);
-				}
-				write_rune(&buf, char);
-			case '0'..'9':
-				write_rune(&buf, '_');
-				write_rune(&buf, char);
-			case 'a'..'z':
-				write_rune(&buf, char);
-			case '_', '\x00':
-				write_rune(&buf, char - ('a'-'A'));
+		}
+	});
+
+	return to_string(b);
+}
+
+// to_pascal_case converts `s` to upper camel (Pascal) case and returns the
+// result built with `allocator`; `to_upper_camel_case` is an alias for it.
+// `s` is trimmed with `trim_space` first. Delimiter runes (see `is_delimiter`)
+// are dropped. For every other rune:
+//   - if the previous rune is a delimiter, or there is no previous rune
+//     (`prev == 0`), it is written upper-cased;
+//   - else if the previous rune is lower case, it is written unchanged;
+//   - otherwise it is written lower-cased.
+to_upper_camel_case :: to_pascal_case;
+to_pascal_case :: proc(s: string, allocator := context.allocator) -> string {
+	s := trim_space(s);
+	b := make_builder(0, len(s), allocator);
+
+	string_case_iterator(&b, s, proc(b: ^Builder, prev, curr, next: rune) {
+		if !is_delimiter(curr) {
+			if is_delimiter(prev) || prev == 0 {
+				write_rune(b, unicode.to_upper(curr));
+			} else if unicode.is_lower(prev) {
+				write_rune(b, curr);
+			} else {
+				write_rune(b, unicode.to_lower(curr));
 			}
-		case '0'..'9':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				write_rune(&buf, last_chars[1] + ('a'-'A'));
-				write_rune(&buf, '_');
-			case 'a'..'z':
-				write_rune(&buf, '_');
+		}
+	});
+
+	return to_string(b);
+}
+
+// to_delimiter_case rewrites `s` as words joined by `delimiter` and returns
+// the result built with `allocator`. It is the shared implementation behind
+// the snake/kebab style converters.
+//   - `s` is trimmed with `trim_space` first.
+//   - Inside the loop, a run of delimiter runes (see `is_delimiter`) is
+//     written as a single `delimiter`.
+//   - A `delimiter` is inserted before an upper-case rune that follows a
+//     lower-case rune, or that follows an upper-case rune and precedes a
+//     lower-case one.
+//   - Non-delimiter runes are written through `unicode.to_upper` when
+//     `all_upper_case` is true, otherwise through `unicode.to_lower`.
+// NOTE(review): the last rune is written after the loop without the
+// `is_delimiter` check, so a trailing '-' or '_' appears to be copied as-is
+// rather than replaced by `delimiter` — confirm this is intended.
+to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string {
+	s := trim_space(s);
+	b := make_builder(0, len(s), allocator);
+
+	adjust_case := unicode.to_upper if all_upper_case else unicode.to_lower;
+
+	// `curr` trails `next` by one rune so each step can look one rune ahead;
+	// both start at 0, meaning "no rune yet".
+	prev, curr: rune;
+
+	for next in s {
+		if is_delimiter(curr) {
+			if !is_delimiter(prev) {
+				write_rune(&b, delimiter);
 			}
-			write_rune(&buf, char);
-		case '_':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				write_rune(&buf, last_chars[1] + ('a'-'A'));
+		} else if unicode.is_upper(curr) {
+			if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) {
+				write_rune(&b, delimiter);
 			}
-			write_rune(&buf, char);
-		case:
-			write_rune(&buf, char);
+			write_rune(&b, adjust_case(curr));
+		} else if curr != 0 {
+			write_rune(&b, adjust_case(curr));
 		}
 
-		last_chars[0] = last_chars[1];
-		last_chars[1] = char;
+		prev = curr;
+		curr = next;
 	}
 
-	switch last_chars[1] {
-	case 'A'..'Z':
-		write_rune(&buf, last_chars[1] + ('a'-'A'));
+	// The loop never handles the final rune as `curr`; flush it here.
+	if len(s) > 0 {
+		if unicode.is_upper(curr) && unicode.is_lower(prev) && prev != 0 {
+			write_rune(&b, delimiter);
+		}
+		write_rune(&b, adjust_case(curr));
 	}
 
-	return to_string(buf);
+	return to_string(b);
 }
 
-to_screaming_snake_case :: proc(str: string, allocator := context.allocator) -> string {
-	buf := make_builder(allocator);
 
-	last_chars: [2]rune;
-	for char, _ in str {
-		switch char {
-		case 'A'..'Z':
-			switch last_chars[1] {
-			case 'a'..'z', '0'..'9':
-				write_rune(&buf, '_');
-			case 'A'..'Z':
-				write_rune(&buf, last_chars[1]);
-			}
-		case 'a'..'z':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				switch last_chars[0] {
-				case 'A'..'Z':
-					write_rune(&buf, '_');
-					write_rune(&buf, last_chars[1]);
-				case:
-					write_rune(&buf, last_chars[1]);
-				}
-				write_rune(&buf, char - ('a'-'A'));
-			case '0'..'9':
-				write_rune(&buf, '_');
-				write_rune(&buf, char - ('a'-'A'));
-			case 'a'..'z':
-				write_rune(&buf, char - ('a'-'A'));
-			case '_', '\x00':
-				write_rune(&buf, char - ('a'-'A'));
-			}
-		case '0'..'9':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				write_rune(&buf, last_chars[1]);
-				write_rune(&buf, '_');
-			case 'a'..'z':
-				write_rune(&buf, '_');
+// to_snake_case converts `s` to lower-case words joined by '_' by delegating
+// to `to_delimiter_case`; the result is built with `allocator`.
+to_snake_case :: proc(s: string, allocator := context.allocator) -> string {
+	SNAKE_DELIMITER :: '_';
+	return to_delimiter_case(s, SNAKE_DELIMITER, false, allocator);
+}
+
+// to_upper_snake_case converts `s` to upper-case words joined by '_' by
+// delegating to `to_delimiter_case`; the result is built with `allocator`.
+// `to_screaming_snake_case` is an alias for it.
+to_screaming_snake_case :: to_upper_snake_case;
+to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string {
+	SNAKE_DELIMITER :: '_';
+	return to_delimiter_case(s, SNAKE_DELIMITER, true, allocator);
+}
+
+// to_kebab_case converts `s` to lower-case words joined by '-' by delegating
+// to `to_delimiter_case`; the result is built with `allocator`.
+to_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
+	KEBAB_DELIMITER :: '-';
+	return to_delimiter_case(s, KEBAB_DELIMITER, false, allocator);
+}
+
+// to_upper_case converts `s` to upper-case words joined by '-'
+// (e.g. "hello world" -> "HELLO-WORLD") by delegating to `to_delimiter_case`;
+// the result is built with `allocator`. Despite its name it is the upper-case
+// counterpart of `to_kebab_case`, not a plain upper-casing of `s` (use
+// `to_upper` for that).
+// `to_upper_kebab_case` is provided as a descriptive alias, mirroring the
+// `to_screaming_snake_case` / `to_upper_snake_case` pair; the original name is
+// kept so existing callers continue to compile.
+to_upper_kebab_case :: to_upper_case;
+to_upper_case :: proc(s: string, allocator := context.allocator) -> string {
+	return to_delimiter_case(s, '-', true, allocator);
+}
+
+// to_ada_case rewrites `s` as words joined by '_' and returns the result built
+// with `allocator`.
+//   - `s` is trimmed with `trim_space` first.
+//   - Inside the loop, a run of delimiter runes (see `is_delimiter`) is
+//     written as a single '_'.
+//   - A '_' is inserted before an upper-case rune that follows a lower-case
+//     rune, or that follows an upper-case rune and precedes a lower-case one.
+//   - Inside the loop, upper-case runes are written through `unicode.to_upper`
+//     and all other runes through `unicode.to_lower`.
+//   - The final rune is written as '_' + upper case when it is upper case and
+//     follows a lower-case rune; in every other case it is written lower-cased.
+// NOTE(review): the final-rune branch lower-cases a trailing capital that does
+// not follow a lower-case rune (a lone "A" appears to come out as "a"), and
+// nothing here capitalises the first letter of a word that is already lower
+// case — confirm both against the intended Ada_Case output.
+to_ada_case :: proc(s: string, allocator := context.allocator) -> string {
+	delimiter :: '_';
+
+	s := trim_space(s);
+	b := make_builder(0, len(s), allocator);
+
+	// `curr` trails `next` by one rune so each step can look one rune ahead;
+	// both start at 0, meaning "no rune yet".
+	prev, curr: rune;
+
+	for next in s {
+		if is_delimiter(curr) {
+			if !is_delimiter(prev) {
+				write_rune(&b, delimiter);
 			}
-			write_rune(&buf, char);
-		case '_':
-			switch last_chars[1] {
-			case 'A'..'Z':
-				write_rune(&buf, last_chars[1]);
+		} else if unicode.is_upper(curr) {
+			if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) {
+				write_rune(&b, delimiter);
 			}
-			write_rune(&buf, char);
-		case:
-			unimplemented();
+			write_rune(&b, unicode.to_upper(curr));
+		} else if curr != 0 {
+			write_rune(&b, unicode.to_lower(curr));
 		}
 
-		last_chars[0] = last_chars[1];
-		last_chars[1] = char;
+		prev = curr;
+		curr = next;
 	}
 
-	switch last_chars[1] {
-	case 'A'..'Z':
-		write_rune(&buf, last_chars[1]);
+	// The loop never handles the final rune as `curr`; flush it here.
+	if len(s) > 0 {
+		if unicode.is_upper(curr) && unicode.is_lower(prev) && prev != 0 {
+			write_rune(&b, delimiter);
+			write_rune(&b, unicode.to_upper(curr));
+		} else {
+			write_rune(&b, unicode.to_lower(curr));
+		}
 	}
 
-	return to_string(buf);
+	return to_string(b);
 }
 
+
+
 reverse :: proc(s: string, allocator := context.allocator) -> string {
 	str := s;
 	n := len(str);
author	gingerBill <bill@gingerbill.org>	2020-05-24 17:50:27 +0100
committer	gingerBill <bill@gingerbill.org>	2020-05-24 17:50:27 +0100
commit	f06efffe22136e204d85596da74bcb8c398a312d (patch)
tree	e74cd8dc27fe545f2d30666d6c287ed21af7926d /core/strings
parent	e42f7008fc66ec0eebfcf810c55152a588afb1e2 (diff)