aboutsummaryrefslogtreecommitdiff
path: root/core/strings
diff options
context:
space:
mode:
authorgingerBill <bill@gingerbill.org>2020-05-24 17:50:27 +0100
committergingerBill <bill@gingerbill.org>2020-05-24 17:50:27 +0100
commitf06efffe22136e204d85596da74bcb8c398a312d (patch)
treee74cd8dc27fe545f2d30666d6c287ed21af7926d /core/strings
parente42f7008fc66ec0eebfcf810c55152a588afb1e2 (diff)
Update strings case convertors to be unicode compliant
Diffstat (limited to 'core/strings')
-rw-r--r--core/strings/builder.odin19
-rw-r--r--core/strings/strings.odin324
2 files changed, 185 insertions, 158 deletions
diff --git a/core/strings/builder.odin b/core/strings/builder.odin
index 21f50d823..a772af10e 100644
--- a/core/strings/builder.odin
+++ b/core/strings/builder.odin
@@ -8,10 +8,27 @@ Builder :: struct {
buf: [dynamic]byte,
}
-make_builder :: proc(allocator := context.allocator) -> Builder {
+make_builder_none :: proc(allocator := context.allocator) -> Builder { // Builds a Builder whose buffer is made with no explicit length or capacity, using `allocator`.
return Builder{make([dynamic]byte, allocator)};
}
+// make_builder_len returns a Builder whose byte buffer is created with `len`
+// elements. The buffer is allocated with `allocator`.
+make_builder_len :: proc(len: int, allocator := context.allocator) -> Builder {
+	storage := make([dynamic]byte, len, allocator);
+	return Builder{buf = storage};
+}
+
+// make_builder_len_cap returns a Builder whose byte buffer is created with
+// `len` elements and room for `cap`. The buffer is allocated with `allocator`.
+make_builder_len_cap :: proc(len, cap: int, allocator := context.allocator) -> Builder {
+	storage := make([dynamic]byte, len, cap, allocator);
+	return Builder{buf = storage};
+}
+
+make_builder :: proc{ // Overload set: the constructor is chosen by the arguments supplied.
+ make_builder_none, // (allocator)
+ make_builder_len, // (len, allocator)
+ make_builder_len_cap, // (len, cap, allocator)
+};
+
+
+
+
destroy_builder :: proc(b: ^Builder) {
delete(b.buf);
clear(&b.buf);
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index 2c1c769df..f89438c0a 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -678,8 +678,7 @@ trim_null :: proc(s: string) -> string {
// Adjacent invalid bytes are only replaced once
scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> string {
str := s;
- b := make_builder(allocator);;
- grow_builder(&b, len(str));
+ b := make_builder(0, len(str), allocator);
has_error := false;
cursor := 0;
@@ -708,193 +707,204 @@ scrub :: proc(s: string, replacement: string, allocator := context.allocator) ->
return to_string(b);
}
-to_snake_case :: proc(str: string, allocator := context.allocator) -> string {
- buf := make_builder(allocator);
- last_chars: [2]rune;
- for char, _ in str {
- switch char {
- case 'A'..'Z':
- switch last_chars[1] {
- case 'a'..'z', '0'..'9':
- write_rune(&buf, '_');
- case 'A'..'Z':
- write_rune(&buf, last_chars[1] + ('a'-'A'));
- }
- case 'a'..'z':
- switch last_chars[1] {
- case 'A'..'Z':
- switch last_chars[0] {
- case 'A'..'Z':
- write_rune(&buf, '_');
- }
- write_rune(&buf, last_chars[1] + ('a'-'A'));
- case '0'..'9':
- write_rune(&buf, '_');
- }
- write_rune(&buf, char);
- case '0'..'9':
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1] + ('a'-'A'));
- write_rune(&buf, '_');
- case 'a'..'z':
- write_rune(&buf, '_');
- }
- write_rune(&buf, char);
- case '_':
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1] + ('a'-'A'));
- }
- write_rune(&buf, char);
- case:
- unimplemented();
- }
+// to_lower returns a new string in which every rune of `s` has been passed
+// through `unicode.to_lower`. The output buffer starts with capacity `len(s)`
+// and is allocated with `allocator`.
+to_lower :: proc(s: string, allocator := context.allocator) -> string {
+	out := make_builder(0, len(s), allocator);
+	for ch in s {
+		lowered := unicode.to_lower(ch);
+		write_rune(&out, lowered);
+	}
+	return to_string(out);
+}
+// to_upper returns a new string in which every rune of `s` has been passed
+// through `unicode.to_upper`. The output buffer starts with capacity `len(s)`
+// and is allocated with `allocator`.
+to_upper :: proc(s: string, allocator := context.allocator) -> string {
+	out := make_builder(0, len(s), allocator);
+	for ch in s {
+		raised := unicode.to_upper(ch);
+		write_rune(&out, raised);
+	}
+	return to_string(out);
+}
+
+
+
+
+// is_delimiter reports whether `c` separates words for the case converters:
+// a hyphen, an underscore, or anything `is_space` accepts.
+is_delimiter :: proc(c: rune) -> bool {
+	switch c {
+	case '-', '_':
+		return true;
+	}
+	return is_space(c);
+}
- last_chars[0] = last_chars[1];
- last_chars[1] = char;
+is_separator :: proc(r: rune) -> bool { // Reports whether r is a separator: ASCII letters, digits and '_' are not; other ASCII runes are.
+ if r <= 0x7f { // ASCII range is decided entirely by the table below
+ switch r {
+ case '0'..'9': return false;
+ case 'a'..'z': return false;
+ case 'A'..'Z': return false;
+ case '_': return false;
+ }
+ return true; // every remaining ASCII rune counts as a separator
}
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1] + ('a'-'A'));
+ // TODO(bill): unicode categories
+ // if unicode.is_letter(r) || unicode.is_digit(r) {
+ // return false;
+ // }
+
+ return unicode.is_space(r); // non-ASCII: for now only what unicode.is_space accepts is a separator
+}
+
+
+string_case_iterator :: proc(b: ^Builder, s: string, callback: proc(b: ^Builder, prev, curr, next: rune)) { // Calls callback once per rune of s as curr, with its neighbours prev/next (0 where there is none).
+ prev, curr: rune; // both start as 0, meaning "no rune buffered yet"
+ for next in s {
+ if curr == 0 { // nothing buffered yet (also taken when the buffered rune is NUL): just shift the window
+ prev = curr;
+ curr = next;
+ continue;
+ }
+
+ callback(b, prev, curr, next); // curr now has a known successor
+
+ prev = curr;
+ curr = next;
}
- return to_string(buf);
+ if len(s) > 0 { // the loop never reports the last buffered rune; report it with next = 0
+ callback(b, prev, curr, 0);
+ }
}
-to_ada_case :: proc(str: string, allocator := context.allocator) -> string {
- buf := make_builder(allocator);
- last_chars: [2]rune;
- for char, _ in str {
- switch char {
- case 'A'..'Z':
- switch last_chars[1] {
- case 'a'..'z', '0'..'9':
- write_rune(&buf, '_');
- case 'A'..'Z':
- switch last_chars[0] {
- case '_', '\x00':
- write_rune(&buf, last_chars[1]);
- case:
- write_rune(&buf, last_chars[1] + ('a'-'A'));
- }
+to_lower_camel_case :: to_camel_case; // alias of to_camel_case
+to_camel_case :: proc(s: string, allocator := context.allocator) -> string { // Converts s to lowerCamelCase: delimiters are dropped and the rune after one is upper-cased.
+ s := trim_space(s); // shadow the parameter with the whitespace-trimmed input
+ b := make_builder(0, len(s), allocator); // length 0, capacity len(s)
+
+ string_case_iterator(&b, s, proc(b: ^Builder, prev, curr, next: rune) { // the callback is handed the Builder as its first argument
+ if !is_delimiter(curr) { // delimiter runes are never written
+ if is_delimiter(prev) { // first rune after a delimiter starts a new word
+ write_rune(b, unicode.to_upper(curr));
+ } else if unicode.is_lower(prev) { // after a lower-case rune: copy unchanged, keeping existing inner capitals
+ write_rune(b, curr);
+ } else { // any other predecessor: lower-case the rune
+ write_rune(b, unicode.to_lower(curr));
}
- case 'a'..'z':
- switch last_chars[1] {
- case 'A'..'Z':
- switch last_chars[0] {
- case 'A'..'Z':
- write_rune(&buf, '_');
- write_rune(&buf, last_chars[1]);
- case:
- write_rune(&buf, last_chars[1]);
- }
- write_rune(&buf, char);
- case '0'..'9':
- write_rune(&buf, '_');
- write_rune(&buf, char);
- case 'a'..'z':
- write_rune(&buf, char);
- case '_', '\x00':
- write_rune(&buf, char - ('a'-'A'));
+ }
+ });
+
+ return to_string(b);
+}
+
+to_upper_camel_case :: to_pascal_case; // alias of to_pascal_case
+to_pascal_case :: proc(s: string, allocator := context.allocator) -> string { // Converts s to PascalCase: delimiters are dropped; the first rune and each rune after a delimiter are upper-cased.
+ s := trim_space(s); // shadow the parameter with the whitespace-trimmed input
+ b := make_builder(0, len(s), allocator); // length 0, capacity len(s)
+
+ string_case_iterator(&b, s, proc(b: ^Builder, prev, curr, next: rune) { // the callback is handed the Builder as its first argument
+ if !is_delimiter(curr) { // delimiter runes are never written
+ if is_delimiter(prev) || prev == 0 { // word start: after a delimiter, or no predecessor (prev == 0)
+ write_rune(b, unicode.to_upper(curr));
+ } else if unicode.is_lower(prev) { // after a lower-case rune: copy unchanged, keeping existing inner capitals
+ write_rune(b, curr);
+ } else { // any other predecessor: lower-case the rune
+ write_rune(b, unicode.to_lower(curr));
}
- case '0'..'9':
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1] + ('a'-'A'));
- write_rune(&buf, '_');
- case 'a'..'z':
- write_rune(&buf, '_');
+ }
+ });
+
+ return to_string(b);
+}
+
+to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string { // Rewrites s as words joined by delimiter, every written rune upper- or lower-cased per all_upper_case.
+ s := trim_space(s); // shadow the parameter with the whitespace-trimmed input
+ b := make_builder(0, len(s), allocator); // length 0, capacity len(s)
+
+ adjust_case := unicode.to_upper if all_upper_case else unicode.to_lower; // one case-mapping proc applied to every written rune
+
+ prev, curr: rune; // curr lags one rune behind the loop variable, so `next` is its successor
+
+ for next in s {
+ if is_delimiter(curr) { // a run of '-', '_' or whitespace yields a single delimiter
+ if !is_delimiter(prev) {
+ write_rune(&b, delimiter);
}
- write_rune(&buf, char);
- case '_':
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1] + ('a'-'A'));
+ } else if unicode.is_upper(curr) {
+ if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) { // word boundary: lower->Upper, or the last capital of an upper-case run before a lower-case rune
+ write_rune(&b, delimiter);
}
- write_rune(&buf, char);
- case:
- write_rune(&buf, char);
+ write_rune(&b, adjust_case(curr));
+ } else if curr != 0 { // curr is 0 on the first pass, before any rune has been buffered
+ write_rune(&b, adjust_case(curr));
}
- last_chars[0] = last_chars[1];
- last_chars[1] = char;
+ prev = curr;
+ curr = next;
}
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1] + ('a'-'A'));
+ if len(s) > 0 { // the loop never writes the final rune; flush it here
+ if unicode.is_upper(curr) && unicode.is_lower(prev) && prev != 0 {
+ write_rune(&b, delimiter);
+ }
+ write_rune(&b, adjust_case(curr)); // NOTE(review): a trailing '-' or '_' reaches this line and is written via adjust_case rather than as `delimiter` - confirm intended
}
- return to_string(buf);
+ return to_string(b);
+}
-to_screaming_snake_case :: proc(str: string, allocator := context.allocator) -> string {
- buf := make_builder(allocator);
- last_chars: [2]rune;
- for char, _ in str {
- switch char {
- case 'A'..'Z':
- switch last_chars[1] {
- case 'a'..'z', '0'..'9':
- write_rune(&buf, '_');
- case 'A'..'Z':
- write_rune(&buf, last_chars[1]);
- }
- case 'a'..'z':
- switch last_chars[1] {
- case 'A'..'Z':
- switch last_chars[0] {
- case 'A'..'Z':
- write_rune(&buf, '_');
- write_rune(&buf, last_chars[1]);
- case:
- write_rune(&buf, last_chars[1]);
- }
- write_rune(&buf, char - ('a'-'A'));
- case '0'..'9':
- write_rune(&buf, '_');
- write_rune(&buf, char - ('a'-'A'));
- case 'a'..'z':
- write_rune(&buf, char - ('a'-'A'));
- case '_', '\x00':
- write_rune(&buf, char - ('a'-'A'));
- }
- case '0'..'9':
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1]);
- write_rune(&buf, '_');
- case 'a'..'z':
- write_rune(&buf, '_');
+// to_snake_case forwards to `to_delimiter_case` with '_' as the delimiter
+// and lower-casing selected; the result is allocated with `allocator`.
+to_snake_case :: proc(s: string, allocator := context.allocator) -> string {
+	delimiter      :: '_';
+	all_upper_case :: false;
+	return to_delimiter_case(s, delimiter, all_upper_case, allocator);
+}
+
+// to_screaming_snake_case is another name for to_upper_snake_case.
+to_screaming_snake_case :: to_upper_snake_case;
+
+// to_upper_snake_case forwards to `to_delimiter_case` with '_' as the
+// delimiter and upper-casing selected; the result is allocated with `allocator`.
+to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string {
+	delimiter      :: '_';
+	all_upper_case :: true;
+	return to_delimiter_case(s, delimiter, all_upper_case, allocator);
+}
+
+// to_kebab_case forwards to `to_delimiter_case` with '-' as the delimiter
+// and lower-casing selected; the result is allocated with `allocator`.
+to_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
+	delimiter      :: '-';
+	all_upper_case :: false;
+	return to_delimiter_case(s, delimiter, all_upper_case, allocator);
+}
+
+// to_upper_case is kept for existing callers. It is NOT a plain upper-casing
+// routine (that is `to_upper`); it produces UPPER-KEBAB-CASE. Prefer the
+// explicit name `to_upper_kebab_case`.
+to_upper_case :: to_upper_kebab_case;
+
+// to_upper_kebab_case forwards to `to_delimiter_case` with '-' as the
+// delimiter and upper-casing selected; the result is allocated with
+// `allocator`. It is the upper-case counterpart of `to_kebab_case`, named to
+// match the `to_snake_case` / `to_upper_snake_case` pair.
+to_upper_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
+	return to_delimiter_case(s, '-', true, allocator);
+}
+
+// to_ada_case converts `s` to Ada_Case: words are joined by '_' and the first
+// rune of every word is upper-cased.
+//
+// Surrounding whitespace is trimmed first. A run of delimiters ('-', '_' or
+// whitespace) becomes a single '_'. A new word also starts at a lower->Upper
+// transition ("fooBar" -> "Foo_Bar") and at the last capital of an upper-case
+// run that is followed by a lower-case rune ("HTTPServer" -> "HTTP_Server").
+// Runes that are already upper-case are kept as they are; every other rune
+// that does not start a word is lower-cased.
+//
+// The result is built in a Builder allocated with `allocator`.
+to_ada_case :: proc(s: string, allocator := context.allocator) -> string {
+	s := trim_space(s);
+	b := make_builder(0, len(s), allocator);
+
+	// Going through string_case_iterator means the final rune is handled by
+	// exactly the same rules as every other rune (it simply sees next == 0).
+	string_case_iterator(&b, s, proc(b: ^Builder, prev, curr, next: rune) {
+		delimiter :: '_';
+
+		switch {
+		case is_delimiter(curr):
+			// Only the first delimiter of a run is written.
+			if !is_delimiter(prev) {
+				write_rune(b, delimiter);
+			}
+		case unicode.is_upper(curr):
+			// Word boundary inside an identifier: "fooBar" or "HTTPServer".
+			if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) {
+				write_rune(b, delimiter);
+			}
+			write_rune(b, curr);
+		case prev == 0 || is_delimiter(prev):
+			// First rune of the string, or first rune after a delimiter:
+			// this is what makes "hello_world" come out as "Hello_World".
+			write_rune(b, unicode.to_upper(curr));
+		case:
+			write_rune(b, unicode.to_lower(curr));
+		}
+	});
+
+	return to_string(b);
+}
-}
- write_rune(&buf, char);
- case '_':
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1]);
-}
- write_rune(&buf, char);
- case:
- unimplemented();
-}
- last_chars[0] = last_chars[1];
- last_chars[1] = char;
-}
- switch last_chars[1] {
- case 'A'..'Z':
- write_rune(&buf, last_chars[1]);
-}
- return to_string(buf);
-}
+
+
reverse :: proc(s: string, allocator := context.allocator) -> string {
str := s;
n := len(str);