diff options
| author | gingerBill <gingerBill@users.noreply.github.com> | 2025-10-07 09:45:54 +0100 |
|---|---|---|
| committer | gingerBill <gingerBill@users.noreply.github.com> | 2025-10-07 09:45:54 +0100 |
| commit | fb93713f249c56258f67b7c22f0c207e4e11e246 (patch) | |
| tree | caf4b84dda6855936164ead5766bc45856bfa217 /core/text | |
| parent | 5a12ccef44972c5172bdd6e886e5bf7d69a892f1 (diff) | |
Add `@(rodata)` and `@(require_results)` to `core:text/match`
Diffstat (limited to 'core/text')
| -rw-r--r-- | core/text/match/strlib.odin | 76 |
1 files changed, 51 insertions, 25 deletions
diff --git a/core/text/match/strlib.odin b/core/text/match/strlib.odin index 819f464c5..bfb66ca5d 100644 --- a/core/text/match/strlib.odin +++ b/core/text/match/strlib.odin @@ -9,11 +9,12 @@ MAX_CAPTURES :: 32 Capture :: struct { init: int, - len: int, + len: int, } Match :: struct { - byte_start, byte_end: int, + byte_start: int, + byte_end: int, } Error :: enum { @@ -33,12 +34,13 @@ CAP_UNFINISHED :: -1 INVALID :: -1 Match_State :: struct { - src: string, + src: string, pattern: string, - level: int, + level: int, capture: [MAX_CAPTURES]Capture, } +@(require_results) match_class :: proc(c: rune, cl: rune) -> (res: bool) { switch unicode.to_lower(cl) { case 'a': res = is_alpha(c) @@ -65,19 +67,23 @@ is_punct :: unicode.is_punct is_space :: unicode.is_space is_cntrl :: unicode.is_control +@(require_results) is_alnum :: proc(c: rune) -> bool { return unicode.is_alpha(c) || unicode.is_digit(c) } +@(require_results) is_graph :: proc(c: rune) -> bool { return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || unicode.is_digit(c) } +@(require_results) is_xdigit :: proc(c: rune) -> bool { return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || unicode.is_digit(c) } // find the first utf8 charater and its size, return an error if the character is an error +@(require_results) utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) { c, size = utf8.decode_rune_in_string(bytes) @@ -90,6 +96,7 @@ utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) { // find the first utf8 charater and its size and advance the index // return an error if the character is an error +@(require_results) utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) { size: int c, size = utf8.decode_rune_in_string(bytes[index^:]) @@ -103,10 +110,12 @@ utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) { } // continuation byte? +@(require_results) is_cont :: proc(b: byte) -> bool { return b & 0xc0 == 0x80 } +@(require_results) utf8_prev :: proc(bytes: string, a, b: int) -> int { b := b @@ -117,6 +126,7 @@ utf8_prev :: proc(bytes: string, a, b: int) -> int { return a < b ? b - 1 : a } +@(require_results) utf8_next :: proc(bytes: string, a: int) -> int { a := a b := len(bytes) @@ -128,6 +138,7 @@ utf8_next :: proc(bytes: string, a: int) -> int { return a < b ? a + 1 : b } +@(require_results) check_capture :: proc(ms: ^Match_State, l: rune) -> (int, Error) { l := int(l - '1') @@ -138,6 +149,7 @@ check_capture :: proc(ms: ^Match_State, l: rune) -> (int, Error) { return l, .OK } +@(require_results) capture_to_close :: proc(ms: ^Match_State) -> (int, Error) { level := ms.level - 1 @@ -152,6 +164,7 @@ capture_to_close :: proc(ms: ^Match_State) -> (int, Error) { return 0, .Invalid_Pattern_Capture } +@(require_results) class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) { step = p ch := utf8_advance(ms.pattern, &step) or_return @@ -163,7 +176,7 @@ class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) { return } - utf8_advance(ms.pattern, &step) or_return + _ = utf8_advance(ms.pattern, &step) or_return case '[': // fine with step by 1 @@ -198,6 +211,7 @@ class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) { return } +@(require_results) match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool, err: Error) { sig = true p := p @@ -240,6 +254,7 @@ match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool return } +@(require_results) single_match :: proc(ms: ^Match_State, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) { if s >= len(ms.src) { return @@ -261,6 +276,7 @@ single_match :: proc(ms: ^Match_State, s, p, ep: int) -> (matched: bool, schar_s return } +@(require_results) match_balance :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) { if p >= len(ms.pattern) - 1 { return INVALID, .Invalid_Pattern_Capture @@ -300,6 +316,7 @@ match_balance :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) return INVALID, .OK } +@(require_results) max_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) { m := s @@ -331,6 +348,7 @@ max_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) { return INVALID, .OK } +@(require_results) min_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) { s := s @@ -352,6 +370,7 @@ min_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) { } } +@(require_results) start_capture :: proc(ms: ^Match_State, s, p, what: int) -> (res: int, err: Error) { level := ms.level @@ -366,6 +385,7 @@ start_capture :: proc(ms: ^Match_State, s, p, what: int) -> (res: int, err: Erro return } +@(require_results) end_capture :: proc(ms: ^Match_State, s, p: int) -> (res: int, err: Error) { l := capture_to_close(ms) or_return @@ -379,6 +399,7 @@ end_capture :: proc(ms: ^Match_State, s, p: int) -> (res: int, err: Error) { return } +@(require_results) match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: Error) { index := check_capture(ms, char) or_return length := ms.capture[index].len @@ -390,6 +411,7 @@ match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: E return INVALID, .OK } +@(require_results) match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) { s := s p := p @@ -486,6 +508,7 @@ match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) { return s, .OK } +@(require_results) match_default :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) { s := s ep := class_end(ms, p) or_return @@ -521,6 +544,7 @@ match_default :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) return s, .OK } +@(require_results) push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []Match) -> (err: Error) { if i >= ms.level { if i == 0 { @@ -542,6 +566,7 @@ push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []M return } +@(require_results) push_captures :: proc( ms: ^Match_State, s: int, @@ -559,6 +584,7 @@ push_captures :: proc( // SPECIALS := "^$*+?.([%-" // all special characters inside a small ascii array +@(rodata) SPECIALS_TABLE := [256]bool { '^' = true, '$' = true, @@ -573,6 +599,7 @@ SPECIALS_TABLE := [256]bool { } // helper call to quick search for special characters +@(require_results) index_special :: proc(text: string) -> int { for i in 0..<len(text) { if SPECIALS_TABLE[text[i]] { @@ -583,6 +610,7 @@ index_special :: proc(text: string) -> int { return -1 } +@(require_results) lmem_find :: proc(s1, s2: string) -> int { l1 := len(s1) l2 := len(s2) @@ -618,6 +646,7 @@ lmem_find :: proc(s1, s2: string) -> int { // find a pattern with in a haystack with an offset // allow_memfind will speed up simple searches +@(require_results) find_aux :: proc( haystack: string, pattern: string, @@ -684,6 +713,7 @@ find_aux :: proc( // rest has to be used from captures // assumes captures is zeroed on first iteration // resets captures to zero on last iteration +@(require_results) gmatch :: proc( haystack: ^string, pattern: string, @@ -707,6 +737,7 @@ gmatch :: proc( } // gsub with builder, replace patterns found with the replace content +@(require_results) gsub_builder :: proc( builder: ^strings.Builder, haystack: string, @@ -746,6 +777,7 @@ gsub_builder :: proc( } // uses temp builder to build initial string - then allocates the result +@(require_results) gsub_allocator :: proc( haystack: string, pattern: string, @@ -768,12 +800,7 @@ Gsub_Proc :: proc( ) // call a procedure on every match in the haystack -gsub_with :: proc( - haystack: string, - pattern: string, - data: rawptr, - call: Gsub_Proc, -) { +gsub_with :: proc(haystack, pattern: string, data: rawptr, call: Gsub_Proc) { // find matches captures: [MAX_CAPTURES]Match haystack := haystack @@ -800,11 +827,8 @@ gsub :: proc { gsub_builder, gsub_allocator } // iterative find with zeroth capture only // assumes captures is zeroed on first iteration // resets captures to zero on last iteration -gfind :: proc( - haystack: ^string, - pattern: string, - captures: ^[MAX_CAPTURES]Match, -) -> (res: string, ok: bool) { +@(require_results) +gfind :: proc(haystack: ^string, pattern: string, captures: ^[MAX_CAPTURES]Match) -> (res: string, ok: bool) { haystack^ = haystack[captures[0].byte_end:] if len(haystack) > 0 { length, err := find_aux(haystack^, pattern, 0, true, captures) @@ -822,10 +846,8 @@ gfind :: proc( } // rebuilds a pattern into a case insensitive pattern -pattern_case_insensitive_builder :: proc( - builder: ^strings.Builder, - pattern: string, -) -> (res: string) { +@(require_results) +pattern_case_insensitive_builder :: proc(builder: ^strings.Builder, pattern: string) -> string { p := pattern last_percent: bool @@ -849,11 +871,8 @@ pattern_case_insensitive_builder :: proc( return strings.to_string(builder^) } -pattern_case_insensitive_allocator :: proc( - pattern: string, - cap: int = 256, - allocator := context.allocator, -) -> (res: string) { +@(require_results) +pattern_case_insensitive_allocator :: proc(pattern: string, cap: int = 256, allocator := context.allocator) -> string { builder := strings.builder_make(0, cap, context.temp_allocator) return pattern_case_insensitive_builder(&builder, pattern) } @@ -877,6 +896,7 @@ Matcher :: struct { } // init using haystack & pattern and an optional byte offset +@(require_results) matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matcher) { res.haystack = haystack res.pattern = pattern @@ -886,6 +906,7 @@ matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matche } // find the first match and return the byte start / end position in the string, true on success +@(require_results) matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bounds_check { matcher.captures_length, matcher.err = find_aux( matcher.haystack, @@ -902,6 +923,7 @@ matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bound } // find the first match and return the matched word, true on success +@(require_results) matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_check { matcher.captures_length, matcher.err = find_aux( matcher.haystack, @@ -917,6 +939,7 @@ matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_ } // get the capture at the "correct" spot, as spot 0 is reserved for the first match +@(require_results) matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> string #no_bounds_check { runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES - 1) cap := matcher.captures[index + 1] @@ -924,6 +947,7 @@ matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) } // get the raw match out of the captures, skipping spot 0 +@(require_results) matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> Match #no_bounds_check { runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES - 1) return matcher.captures[index + 1] @@ -933,6 +957,7 @@ matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_locati matcher_gmatch :: matcher_match_iter // iteratively match the haystack till it cant find any matches +@(require_results) matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: bool) { if len(matcher.iter) > 0 { matcher.captures_length, matcher.err = find_aux( @@ -962,6 +987,7 @@ matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: b } // get a slice of all valid captures above the first match +@(require_results) matcher_captures_slice :: proc(matcher: ^Matcher) -> []Match { return matcher.captures[1:matcher.captures_length] } |