aboutsummaryrefslogtreecommitdiff
path: root/core/text/regex/regex.odin
diff options
context:
space:
mode:
authorFeoramund <161657516+Feoramund@users.noreply.github.com>2025-05-23 20:20:59 -0400
committerFeoramund <161657516+Feoramund@users.noreply.github.com>2025-05-24 07:23:04 -0400
commitfedb9efb413dda3f2d9d4bbb39050219f8c0f394 (patch)
tree065a9ea9b253baace469aad0cce38645e444101f /core/text/regex/regex.odin
parenta2c0720fb046187bb00f5f3beeab4e9c284b18f0 (diff)
Make RegEx VM restartable and fix iterator infinite loop
Diffstat (limited to 'core/text/regex/regex.odin')
-rw-r--r--core/text/regex/regex.odin67
1 files changed, 62 insertions, 5 deletions
diff --git a/core/text/regex/regex.odin b/core/text/regex/regex.odin
index c805740f7..90aa34946 100644
--- a/core/text/regex/regex.odin
+++ b/core/text/regex/regex.odin
@@ -77,6 +77,8 @@ Match_Iterator :: struct {
vm: virtual_machine.Machine,
idx: int,
temp: runtime.Allocator,
+ threads: int,
+ done: bool,
}
/*
@@ -101,7 +103,6 @@ create :: proc(
permanent_allocator := context.allocator,
temporary_allocator := context.temp_allocator,
) -> (result: Regular_Expression, err: Error) {
-
// For the sake of speed and simplicity, we first run all the intermediate
// processes such as parsing and compilation through the temporary
// allocator.
@@ -294,6 +295,7 @@ create_iterator :: proc(
result.temp = temporary_allocator
result.vm = virtual_machine.create(result.regex.program, str)
result.vm.class_data = result.regex.class_data
+ result.threads = max(1, virtual_machine.opcode_count(result.vm.code) - 1)
return
}
@@ -457,8 +459,27 @@ match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, index: int, ok:
assert(len(it.capture.pos) >= common.MAX_CAPTURE_GROUPS,
"Pre-allocated RegEx capture `pos` must be at least 10 elements long.")
+ // Guard against situations in which the iterator should finish.
+ if it.done {
+ return
+ }
+
runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+ if it.idx > 0 {
+ // Reset the state so that `virtual_machine.run` can be called again.
+ it.vm.top_thread = 0
+ it.vm.current_rune = rune(0)
+ it.vm.current_rune_size = 0
+ for i in 0..<it.threads {
+ it.vm.threads[i] = {}
+ it.vm.next_threads[i] = {}
+ }
+ }
+
+ // Take note of where the string pointer is before we start.
+ sp_before := it.vm.string_pointer
+
saved: ^[2 * common.MAX_CAPTURE_GROUPS]int
{
context.allocator = it.temp
@@ -469,6 +490,28 @@ match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, index: int, ok:
}
}
+ if !ok {
+ // Match failed, bail out.
+ return
+ }
+
+ if it.vm.string_pointer == sp_before {
+ // The string pointer did not move, but there was a match.
+ //
+ // At this point, the pattern supplied to the iterator will infinitely
+ // loop if we do not intervene.
+ it.done = true
+ }
+ if it.vm.string_pointer == len(it.vm.memory) {
+ // The VM hit the end of the string.
+ //
+ // We do not check at the start, because a match of pattern `$`
+ // against string "" is valid and must return a match.
+ //
+ // This check prevents a double-match of `$` against a non-empty string.
+ it.done = true
+ }
+
str := string(it.vm.memory)
num_groups: int
@@ -488,9 +531,7 @@ match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, index: int, ok:
num_groups = n
}
- defer if ok {
- it.idx += 1
- }
+ defer it.idx += 1
if num_groups > 0 {
result = {it.capture.pos[:num_groups], it.capture.groups[:num_groups]}
@@ -504,8 +545,24 @@ match :: proc {
match_iterator,
}
+/*
+Reset an iterator, allowing it to be run again as if new.
+
+Inputs:
+- it: The iterator to reset.
+*/
reset :: proc(it: ^Match_Iterator) {
- it.idx = 0
+ it.done = false
+ it.idx = 0
+ it.vm.string_pointer = 0
+
+ it.vm.top_thread = 0
+ it.vm.current_rune = rune(0)
+ it.vm.current_rune_size = 0
+ for i in 0..<it.threads {
+ it.vm.threads[i] = {}
+ it.vm.next_threads[i] = {}
+ }
}
/*