diff options
| author | gingerBill <gingerBill@users.noreply.github.com> | 2023-01-01 13:10:49 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-01-01 13:10:49 +0000 |
| commit | c1384afe2fd705ce075277aa8dc6bc259dc94cdc (patch) | |
| tree | eac021b897fe6525a076264d5545aac6c96cfbb5 /core | |
| parent | 00823ca88c870d49186dbcaa21c54384c0b9364f (diff) | |
| parent | 547c7bce1b28757415c553830a18d94636cedbf8 (diff) | |
Merge branch 'master' into compiler-improvements-2022-12
Diffstat (limited to 'core')
| -rw-r--r-- | core/mem/allocators.odin | 2 | ||||
| -rw-r--r-- | core/mem/mem.odin | 10 | ||||
| -rw-r--r-- | core/mem/virtual/arena.odin | 4 | ||||
| -rw-r--r-- | core/os/dir_darwin.odin | 3 | ||||
| -rw-r--r-- | core/os/os_darwin.odin | 22 | ||||
| -rw-r--r-- | core/runtime/core.odin | 6 | ||||
| -rw-r--r-- | core/runtime/default_temporary_allocator.odin | 4 | ||||
| -rw-r--r-- | core/text/match/strlib.odin | 959 |
8 files changed, 994 insertions, 16 deletions
diff --git a/core/mem/allocators.odin b/core/mem/allocators.odin index fc009621b..66da12959 100644 --- a/core/mem/allocators.odin +++ b/core/mem/allocators.odin @@ -153,7 +153,7 @@ scratch_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, s := (^Scratch_Allocator)(allocator_data) if s.data == nil { - DEFAULT_BACKING_SIZE :: 1<<22 + DEFAULT_BACKING_SIZE :: 4 * Megabyte if !(context.allocator.procedure != scratch_allocator_proc && context.allocator.data != allocator_data) { panic("cyclic initialization of the scratch allocator with itself") diff --git a/core/mem/mem.odin b/core/mem/mem.odin index f7be69adc..bc77ca287 100644 --- a/core/mem/mem.odin +++ b/core/mem/mem.odin @@ -3,11 +3,11 @@ package mem import "core:runtime" import "core:intrinsics" -Byte :: 1 -Kilobyte :: 1024 * Byte -Megabyte :: 1024 * Kilobyte -Gigabyte :: 1024 * Megabyte -Terabyte :: 1024 * Gigabyte +Byte :: runtime.Byte +Kilobyte :: runtime.Kilobyte +Megabyte :: runtime.Megabyte +Gigabyte :: runtime.Gigabyte +Terabyte :: runtime.Terabyte set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr { return runtime.memset(data, i32(value), len) diff --git a/core/mem/virtual/arena.odin b/core/mem/virtual/arena.odin index e901cf6f3..0ddb116fd 100644 --- a/core/mem/virtual/arena.odin +++ b/core/mem/virtual/arena.odin @@ -19,11 +19,11 @@ Arena :: struct { // 1 MiB should be enough to start with -DEFAULT_ARENA_STATIC_COMMIT_SIZE :: 1<<20 +DEFAULT_ARENA_STATIC_COMMIT_SIZE :: mem.Megabyte DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: DEFAULT_ARENA_STATIC_COMMIT_SIZE // 1 GiB on 64-bit systems, 128 MiB on 32-bit systems by default -DEFAULT_ARENA_STATIC_RESERVE_SIZE :: 1<<30 when size_of(uintptr) == 8 else 1<<27 +DEFAULT_ARENA_STATIC_RESERVE_SIZE :: mem.Gigabyte when size_of(uintptr) == 8 else 128 * mem.Megabyte diff --git a/core/os/dir_darwin.odin b/core/os/dir_darwin.odin index 1f54ef1ab..7d0f2936d 100644 --- a/core/os/dir_darwin.odin +++ b/core/os/dir_darwin.odin @@ -14,11 
+14,12 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F dirpath: string dirpath, err = absolute_path_from_handle(fd) - if err != ERROR_NONE { return } + defer delete(dirpath) + n := n size := n if n <= 0 { diff --git a/core/os/os_darwin.odin b/core/os/os_darwin.odin index 4c32323ff..b40edb410 100644 --- a/core/os/os_darwin.odin +++ b/core/os/os_darwin.odin @@ -342,21 +342,33 @@ get_last_error_string :: proc() -> string { } open :: proc(path: string, flags: int = O_RDWR, mode: int = 0) -> (Handle, Errno) { + isDir := is_dir_path(path) + flags := flags + if isDir { + /* + @INFO(Platin): To make it impossible to use the wrong flag for dir's + as you can't write to a dir only read which makes it fail to open + */ + flags = O_RDONLY + } + cstr := strings.clone_to_cstring(path, context.temp_allocator) handle := _unix_open(cstr, i32(flags), u16(mode)) if handle == -1 { - return INVALID_HANDLE, 1 + return INVALID_HANDLE, cast(Errno)get_last_error() } -when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 { - if mode != 0 { + /* + @INFO(Platin): this is only done because O_CREATE for some reason fails to apply mode + should not happen if the handle is a directory + */ + if mode != 0 && !isDir { err := fchmod(handle, cast(u16)mode) if err != 0 { _unix_close(handle) - return INVALID_HANDLE, 1 + return INVALID_HANDLE, cast(Errno)err } } -} return handle, 0 } diff --git a/core/runtime/core.odin b/core/runtime/core.odin index 108609f78..a74bf4285 100644 --- a/core/runtime/core.odin +++ b/core/runtime/core.odin @@ -329,6 +329,12 @@ Allocator :: struct { data: rawptr, } +Byte :: 1 +Kilobyte :: 1024 * Byte +Megabyte :: 1024 * Kilobyte +Gigabyte :: 1024 * Megabyte +Terabyte :: 1024 * Gigabyte + // Logging stuff Logger_Level :: enum uint { diff --git a/core/runtime/default_temporary_allocator.odin b/core/runtime/default_temporary_allocator.odin index 176634ff9..b71cd103a 100644 --- a/core/runtime/default_temporary_allocator.odin +++ 
b/core/runtime/default_temporary_allocator.odin @@ -1,6 +1,6 @@ package runtime -DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 1<<22) +DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 4 * Megabyte) when ODIN_OS == .Freestanding || ODIN_OS == .JS || ODIN_DEFAULT_TO_NIL_ALLOCATOR { @@ -197,4 +197,4 @@ default_temp_allocator :: proc(allocator: ^Default_Temp_Allocator) -> Allocator procedure = default_temp_allocator_proc, data = allocator, } -}
/*
	Tail of the preceding hunk (core/runtime/default_temporary_allocator.odin) and the
	diff header that introduces this file, preserved verbatim from the commit page:

	\ No newline at end of file
	+}
	diff --git a/core/text/match/strlib.odin b/core/text/match/strlib.odin
	new file mode 100644
	index 000000000..b8c2861fa
	--- /dev/null
	+++ b/core/text/match/strlib.odin
	@@ -0,0 +1,959 @@

	Everything below is the content of core/text/match/strlib.odin: a UTF-8 aware
	implementation of Lua-style string patterns ("%a", "[a-z]", "%b()", "%f[%w]", captures).
*/
package text_match

import "core:runtime"
import "core:unicode"
import "core:unicode/utf8"
import "core:strings"

// Maximum number of capture slots tracked by a single match
MAX_CAPTURES :: 32

// A capture while matching: byte offset into the source plus its byte length.
// `len` may also hold CAP_UNFINISHED or CAP_POSITION.
Capture :: struct {
	init: int,
	len:  int,
}

// A finished match / capture as a half-open byte range of the haystack
Match :: struct {
	byte_start, byte_end: int,
}

Error :: enum {
	OK,
	OOB,
	Invalid_Capture_Index,
	Invalid_Pattern_Capture,
	Unfinished_Capture,
	Malformed_Pattern,
	Rune_Error,
	Match_Invalid,
}

L_ESC          :: '%'
CAP_POSITION   :: -2
CAP_UNFINISHED :: -1
INVALID        :: -1

// State shared by the recursive matcher procedures
Match_State :: struct {
	src:     string,
	pattern: string,
	level:   int,                    // number of captures started so far
	capture: [MAX_CAPTURES]Capture,
}

// Test rune `c` against the class letter `cl` (the character following '%').
// An upper case class letter negates the class; any non-class rune is compared literally.
match_class :: proc(c: rune, cl: rune) -> (res: bool) {
	switch unicode.to_lower(cl) {
	case 'a': res = is_alpha(c)
	case 'c': res = is_cntrl(c)
	case 'd': res = is_digit(c)
	case 'g': res = is_graph(c)
	case 'l': res = is_lower(c)
	case 'p': res = is_punct(c)
	case 's': res = is_space(c)
	case 'u': res = is_upper(c)
	case 'w': res = is_alnum(c)
	case 'x': res = is_xdigit(c)
	case: return cl == c
	}

	return is_lower(cl) ? res : !res
}

is_alpha :: unicode.is_alpha
is_digit :: unicode.is_digit
is_lower :: unicode.is_lower
is_upper :: unicode.is_upper
is_punct :: unicode.is_punct
is_space :: unicode.is_space
is_cntrl :: unicode.is_control

is_alnum :: proc(c: rune) -> bool {
	return unicode.is_alpha(c) || unicode.is_digit(c)
}

// %g - visible characters: everything that is neither white space nor a control character.
// (This used to be a verbatim copy of `is_xdigit`.)
is_graph :: proc(c: rune) -> bool {
	return !unicode.is_space(c) && !unicode.is_control(c)
}

is_xdigit :: proc(c: rune) -> bool {
	return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || unicode.is_digit(c)
}

// find the first utf8 character and its size, return an error if the character is an error
utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) {
	c, size = utf8.decode_rune_in_string(bytes)

	// a decode failure reports a size of 0 (empty input) or 1 (invalid byte);
	// a correctly encoded U+FFFD is three bytes wide and is not an error
	if c == utf8.RUNE_ERROR && size <= 1 {
		err = .Rune_Error
	}

	return
}

// find the first utf8 character and its size and advance the index
// return an error if the character is an error
utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) {
	size: int
	c, size = utf8.decode_rune_in_string(bytes[index^:])

	// see utf8_peek: only real decode failures are errors
	if c == utf8.RUNE_ERROR && size <= 1 {
		err = .Rune_Error
	}

	index^ += size
	return
}

// continuation byte?
is_cont :: proc(b: byte) -> bool {
	return b & 0xc0 == 0x80
}

// byte index of the rune start preceding `b`, never going below `a`
utf8_prev :: proc(bytes: string, a, b: int) -> int {
	b := b

	for a < b && is_cont(bytes[b - 1]) {
		b -= 1
	}

	return a < b ? b - 1 : a
}

// byte index of the rune start following `a`, or len(bytes) at the end
utf8_next :: proc(bytes: string, a: int) -> int {
	a := a
	b := len(bytes)

	for a < b - 1 && is_cont(bytes[a + 1]) {
		a += 1
	}

	return a < b ? a + 1 : b
}

// translate the digit of a back-reference ('1'..'9') into a finished capture index
check_capture :: proc(ms: ^Match_State, l: rune) -> (int, Error) {
	l := int(l - '1')

	if l < 0 || l >= ms.level || ms.capture[l].len == CAP_UNFINISHED {
		return 0, .Invalid_Capture_Index
	}

	return l, .OK
}

// index of the innermost capture that is still open
capture_to_close :: proc(ms: ^Match_State) -> (int, Error) {
	level := ms.level - 1

	for level >= 0 {
		if ms.capture[level].len == CAP_UNFINISHED {
			return level, .OK
		}

		level -= 1
	}

	return 0, .Invalid_Pattern_Capture
}

// Returns the byte index just past the single pattern item starting at `p`:
// a plain rune, an escape ("%a") or a bracket class ("[...]").
class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) {
	step = p
	ch := utf8_advance(ms.pattern, &step) or_return

	switch ch {
	case L_ESC:
		if step == len(ms.pattern) {
			err = .Malformed_Pattern
			return
		}

		utf8_advance(ms.pattern, &step) or_return

	case '[':
		// fine with step by 1
		if step + 1 < len(ms.pattern) && ms.pattern[step] == '^' {
			step += 1
		}

		// run till end is reached
		for {
			if step >= len(ms.pattern) {
				err = .Malformed_Pattern
				return
			}

			b := ms.pattern[step]
			if b == ']' {
				break
			}

			// dont care about utf8 here, continuation bytes never equal ']' or '%'
			step += 1

			// skip the byte following an escape as well, so that "%]" does not
			// terminate the class; running off the end is caught on the next iteration
			if b == L_ESC && step < len(ms.pattern) {
				step += 1
			}
		}

		// advance last time, past the closing ']'
		step += 1
	}

	return
}

// Test `c` against the bracket class whose '[' is at `p` and whose ']' is at `ec`.
match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool, err: Error) {
	sig = true
	p := p

	if ms.pattern[p + 1] == '^' {
		p += 1
		sig = false
	}

	// step over the opening '[' (or the '^'): neither is a member of the set
	p += 1

	// while inside of class range
	for p < ec {
		char := utf8_advance(ms.pattern, &p) or_return

		// e.g. %a
		if char == L_ESC {
			next := utf8_advance(ms.pattern, &p) or_return

			if match_class(c, next) {
				return
			}
		} else {
			next, next_size := utf8_peek(ms.pattern[p:]) or_return

			// a '-' only forms a range when its upper bound is still inside the class,
			// "[a-]" is the set of 'a' and '-'
			if next == '-' && p + next_size < ec {
				// advance 2 codepoints
				p += next_size
				last := utf8_advance(ms.pattern, &p) or_return

				if char <= c && c <= last {
					return
				}
			} else if char == c {
				return
			}
		}
	}

	sig = !sig
	return
}

// Match the single pattern item at `p` (ending at `ep`) against the rune at `s`.
// Returns the byte size of the source rune so that the caller can advance.
single_match :: proc(ms: ^Match_State, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) {
	if s >= len(ms.src) {
		return
	}

	pchar, psize := utf8_peek(ms.pattern[p:]) or_return
	schar, ssize := utf8_peek(ms.src[s:]) or_return
	schar_size = ssize

	switch pchar {
	case '.': matched = true
	case L_ESC:
		pchar_next, _ := utf8_peek(ms.pattern[p + psize:]) or_return
		matched = match_class(schar, pchar_next)
	case '[': matched = match_bracket_class(ms, schar, p, ep - 1) or_return
	case: matched = schar == pchar
	}

	return
}

// %bxy - match a balanced run starting with x and ending with the matching y.
// `p` is the index of x inside the pattern. Returns the index after the closing y.
match_balance :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	if p >= len(ms.pattern) - 1 {
		return INVALID, .Invalid_Pattern_Capture
	}

	// nothing left in the source: not a match, and not a decoding error either
	if s >= len(ms.src) {
		return INVALID, .OK
	}

	schar, ssize := utf8_peek(ms.src[s:]) or_return
	pchar, psize := utf8_peek(ms.pattern[p:]) or_return

	// skip until the src and pattern match
	if schar != pchar {
		return INVALID, .OK
	}

	depth := 1
	cursor := s + ssize
	begin := pchar
	end, _ := utf8_peek(ms.pattern[p + psize:]) or_return

	for cursor < len(ms.src) {
		ch := utf8_advance(ms.src, &cursor) or_return

		// `end` is tested first, so identical delimiters close immediately
		switch ch {
		case end:
			depth -= 1

			if depth == 0 {
				return cursor, .OK
			}

		case begin:
			depth += 1
		}
	}

	return INVALID, .OK
}

// greedy repetition ('*' and '+'): take as many items as possible, then back off
max_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
	m := s

	// count up matches
	for {
		matched, size := single_match(ms, m, p, ep) or_return

		if !matched {
			break
		}

		m += size
	}

	for s <= m {
		result := match(ms, m, ep + 1) or_return

		if result != INVALID {
			return result, .OK
		}

		if s == m {
			break
		}

		m = utf8_prev(ms.src, s, m)
	}

	return INVALID, .OK
}

// lazy repetition ('-'): take as few items as possible
min_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
	s := s

	for {
		result := match(ms, s, ep + 1) or_return

		if result != INVALID {
			return result, .OK
		} else {
			// TODO receive next step maybe?
			matched, rune_size := single_match(ms, s, p, ep) or_return

			if matched {
				s += rune_size
			} else {
				return INVALID, .OK
			}
		}
	}
}

// open a new capture at `s` and continue matching at `p`
start_capture :: proc(ms: ^Match_State, s, p, what: int) -> (res: int, err: Error) {
	level := ms.level

	// too many captures for the fixed size capture storage
	if level >= MAX_CAPTURES {
		return INVALID, .OOB
	}

	ms.capture[level].init = s
	ms.capture[level].len = what
	ms.level += 1

	res = match(ms, s, p) or_return
	if res == INVALID {
		// undo capture
		ms.level -= 1
	}
	return
}

// close the innermost open capture at `s` and continue matching at `p`
end_capture :: proc(ms: ^Match_State, s, p: int) -> (res: int, err: Error) {
	l := capture_to_close(ms) or_return

	// TODO double check, could do string as int index
	ms.capture[l].len = s - ms.capture[l].init

	res = match(ms, s, p) or_return
	if res == INVALID {
		// undo capture
		ms.capture[l].len = CAP_UNFINISHED
	}
	return
}

// back-reference (%1..%9): the source at `s` has to repeat the text of that capture
match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: Error) {
	index := check_capture(ms, char) or_return
	init := ms.capture[index].init
	length := ms.capture[index].len

	// position captures carry no text that could be repeated
	if length < 0 {
		return INVALID, .OK
	}

	// compare the text itself, an equal length alone is not a match
	if len(ms.src) - s >= length && ms.src[init:init + length] == ms.src[s:s + length] {
		return s + length, .OK
	}

	return INVALID, .OK
}

// Match the pattern from byte index `p` against the source from byte index `s`.
// Returns the byte index just past the match, or INVALID when there is none.
match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	s := s
	p := p

	if p == len(ms.pattern) {
		return s, .OK
	}

	// NOTE we can walk by ascii steps if we know the characters are ascii
	char, _ := utf8_peek(ms.pattern[p:]) or_return
	switch char {
	case '(':
		if p + 1 < len(ms.pattern) && ms.pattern[p + 1] == ')' {
			s = start_capture(ms, s, p + 2, CAP_POSITION) or_return
		} else {
			s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return
		}

	case ')':
		s = end_capture(ms, s, p + 1) or_return

	case '$':
		// only an anchor when it is the last pattern character
		if p + 1 != len(ms.pattern) {
			return match_default(ms, s, p)
		}

		if len(ms.src) != s {
			s = INVALID
		}

	case L_ESC:
		// stop short patterns like "%" only
		if p + 1 >= len(ms.pattern) {
			err = .OOB
			return
		}

		switch ms.pattern[p + 1] {
		// balanced string
		case 'b':
			s = match_balance(ms, s, p + 2) or_return

			if s != INVALID {
				// eg after %b()
				// NOTE(review): the +4 assumes both delimiters are single byte - confirm
				return match(ms, s, p + 4)
			}

		// frontier
		case 'f':
			p += 2

			// "%f" has to be followed by a bracket class
			if p >= len(ms.pattern) || ms.pattern[p] != '[' {
				return INVALID, .Invalid_Pattern_Capture
			}

			ep := class_end(ms, p) or_return
			previous, current: rune

			// get previous
			if s != 0 {
				temp := utf8_prev(ms.src, 0, s)
				previous, _ = utf8_peek(ms.src[temp:]) or_return
			}

			// get current
			if s != len(ms.src) {
				current, _ = utf8_peek(ms.src[s:]) or_return
			}

			m1 := match_bracket_class(ms, previous, p, ep - 1) or_return
			m2 := match_bracket_class(ms, current, p, ep - 1) or_return

			if !m1 && m2 {
				return match(ms, s, ep)
			}

			s = INVALID

		// capture group, "%0" is rejected by check_capture
		case '0'..='9':
			s = match_capture(ms, s, rune(ms.pattern[p + 1])) or_return

			if s != INVALID {
				return match(ms, s, p + 2)
			}

		case: return match_default(ms, s, p)
		}

	case:
		return match_default(ms, s, p)
	}

	return s, .OK
}

// match a single item followed by an optional repetition suffix ('?', '+', '*', '-')
match_default :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	s := s
	ep := class_end(ms, p) or_return
	single_matched, ssize := single_match(ms, s, p, ep) or_return

	if !single_matched {
		epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0

		switch epc {
		// these suffixes accept zero repetitions
		case '*', '?', '-': return match(ms, s, ep + 1)
		case: s = INVALID
		}
	} else {
		epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0

		switch epc {
		case '?':
			result := match(ms, s + ssize, ep + 1) or_return

			if result != INVALID {
				s = result
			} else {
				return match(ms, s, ep + 1)
			}

		case '+': s = max_expand(ms, s + ssize, p, ep) or_return
		case '*': s = max_expand(ms, s, p, ep) or_return
		case '-': s = min_expand(ms, s, p, ep) or_return
		case: return match(ms, s + ssize, ep)
		}
	}

	return s, .OK
}

// store capture `i` of the match state in `matches[i]`
push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []Match) -> (err: Error) {
	if i >= ms.level {
		if i == 0 {
			matches[0] = { 0, e - s }
		} else {
			err = .Invalid_Capture_Index
		}
	} else {
		init := ms.capture[i].init
		length := ms.capture[i].len

		switch length {
		case CAP_UNFINISHED: err = .Unfinished_Capture
		case CAP_POSITION: matches[i] = { init, init + 1 }
		case: matches[i] = { init, init + length }
		}
	}

	return
}

// store all captures of the match state in `matches`, returns how many were written
push_captures :: proc(
	ms: ^Match_State,
	s: int,
	e: int,
	matches: []Match,
) -> (nlevels: int, err: Error) {
	nlevels = 1 if ms.level == 0 && s != -1 else ms.level

	// more captures than the output can hold
	if nlevels > len(matches) {
		return 0, .OOB
	}

	for i in 0..<nlevels {
		push_onecapture(ms, i, s, e, matches) or_return
	}

	return
}

// SPECIALS := "^$*+?.([%-"
// all special characters inside a small ascii array
SPECIALS_TABLE := [256]bool {
	'^' = true,
	'$' = true,
	'*' = true,
	'+' = true,
	'?' = true,
	'.' = true,
	'(' = true,
	'[' = true,
	'%' = true,
	'-' = true,
}

// helper call to quick search for special characters
index_special :: proc(text: string) -> int {
	for i in 0..<len(text) {
		if SPECIALS_TABLE[text[i]] {
			return i
		}
	}

	return -1
}

// plain substring search: byte index of the first `s2` in `s1`, 0 for an empty `s2`, -1 if absent
lmem_find :: proc(s1, s2: string) -> int {
	return strings.index(s1, s2)
}

// find a pattern within a haystack with an offset
// allow_memfind will speed up simple searches
//
// matches[0] receives the whole match, matches[1:] the captures; `captures` is the number
// of entries written (0 when nothing matched). Positions are byte offsets into `haystack`.
find_aux :: proc(
	haystack: string,
	pattern: string,
	offset: int,
	allow_memfind: bool,
	matches: ^[MAX_CAPTURES]Match,
) -> (captures: int, err: Error) {
	// an offset outside of the haystack can not be sliced
	if offset < 0 || offset > len(haystack) {
		err = .OOB
		return
	}

	s := offset
	p := 0

	specials_idx := index_special(pattern)
	if allow_memfind && specials_idx == -1 {
		if index := lmem_find(haystack[s:], pattern); index != -1 {
			matches[0] = { index + s, index + s + len(pattern) }
			captures = 1
			return
		} else {
			return
		}
	}

	pattern := pattern
	anchor: bool
	if len(pattern) > 0 && pattern[0] == '^' {
		anchor = true
		pattern = pattern[1:]
	}

	ms := Match_State {
		src = haystack,
		pattern = pattern,
	}

	for {
		res := match(&ms, s, p) or_return

		if res != INVALID {
			// disallow non advancing match
			if s == res {
				err = .Match_Invalid
			}

			// NOTE(Skytrias): first result is reserved for a full match
			matches[0] = { s, res }

			// rest are the actual captures
			captures = push_captures(&ms, -1, -1, matches[1:]) or_return
			captures += 1

			return
		}

		// try again at the next rune; stepping a single byte would land inside of
		// multi byte runes and make the next attempt fail with a rune error
		s = utf8_next(ms.src, s)

		if !(s < len(ms.src) && !anchor) {
			break
		}
	}

	return
}

// iterative matching which returns the 0th/1st match
// rest has to be used from captures
gmatch :: proc(
	haystack: ^string,
	pattern: string,
	captures: ^[MAX_CAPTURES]Match,
) -> (res: string, ok: bool) {
	if len(haystack) > 0 {
		length, err := find_aux(haystack^, pattern, 0, false, captures)

		if length != 0 && err == .OK {
			ok = true
			first := length > 1 ? 1 : 0
			cap := captures[first]
			res = haystack[cap.byte_start:cap.byte_end]

			// continue after the whole match, not after the first capture,
			// otherwise consecutive matches could overlap
			haystack^ = haystack[captures[0].byte_end:]
		}
	}

	return
}

// gsub with builder, replace patterns found with the replace content
gsub_builder :: proc(
	builder: ^strings.Builder,
	haystack: string,
	pattern: string,
	replace: string,
) -> string {
	// find matches
	captures: [MAX_CAPTURES]Match
	haystack := haystack

	for {
		length, err := find_aux(haystack, pattern, 0, false, &captures)

		// done
		if length == 0 {
			break
		}

		if err != .OK {
			return {}
		}

		cap := captures[0]

		// write front till capture
		strings.write_string(builder, haystack[:cap.byte_start])

		// write replacements
		strings.write_string(builder, replace)

		// advance string till end
		haystack = haystack[cap.byte_end:]
	}

	strings.write_string(builder, haystack[:])
	return strings.to_string(builder^)
}

// uses temp builder to build initial string - then allocates the result
// the returned string is owned by the caller and allocated with `allocator`
gsub_allocator :: proc(
	haystack: string,
	pattern: string,
	replace: string,
	allocator := context.allocator,
) -> string {
	builder := strings.builder_make(0, 256, context.temp_allocator)
	replaced := gsub_builder(&builder, haystack, pattern, replace)
	return strings.clone(replaced, allocator)
}

Gsub_Proc :: proc(
	// optional passed data
	data: rawptr,
	// word match found
	word: string,
	// current haystack for found captures
	haystack: string,
	// found captures - empty for no captures
	captures: []Match,
)

// call a procedure on every match in the haystack
gsub_with :: proc(
	haystack: string,
	pattern: string,
	data: rawptr,
	call: Gsub_Proc,
) {
	// find matches
	captures: [MAX_CAPTURES]Match
	haystack := haystack

	for {
		length, err := find_aux(haystack, pattern, 0, false, &captures)

		// done
		if length == 0 || err != .OK {
			break
		}

		cap := captures[0]

		word := haystack[cap.byte_start:cap.byte_end]
		call(data, word, haystack, captures[1:length])

		// advance string till end
		haystack = haystack[cap.byte_end:]
	}
}

gsub :: proc { gsub_builder, gsub_allocator }

// iterative find with zeroth capture only
gfind :: proc(
	haystack: ^string,
	pattern: string,
	captures: ^[MAX_CAPTURES]Match,
) -> (res: string, ok: bool) {
	if len(haystack) > 0 {
		length, err := find_aux(haystack^, pattern, 0, true, captures)

		if length != 0 && err == .OK {
			ok = true
			cap := captures[0]
			res = haystack[cap.byte_start:cap.byte_end]
			haystack^ = haystack[cap.byte_end:]
		}
	}

	return
}

// rebuilds a pattern into a case insensitive pattern
// NOTE(review): letters inside of an existing bracket class are rewritten as well,
// e.g. "[ab]" becomes "[[aA][bB]]" - confirm callers only pass patterns without classes
pattern_case_insensitive_builder :: proc(
	builder: ^strings.Builder,
	pattern: string,
) -> (res: string) {
	p := pattern
	last_percent: bool

	for len(p) > 0 {
		char, size := utf8.decode_rune_in_string(p)

		if unicode.is_alpha(char) && !last_percent {
			// write character class in manually
			strings.write_byte(builder, '[')
			strings.write_rune(builder, unicode.to_lower(char))
			strings.write_rune(builder, unicode.to_upper(char))
			strings.write_byte(builder, ']')
		} else {
			strings.write_rune(builder, char)
		}

		last_percent = char == L_ESC
		p = p[size:]
	}

	return strings.to_string(builder^)
}

// builds the case insensitive pattern with a temp builder of capacity `cap`
// the returned string is owned by the caller and allocated with `allocator`
pattern_case_insensitive_allocator :: proc(
	pattern: string,
	cap: int = 256,
	allocator := context.allocator,
) -> (res: string) {
	builder := strings.builder_make(0, cap, context.temp_allocator)
	rebuilt := pattern_case_insensitive_builder(&builder, pattern)
	return strings.clone(rebuilt, allocator)
}

pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator }

// Matcher helper struct that stores optional data you might want to use or not
// as lua is far more dynamic this helps dealing with too much data
// this also allows use of find/match/gmatch through one struct
Matcher :: struct {
	haystack: string,
	pattern: string,
	captures: [MAX_CAPTURES]Match,
	captures_length: int,
	offset: int,
	err: Error,

	// changing content for iterators
	iter: string,
	iter_index: int,
}

// init using haystack & pattern and an optional byte offset
matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matcher) {
	res.haystack = haystack
	res.pattern = pattern
	res.offset = offset
	res.iter = haystack
	return
}

// find the first match and return the byte start / end position in the string, true on success
matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bounds_check {
	matcher.captures_length, matcher.err = find_aux(
		matcher.haystack,
		matcher.pattern,
		matcher.offset,
		true,
		&matcher.captures,
	)
	ok = matcher.captures_length > 0 && matcher.err == .OK
	match := matcher.captures[0]
	start = match.byte_start
	end = match.byte_end
	return
}

// find the first match and return the matched word, true on success
matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_check {
	matcher.captures_length, matcher.err = find_aux(
		matcher.haystack,
		matcher.pattern,
		matcher.offset,
		false,
		&matcher.captures,
	)
	ok = matcher.captures_length > 0 && matcher.err == .OK
	match := matcher.captures[0]
	word = matcher.haystack[match.byte_start:match.byte_end]
	return
}

// get the capture at the "correct" spot, as spot 0 is reserved for the first match
// NOTE(review): after matcher_match_iter the stored positions are relative to the
// remaining `iter` string, not to `haystack` - confirm before mixing the two
matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> string #no_bounds_check {
	// valid slots are 0..<MAX_CAPTURES, the last one included
	runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES)
	cap := matcher.captures[index + 1]
	return matcher.haystack[cap.byte_start:cap.byte_end]
}

// get the raw match out of the captures, skipping spot 0
matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> Match #no_bounds_check {
	// valid slots are 0..<MAX_CAPTURES, the last one included
	runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES)
	return matcher.captures[index + 1]
}

// alias
matcher_gmatch :: matcher_match_iter

// iteratively match the haystack till it cant find any matches
// NOTE(review): `matcher.offset` is applied to the remaining text on every call,
// not only on the first one - confirm that this is intended
matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: bool) {
	if len(matcher.iter) > 0 {
		matcher.captures_length, matcher.err = find_aux(
			matcher.iter,
			matcher.pattern,
			matcher.offset,
			false,
			&matcher.captures,
		)

		if matcher.captures_length != 0 && matcher.err == .OK {
			ok = true
			first := matcher.captures_length > 1 ? 1 : 0
			match := matcher.captures[first]

			// output
			res = matcher.iter[match.byte_start:match.byte_end]
			index = matcher.iter_index

			// advance past the whole match, not just past the first capture,
			// otherwise consecutive matches could overlap
			matcher.iter_index += 1
			matcher.iter = matcher.iter[matcher.captures[0].byte_end:]
		}
	}

	return
}

// get a slice of all valid captures above the first match
matcher_captures_slice :: proc(matcher: ^Matcher) -> []Match {
	// nothing matched (yet): there is no capture to slice
	if matcher.captures_length < 1 {
		return nil
	}

	return matcher.captures[1:matcher.captures_length]
}