diff options
| author | gingerBill <bill@gingerbill.org> | 2020-09-25 20:20:53 +0100 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2020-09-25 20:20:53 +0100 |
| commit | 8cc5cd149489bc808c231cdc92e78011e49ce426 (patch) | |
| tree | 68a308816e6c6c5f1dc76244017df7cb92ef7b0d /core/path/filepath | |
| parent | 6b634d5e4604af01c86155def315c43c251fa8db (diff) | |
Add `package path/filepath`; Add `os.stat` for windows (TODO: unix)
Diffstat (limited to 'core/path/filepath')
| -rw-r--r-- | core/path/filepath/match.odin | 376 | ||||
| -rw-r--r-- | core/path/filepath/path.odin | 218 | ||||
| -rw-r--r-- | core/path/filepath/path_unix.odin | 5 | ||||
| -rw-r--r-- | core/path/filepath/path_windows.odin | 79 |
4 files changed, 678 insertions, 0 deletions
diff --git a/core/path/filepath/match.odin b/core/path/filepath/match.odin
new file mode 100644
index 000000000..01f899da3
--- /dev/null
+++ b/core/path/filepath/match.odin
@@ -0,0 +1,376 @@
+package filepath
+
+import "core:os"
+import "core:sort"
+import "core:strings"
+import "core:unicode/utf8"
+
+// Match_Error is the error type returned by match and glob
+Match_Error :: enum {
+	None,
+	Syntax_Error,
+}
+
+// match reports whether "name" matches the shell pattern
+// Pattern syntax is:
+//	pattern:
+//		{term}
+//	term:
+//		'*'	matches any sequence of non-separator characters
+//		'?'	matches any single non-separator character
+//		'[' ['^'] { character-range } ']'
+//			character class (cannot be empty)
+//		c	matches character c (c != '*', '?', '\\', '[')
+//		'\\' c	matches character c
+//
+//	character-range
+//		c	matches character c (c != '\\', '-', ']')
+//		'\\' c	matches character c
+//		lo '-' hi	matches character c for lo <= c <= hi
+//
+// match requires that the pattern matches the entirety of the name, not just a substring
+// The only possible error returned is .Syntax_Error
+// On Windows, '\\' is the path separator and is never treated as an escape character
+//
+// NOTE(bill): This is effectively shell-style pattern matching
+//
+match :: proc(pattern, name: string) -> (matched: bool, err: Match_Error) {
+	pattern, name := pattern, name;
+	pattern_loop: for len(pattern) > 0 {
+		star: bool;
+		chunk: string;
+		star, chunk, pattern = scan_chunk(pattern);
+		if star && chunk == "" {
+			// A trailing '*' matches the rest of the name unless it contains a separator
+			return !strings.contains(name, SEPARATOR_STRING), .None;
+		}
+
+		t: string;
+		ok: bool;
+		t, ok, err = match_chunk(chunk, name);
+
+		// Accept the chunk match, unless this is the last chunk and part of the name is left over;
+		// in that case a leading '*' may still find a match further along
+		if ok && (len(t) == 0 || len(pattern) > 0) {
+			name = t;
+			continue;
+		}
+		if err != .None {
+			return;
+		}
+		if star {
+			// Retry the chunk at each later offset that the '*' may absorb (never past a separator)
+			for i := 0; i < len(name) && name[i] != SEPARATOR; i += 1 {
+				t, ok, err = match_chunk(chunk, name[i+1:]);
+				if ok {
+					// The last chunk must consume the whole name
+					if len(pattern) == 0 && len(t) > 0 {
+						continue;
+					}
+					name = t;
+					continue pattern_loop;
+				}
+				if err != .None {
+					return;
+				}
+			}
+		}
+
+		return false, .None;
+	}
+
+	return len(name) == 0, .None;
+}
+
+
+// scan_chunk splits off the next chunk of "pattern": everything up to, but not including,
+// the next '*' outside of a character class. "star" is set if the chunk was preceded by '*'
+@(private="file")
+scan_chunk :: proc(pattern: string) -> (star: bool, chunk, rest: string) {
+	pattern := pattern;
+	for len(pattern) > 0 && pattern[0] == '*' {
+		pattern = pattern[1:];
+		star = true;
+	}
+	in_range := false;
+	i: int;
+
+	scan_loop: for i = 0; i < len(pattern); i += 1 {
+		switch pattern[i] {
+		case '\\':
+			// Skip the escaped character (escapes are disabled on Windows)
+			when ODIN_OS != "windows" {
+				if i+1 < len(pattern) {
+					i += 1;
+				}
+			}
+		case '[':
+			in_range = true;
+		case ']':
+			in_range = false;
+		case '*':
+			if !in_range {
+				break scan_loop;
+			}
+
+		}
+	}
+	return star, pattern[:i], pattern[i:];
+}
+
+// match_chunk checks whether "chunk" matches at the start of "s"
+// If so, it returns ok == true and the unmatched remainder of "s"
+@(private="file")
+match_chunk :: proc(chunk, s: string) -> (rest: string, ok: bool, err: Match_Error) {
+	chunk, s := chunk, s;
+	for len(chunk) > 0 {
+		if len(s) == 0 {
+			return;
+		}
+		switch chunk[0] {
+		case '[':
+			// A character class consumes exactly one rune of "s"
+			r, w := utf8.decode_rune_in_string(s);
+			s = s[w:];
+			chunk = chunk[1:];
+			is_negated := false;
+			if len(chunk) > 0 && chunk[0] == '^' {
+				is_negated = true;
+				chunk = chunk[1:];
+			}
+			match := false;
+			range_count := 0;
+			for {
+				if len(chunk) > 0 && chunk[0] == ']' && range_count > 0 {
+					chunk = chunk[1:];
+					break;
+				}
+				lo, hi: rune;
+				if lo, chunk, err = get_escape(chunk); err != .None {
+					return;
+				}
+				hi = lo;
+				if chunk[0] == '-' {
+					if hi, chunk, err = get_escape(chunk[1:]); err != .None {
+						return;
+					}
+				}
+
+				if lo <= r && r <= hi {
+					match = true;
+				}
+				range_count += 1;
+			}
+			if match == is_negated {
+				return;
+			}
+
+		case '?':
+			if s[0] == SEPARATOR {
+				return;
+			}
+			_, w := utf8.decode_rune_in_string(s);
+			s = s[w:];
+			chunk = chunk[1:];
+
+		case '\\':
+			when ODIN_OS != "windows" {
+				chunk = chunk[1:];
+				if len(chunk) == 0 {
+					err = .Syntax_Error;
+					return;
+				}
+			}
+			fallthrough;
+		case:
+			if chunk[0] != s[0] {
+				return;
+			}
+			s = s[1:];
+			chunk = chunk[1:];
+
+		}
+	}
+	return s, true, .None;
+}
+
+// get_escape reads one, possibly escaped, rune of a character class from the start of "chunk"
+// It returns the rune and the rest of "chunk"; running out of pattern is a .Syntax_Error
+@(private="file")
+get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Match_Error) {
+	if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
+		err = .Syntax_Error;
+		return;
+	}
+	chunk := chunk;
+	if chunk[0] == '\\' && ODIN_OS != "windows" {
+		chunk = chunk[1:];
+		if len(chunk) == 0 {
+			err = .Syntax_Error;
+			return;
+		}
+	}
+
+	w: int;
+	r, w = utf8.decode_rune_in_string(chunk);
+	if r == utf8.RUNE_ERROR && w == 1 {
+		err = .Syntax_Error;
+	}
+
+	next_chunk = chunk[w:];
+	if len(next_chunk) == 0 {
+		err = .Syntax_Error;
+	}
+
+	return;
+}
+
+
+
+// glob returns the names of all files matching pattern or nil if there are no matching files
+// The syntax of patterns is the same as "match".
+// The pattern may describe hierarchical names such as /usr/*/bin (assuming '/' is a separator)
+// The result is allocated with "allocator"
+//
+// glob ignores file system errors
+//
+glob :: proc(pattern: string, allocator := context.allocator) -> (matches: []string, err: Match_Error) {
+	// Everything below, including _glob and join, allocates through the context
+	context.allocator = allocator;
+	if !has_meta(pattern) {
+		// TODO(bill): os.lstat on here to check for error
+		m := make([]string, 1, allocator);
+		m[0] = pattern;
+		return m[:], .None;
+	}
+
+	temp_buf: [8]byte;
+
+	dir, file := split(pattern);
+	volume_len := 0;
+	when ODIN_OS == "windows" {
+		volume_len, dir = clean_glob_path_windows(dir, temp_buf[:]);
+	} else {
+		dir = clean_glob_path(dir);
+	}
+
+	if !has_meta(dir[volume_len:]) {
+		m, e := _glob(dir, file, nil);
+		return m[:], e;
+	}
+
+	// The directory part contains wildcards too: expand it first, then look for "file" in each
+	m: []string;
+	m, err = glob(dir);
+	if err != .None {
+		return;
+	}
+	dmatches := make([dynamic]string, 0, 0, allocator);
+	for d in m {
+		dmatches, err = _glob(d, file, &dmatches);
+		if err != .None {
+			break;
+		}
+	}
+	if len(dmatches) > 0 {
+		matches = dmatches[:];
+	}
+	return;
+}
+// _glob appends the joined path of every entry of directory "dir" whose name matches "pattern"
+// to "matches" (or to a new array if it is nil); entries are visited in sorted name order
+// A "dir" that cannot be opened, or is not a directory, adds nothing and is not an error
+_glob :: proc(dir, pattern: string, matches: ^[dynamic]string) -> (m: [dynamic]string, e: Match_Error) {
+	if matches != nil {
+		m = matches^;
+	} else {
+		m = make([dynamic]string, 0, 0, context.allocator);
+	}
+
+
+	d, derr := os.open(dir);
+	if derr != 0 {
+		return;
+	}
+	defer os.close(d);
+
+	fi, ferr := os.stat(d);
+	// Release the info on every path; it used to be released on the early returns only
+	defer os.file_info_delete(fi);
+	if ferr != 0 {
+		return;
+	}
+	if !fi.is_dir {
+		return;
+	}
+
+
+	fis, _ := os.read_dir(d, -1);
+	sort.quick_sort_proc(fis, proc(a, b: os.File_Info) -> int {
+		return sort.compare_strings(a.name, b.name);
+	});
+	defer {
+		for fi in fis {
+			os.file_info_delete(fi);
+		}
+		delete(fis);
+	}
+
+	for fi in fis {
+		n := fi.name;
+		matched, err := match(pattern, n);
+		if err != .None {
+			return m, err;
+		}
+		if matched {
+			append(&m, join(dir, n));
+		}
+	}
+	return;
+}
+
+// has_meta reports whether path contains any character that is special to match
+@(private)
+has_meta :: proc(path: string) -> bool {
+	when ODIN_OS == "windows" {
+		CHARS :: `*?[`;
+	} else {
+		CHARS :: `*?[\`;
+	}
+	return strings.contains_any(path, CHARS);
+}
+
+// clean_glob_path prepares the directory part of a pattern for glob by dropping its last byte (the separator)
+@(private)
+clean_glob_path :: proc(path: string) -> string {
+	switch path {
+	case "":
+		return ".";
+	case SEPARATOR_STRING:
+		return path;
+	}
+	return path[:len(path)-1];
+}
+
+
+// clean_glob_path_windows is the Windows variant of clean_glob_path; it also returns the length of
+// the volume prefix. "temp_buf" must hold at least 3 bytes and the result may alias it
+@(private)
+clean_glob_path_windows :: proc(path: string, temp_buf: []byte) -> (prefix_len: int, cleaned: string) {
+	vol_len := volume_name_len(path);
+	switch {
+	case path == "":
+		return 0, ".";
+	case vol_len+1 == len(path) && is_separator(path[len(path)-1]): // /, \, C:\, C:/
+		return vol_len+1, path;
+	case vol_len == len(path) && len(path) == 2: // C:
+		copy(temp_buf[:], path);
+		temp_buf[2] = '.';
+		return vol_len, string(temp_buf[:3]);
+	}
+
+	if vol_len >= len(path) {
+		vol_len = len(path) -1;
+	}
+	return vol_len, path[:len(path)-1];
+}
diff --git a/core/path/filepath/path.odin b/core/path/filepath/path.odin
new file mode 100644
index 000000000..c15ff110a
--- /dev/null
+++ b/core/path/filepath/path.odin
@@ -0,0 +1,218 @@
+package filepath
+
+import "core:os"
+import "core:strings"
+
+// is_separator checks whether the byte is a valid separator character
+is_separator :: proc(c: byte) -> bool {
+	switch c {
+	case '/': return true;
+	case '\\': return ODIN_OS == "windows";
+	}
+	return false;
+}
+
+// is_slash checks for either kind of slash, regardless of the target OS
+@(private)
+is_slash :: proc(c: byte) -> bool {
+	return c == '\\' || c == '/';
+}
+
+// split splits path immediately after its last separator into a directory and a file name
+// The volume name is never split; without a separator, dir is the volume name and file is the rest
+split :: proc(path: string) -> (dir, file: string) {
+	vol := volume_name(path);
+	i := len(path) - 1;
+	for i >= len(vol) && !is_separator(path[i]) {
+		i -= 1;
+	}
+	return path[:i+1], path[i+1:];
+}
+
+// volume_name returns the leading volume name of path, or "" if it has none
+volume_name :: proc(path: string) -> string {
+	return path[:volume_name_len(path)];
+}
+
+// volume_name_len returns the length in bytes of the leading volume name of path
+// This is 2 for a drive letter (`C:`), the length of `\\host\share` for a UNC path, and 0 otherwise
+// NOTE: these rules are applied on every target, not only on Windows
+volume_name_len :: proc(path: string) -> int {
+	if len(path) < 2 {
+		return 0;
+	}
+	c := path[0];
+	if path[1] == ':' {
+		switch c {
+		case 'a'..'z', 'A'..'Z':
+			return 2;
+		}
+	}
+
+	// UNC path: two slashes followed by a host name (not starting with '.' or a slash)
+	if l := len(path); l >= 5 && is_slash(path[0]) && is_slash(path[1]) &&
+		!is_slash(path[2]) && path[2] != '.' {
+		for n := 3; n < l-1; n += 1 {
+			if is_slash(path[n]) {
+				n += 1;
+				if !is_slash(path[n]) {
+					if path[n] == '.' {
+						break;
+					}
+					// The share name runs up to the next slash or the end of the path
+					for ; n < l; n += 1 {
+						if is_slash(path[n]) {
+							break;
+						}
+					}
+					return n;
+				}
+				break;
+			}
+		}
+	}
+	return 0;
+}
+
+
+// clean returns a shorter equivalent of path using lexical processing only:
+//	repeated separators are collapsed, "." elements are dropped, and inner ".." elements are resolved
+// Separators in the result are SEPARATOR; an otherwise empty result becomes "."
+// NOTE(review): the byte buffer allocated through Lazy_Buffer does not appear to be freed - confirm
+clean :: proc(path: string, allocator := context.allocator) -> string {
+	context.allocator = allocator;
+
+	path := path;
+	original_path := path;
+	vol_len := volume_name_len(path);
+	path = path[vol_len:];
+
+	if path == "" {
+		if vol_len > 1 && original_path[1] != ':' {
+			return from_slash(original_path);
+		}
+		return strings.concatenate({original_path, "."});
+	}
+
+	rooted := is_separator(path[0]);
+
+	n := len(path);
+	out := &Lazy_Buffer{
+		s = path,
+		vol_and_path = original_path,
+		vol_len = vol_len,
+	};
+
+	r, dot_dot := 0, 0;
+	if rooted {
+		lazy_buffer_append(out, '/');
+		r, dot_dot = 1, 1;
+	}
+
+	for r < n {
+		switch {
+		case is_separator(path[r]):
+			r += 1;
+		case path[r] == '.' && (r+1 == n || is_separator(path[r+1])):
+			r += 1;
+		case path[r] == '.' && path[r+1] == '.' && (r+2 == n || is_separator(path[r+2])):
+			r += 2;
+			switch {
+			case out.w > dot_dot:
+				out.w -= 1;
+				for out.w > dot_dot && !is_separator(lazy_buffer_index(out, out.w)) {
+					out.w -= 1;
+				}
+			case !rooted:
+				if out.w > 0 {
+					lazy_buffer_append(out, '/');
+				}
+				lazy_buffer_append(out, '.');
+				lazy_buffer_append(out, '.');
+				dot_dot = out.w;
+			}
+		case:
+			if rooted && out.w != 1 || !rooted && out.w != 0 {
+				lazy_buffer_append(out, '/');
+			}
+			for ; r < n && !is_separator(path[r]); r += 1 {
+				lazy_buffer_append(out, path[r]);
+			}
+
+		}
+	}
+
+	if out.w == 0 {
+		lazy_buffer_append(out, '.');
+	}
+
+	s := lazy_buffer_string(out);
+	cleaned := from_slash(s);
+	return cleaned;
+}
+
+// from_slash returns path with every '/' replaced by SEPARATOR
+// When SEPARATOR is '/', path itself is returned (not a copy)
+from_slash :: proc(path: string, allocator := context.allocator) -> string {
+	if SEPARATOR == '/' {
+		return path;
+	}
+	s, ok := strings.replace_all(path, "/", SEPARATOR_STRING, allocator);
+	if !ok {
+		s = strings.clone(s, allocator);
+	}
+	return s;
+}
+
+
+
+/*
+	Lazy_Buffer is a lazily made path buffer
+	When it does allocate, it uses the context.allocator
+ */
+@(private)
+Lazy_Buffer :: struct {
+	s: string,
+	b: []byte,
+	w: int, // write index
+	vol_and_path: string,
+	vol_len: int,
+}
+
+// lazy_buffer_index returns byte i of the output written so far
+@(private)
+lazy_buffer_index :: proc(lb: ^Lazy_Buffer, i: int) -> byte {
+	if lb.b != nil {
+		return lb.b[i];
+	}
+	return lb.s[i];
+}
+// lazy_buffer_append writes c at the write index
+// No buffer is allocated for as long as the output is still a prefix of the input string
+@(private)
+lazy_buffer_append :: proc(lb: ^Lazy_Buffer, c: byte) {
+	if lb.b == nil {
+		if lb.w < len(lb.s) && lb.s[lb.w] == c {
+			lb.w += 1;
+			return;
+		}
+		lb.b = make([]byte, len(lb.s));
+		copy(lb.b, lb.s[:lb.w]);
+	}
+	lb.b[lb.w] = c;
+	lb.w += 1;
+}
+// lazy_buffer_string returns a newly allocated string: the volume name followed by the written bytes
+@(private)
+lazy_buffer_string :: proc(lb: ^Lazy_Buffer) -> string {
+	if lb.b == nil {
+		return strings.clone(lb.vol_and_path[:lb.vol_len+lb.w]);
+	}
+
+	x := lb.vol_and_path[:lb.vol_len];
+	y := string(lb.b[:lb.w]);
+	z := make([]byte, len(x)+len(y));
+	copy(z, x);
+	copy(z[len(x):], y);
+	return string(z);
+}
diff --git a/core/path/filepath/path_unix.odin b/core/path/filepath/path_unix.odin
new file mode 100644
index 000000000..7ffe6291e
--- /dev/null
+++ b/core/path/filepath/path_unix.odin
@@ -0,0 +1,5 @@
+//+build linux, darwin, freebsd
+package filepath
+
+SEPARATOR :: '/';
+SEPARATOR_STRING :: `/`;
diff --git a/core/path/filepath/path_windows.odin b/core/path/filepath/path_windows.odin
new file mode 100644
index 000000000..604859ca8
--- /dev/null
+++ b/core/path/filepath/path_windows.odin
@@ -0,0 +1,79 @@
+package filepath
+
+import "core:strings"
+
+SEPARATOR :: '\\';
+SEPARATOR_STRING :: `\`;
+
+
+// reserved_names lists the names that is_reserved_name checks against
+reserved_names := []string{
+	"CON", "PRN", "AUX", "NUL",
+	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
+	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
+};
+
+// is_reserved_name reports whether path is one of reserved_names, ignoring case
+is_reserved_name :: proc(path: string) -> bool {
+	if len(path) == 0 {
+		return false;
+	}
+	for reserved in reserved_names {
+		if strings.equal_fold(path, reserved) {
+			return true;
+		}
+	}
+	return false;
+}
+
+// is_UNC reports whether path starts with a UNC volume such as `\\host\share`
+is_UNC :: proc(path: string) -> bool {
+	return volume_name_len(path) > 2;
+}
+
+// join joins the elements with SEPARATOR, skipping leading empty elements, and cleans the result
+// It returns "" if every element is empty; otherwise the result is allocated with "allocator"
+join :: proc(elems: ..string, allocator := context.allocator) -> string {
+	context.allocator = allocator; // join_non_empty and clean allocate through the context
+	for e, i in elems {
+		if e != "" {
+			return join_non_empty(elems[i:]);
+		}
+	}
+	return "";
+}
+
+// join_non_empty joins and cleans elems, where elems[0] must not be empty
+// A bare drive letter stays relative (`C:` + `foo` = `C:foo`), and a join never turns a non-UNC head into a UNC path
+join_non_empty :: proc(elems: []string) -> string {
+	if len(elems[0]) == 2 && elems[0][1] == ':' {
+		i := 1;
+		for ; i < len(elems); i += 1 {
+			if elems[i] != "" {
+				break;
+			}
+		}
+		s := strings.join(elems[i:], SEPARATOR_STRING, context.temp_allocator);
+		s = strings.concatenate({elems[0], s}, context.temp_allocator);
+		return clean(s);
+	}
+
+	s := strings.join(elems, SEPARATOR_STRING, context.temp_allocator);
+	p := clean(s);
+	if !is_UNC(p) {
+		return p;
+	}
+
+	head := clean(elems[0], context.temp_allocator);
+	if is_UNC(head) {
+		return p;
+	}
+	delete(p); // It is not needed now
+
+	tail := clean(strings.join(elems[1:], SEPARATOR_STRING, context.temp_allocator), context.temp_allocator);
+	if head[len(head)-1] == SEPARATOR {
+		return strings.concatenate({head, tail});
+	}
+
+	return strings.concatenate({head, SEPARATOR_STRING, tail});
+}