aboutsummaryrefslogtreecommitdiff
path: root/core/path
diff options
context:
space:
mode:
authorTetralux <tetraluxonpc@gmail.com>2022-04-23 03:33:35 +0000
committerTetralux <tetraluxonpc@gmail.com>2022-04-23 20:25:59 +0000
commitb44b6e7e5099cab83f4a6d0feb5af9f245dea738 (patch)
tree5821f52c432a07d374fddc4b89d8a466833b3734 /core/path
parent849efff07015930b18e9e158d67315c823a406b6 (diff)
[path/filepath] Add file stem and long-extension procedures
Adds stem(), short_stem(), and long_ext(); also adds doc-comments to base() and ext(). The 'stem' is usually 'the name' of the file; the basename without the file extension. To this end, this adds stem(), which is such that: stem(path) + ext(path) = base(path) However, there are two different ideas of what constitutes a 'file extension'! > What is the extension of: 'name.tar.gz' ? Colloquially, you would likely think of it as 'a tarball' - which you might think is '.tar.gz'. But, if you're writing code to process a file of this type, you would first treat it as a Gzip file, and then treat the result as a TAR file - i.e: '.gz' ... _followed by_ '.tar'. ext() returns '.gz' here, since that is the most-immediate format that you would need to use to decode it; it would be a Gzip stream. Sometimes though, you do actually want to consider these longer file extensions. Perhaps you're extracting a tarball, and want to know what to call the intermediate tar file; perhaps you want to check to see if this file is a tarball, or just a Gzip file; or maybe you just want 'the name' of the file, and not this "strange 'name-and-part-of-the-extension' thing". So, this also adds short_stem() and long_ext(), such that: short_stem(path) + long_ext(path) = base(path) Thus, we can use either, but the most immediately-useful one is the easiest to reach for: stem('name.tar.gz') -> 'name.tar' ext('name.tar.gz') -> '.gz' short_stem('name.tar.gz') -> 'name' long_ext('name.tar.gz') -> '.tar.gz' These procedures are identical to their counterparts when the path only has a simple extension: stem('name.txt') -> 'name' ext('name.txt') -> '.txt' short_stem('name.txt') -> 'name' long_ext('name.txt') -> '.txt'
Diffstat (limited to 'core/path')
-rw-r--r--core/path/filepath/path.odin133
1 files changed, 124 insertions, 9 deletions
diff --git a/core/path/filepath/path.odin b/core/path/filepath/path.odin
index 42714d736..32e4a8a37 100644
--- a/core/path/filepath/path.odin
+++ b/core/path/filepath/path.odin
@@ -4,6 +4,8 @@ package filepath
import "core:strings"
+// SEPARATOR_CHARS holds both path separator characters: forward slash and backslash.
+SEPARATOR_CHARS :: `/\`
+
// is_separator checks whether the byte is a valid separator character
is_separator :: proc(c: byte) -> bool {
switch c {
@@ -69,6 +71,16 @@ volume_name_len :: proc(path: string) -> int {
return 0
}
+/*
+ Gets the file name and extension from a path.
+
+ i.e:
+ 'path/to/name.tar.gz' -> 'name.tar.gz'
+ 'path/to/name.txt' -> 'name.txt'
+ 'path/to/name' -> 'name'
+
+ Returns "." if the path is an empty string.
+*/
base :: proc(path: string) -> string {
if path == "" {
return "."
@@ -94,6 +106,118 @@ base :: proc(path: string) -> string {
return path
}
+/*
+	Gets the name of a file from a path, without its final extension.
+
+	The stem of a file is such that stem(path) + ext(path) = base(path).
+
+	Only the last dot is considered when splitting the file extension.
+	See `short_stem`.
+
+	e.g:
+		'name.tar.gz' -> 'name.tar'
+		'name.txt'    -> 'name'
+
+	Returns an empty string if there is no stem. e.g: '.gitignore'.
+	Returns an empty string if there's a trailing path separator.
+*/
+stem :: proc(path: string) -> string {
+	if len(path) > 0 && is_separator(path[len(path) - 1]) {
+		// NOTE(tetra): Trailing separator
+		return ""
+	}
+
+	// NOTE: Get the basename.
+	// Separators are detected with is_separator, the same predicate ext() uses,
+	// so that stem() and ext() always split the path at the same position.
+	name := path
+	for i := len(path)-1; i >= 0; i -= 1 {
+		if is_separator(path[i]) {
+			name = path[i+1:]
+			break
+		}
+	}
+
+	// NOTE: Drop everything from the last dot onwards; that part belongs to ext().
+	if i := strings.last_index_byte(name, '.'); i != -1 {
+		return name[:i]
+	}
+
+	return name
+}
+
+/*
+	Gets the name of a file from a path, with every extension removed.
+
+	The short stem is such that short_stem(path) + long_ext(path) = base(path).
+
+	The split happens at the first dot of the file name, whereas `stem` splits at the last dot.
+
+	e.g:
+		'name.tar.gz' -> 'name'
+		'name.txt'    -> 'name'
+
+	Returns an empty string if there is no stem. e.g: '.gitignore'.
+	Returns an empty string if there's a trailing path separator.
+*/
+short_stem :: proc(path: string) -> string {
+	// NOTE: stem() has already removed the directory part and the last extension;
+	// cutting at the first remaining dot removes any other extensions.
+	name := stem(path)
+	first_dot := strings.index_byte(name, '.')
+	if first_dot == -1 {
+		return name
+	}
+	return name[:first_dot]
+}
+
+/*
+	Gets the file extension from a path, including the dot.
+
+	The file extension is such that stem(path) + ext(path) = base(path).
+
+	The split happens at the last dot of the file name.
+	See `long_ext`.
+
+	e.g:
+		'name.tar.gz' -> '.gz'
+		'name.txt'    -> '.txt'
+
+	Returns an empty string if there is no dot.
+	Returns an empty string if there is a trailing path separator.
+*/
+ext :: proc(path: string) -> string {
+	// NOTE: Walk backwards from the end of the path; the first dot found is the
+	// last dot of the file name. Reaching a separator first means there is no extension.
+	i := len(path) - 1
+	for i >= 0 {
+		c := path[i]
+		if is_separator(c) {
+			break
+		}
+		if c == '.' {
+			return path[i:]
+		}
+		i -= 1
+	}
+	return ""
+}
+
+/*
+	Gets the file extension from a path, including the dot.
+
+	The long file extension is such that short_stem(path) + long_ext(path) = base(path).
+
+	The first dot is used to split off the file extension, unlike `ext` which uses the last dot.
+
+	e.g:
+		'name.tar.gz' -> '.tar.gz'
+		'name.txt'    -> '.txt'
+
+	Returns an empty string if there is no dot.
+	Returns an empty string if there is a trailing path separator.
+*/
+long_ext :: proc(path: string) -> string {
+	// NOTE: Walk backwards over the file name, exactly like ext(), but keep going
+	// after a dot is found so that `first_dot` ends up at the earliest dot of the name.
+	// Separators are detected with is_separator, the same predicate ext() uses.
+	// A trailing separator stops the loop immediately, leaving no dot and thus "".
+	first_dot := -1
+	for i := len(path)-1; i >= 0 && !is_separator(path[i]); i -= 1 {
+		if path[i] == '.' {
+			first_dot = i
+		}
+	}
+
+	if first_dot != -1 {
+		return path[first_dot:]
+	}
+
+	return ""
+}
clean :: proc(path: string, allocator := context.allocator) -> string {
context.allocator = allocator
@@ -189,15 +313,6 @@ to_slash :: proc(path: string, allocator := context.allocator) -> (new_path: str
return strings.replace_all(path, SEPARATOR_STRING, "/", allocator)
}
-ext :: proc(path: string) -> string {
- for i := len(path)-1; i >= 0 && !is_separator(path[i]); i -= 1 {
- if path[i] == '.' {
- return path[i:]
- }
- }
- return ""
-}
-
Relative_Error :: enum {
None,