diff options
| author | Tetralux <tetraluxonpc@gmail.com> | 2022-04-23 03:33:35 +0000 |
|---|---|---|
| committer | Tetralux <tetraluxonpc@gmail.com> | 2022-04-23 20:25:59 +0000 |
| commit | b44b6e7e5099cab83f4a6d0feb5af9f245dea738 (patch) | |
| tree | 5821f52c432a07d374fddc4b89d8a466833b3734 /core/path | |
| parent | 849efff07015930b18e9e158d67315c823a406b6 (diff) | |
[path/filepath] Add file stem and long-extension procedures
Adds stem(), short_stem(), and long_ext(); also adds doc-comments to base() and ext().
The 'stem' is usually 'the name' of the file; the basename without the file extension.
To this end, this adds stem(), which is such that:
stem(path) + ext(path) = base(path)
However, 'file extension' has two different meanings to what constitutes it!
> What is the extension of: 'name.tar.gz' ?
Colloquially, you would likely think of it as 'a tarball' - which you might think is '.tar.gz'.
But, if you're writing code to process a file of this type, you would first treat it
as a Gzip file, and then treat the result as a TAR file - i.e: '.gz' ... _followed by_ '.tar'.
ext() returns '.gz' here, since that is the most-immediate format that you would need to use
to decode it; it would be a Gzip stream.
Sometimes though, you do actually want to consider these longer file extensions.
Perhaps you're extracting a tarball, and what to know what to call the intermediate tar file;
perhaps you want to check to see if this file is a tarball, or just a Gzip file;
or maybe you just want 'the name' of the file, and not this "strange 'name-and-part-of-the-extension' thing".
So, this also adds short_stem() and long_ext(), such that:
short_stem(path) + long_ext(path) = base(path)
Thus, we can use either, but the most immediately-useful one is the easiest to reach for:
stem('name.tar.gz') -> 'name.tar'
ext('name.tar.gz') -> '.gz'
short_stem('name.tar.gz') -> 'name'
long_ext('name.tar.gz') -> '.tar.gz'
These procedures are identical to their counterparts when the path only has a simple extension:
stem('name.txt') -> 'name'
ext('name.txt') -> '.txt'
short_stem('name.txt') -> 'name'
long_ext('name.txt') -> '.txt'
Diffstat (limited to 'core/path')
| -rw-r--r-- | core/path/filepath/path.odin | 133 |
1 files changed, 124 insertions, 9 deletions
diff --git a/core/path/filepath/path.odin b/core/path/filepath/path.odin index 42714d736..32e4a8a37 100644 --- a/core/path/filepath/path.odin +++ b/core/path/filepath/path.odin @@ -4,6 +4,8 @@ package filepath import "core:strings" +SEPARATOR_CHARS :: `/\` + // is_separator checks whether the byte is a valid separator character is_separator :: proc(c: byte) -> bool { switch c { @@ -69,6 +71,16 @@ volume_name_len :: proc(path: string) -> int { return 0 } +/* + Gets the file name and extension from a path. + + i.e: + 'path/to/name.tar.gz' -> 'name.tar.gz' + 'path/to/name.txt' -> 'name.txt' + 'path/to/name' -> 'name' + + Returns "." if the path is an empty string. +*/ base :: proc(path: string) -> string { if path == "" { return "." @@ -94,6 +106,118 @@ base :: proc(path: string) -> string { return path } +/* + Gets the name of a file from a path. + + The stem of a file is such that stem(path) + ext(path) = base(path). + + Only the last dot is considered when splitting the file extension. + See `short_stem`. + + i.e: + 'name.tar.gz' -> 'name.tar' + 'name.txt' -> 'name' + + Returns an empty string if there is no stem. e.g: '.gitignore'. + Returns an empty string if there's a trailing path separator. +*/ +stem :: proc(path: string) -> string { + if len(path) > 0 && is_separator(path[len(path) - 1]) { + // NOTE(tetra): Trailing separator + return "" + } + + // NOTE(tetra): Get the basename + path := path + if i := strings.last_index_any(path, SEPARATOR_CHARS); i != -1 { + path = path[i+1:] + } + + if i := strings.last_index_byte(path, '.'); i != -1 { + return path[:i] + } + + return path +} + +/* + Gets the name of a file from a path. + + The short stem is such that short_stem(path) + long_ext(path) = base(path). + + The first dot is used to split off the file extension, unlike `stem` which uses the last dot. + + i.e: + 'name.tar.gz' -> 'name' + 'name.txt' -> 'name' + + Returns an empty string if there is no stem. e.g: '.gitignore'. + Returns an empty string if there's a trailing path separator. +*/ +short_stem :: proc(path: string) -> string { + s := stem(path) + if i := strings.index_byte(s, '.'); i != -1 { + return s[:i] + } + return s +} + +/* + Gets the file extension from a path, including the dot. + + The file extension is such that stem(path) + ext(path) = base(path). + + Only the last dot is considered when splitting the file extension. + See `long_ext`. + + i.e: + 'name.tar.gz' -> '.gz' + 'name.txt' -> '.txt' + + Returns an empty string if there is no dot. + Returns an empty string if there is a trailing path separator. +*/ +ext :: proc(path: string) -> string { + for i := len(path)-1; i >= 0 && !is_separator(path[i]); i -= 1 { + if path[i] == '.' { + return path[i:] + } + } + return "" +} + +/* + Gets the file extension from a path, including the dot. + + The long file extension is such that short_stem(path) + long_ext(path) = base(path). + + The first dot is used to split off the file extension, unlike `ext` which uses the last dot. + + i.e: + 'name.tar.gz' -> '.tar.gz' + 'name.txt' -> '.txt' + + Returns an empty string if there is no dot. + Returns an empty string if there is a trailing path separator. +*/ +long_ext :: proc(path: string) -> string { + if len(path) > 0 && is_separator(path[len(path) - 1]) { + // NOTE(tetra): Trailing separator + return "" + } + + // NOTE(tetra): Get the basename + path := path + if i := strings.last_index_any(path, SEPARATOR_CHARS); i != -1 { + path = path[i+1:] + } + + if i := strings.index_byte(path, '.'); i != -1 { + return path[i:] + } + + return "" +} clean :: proc(path: string, allocator := context.allocator) -> string { context.allocator = allocator @@ -189,15 +313,6 @@ to_slash :: proc(path: string, allocator := context.allocator) -> (new_path: str return strings.replace_all(path, SEPARATOR_STRING, "/", allocator) } -ext :: proc(path: string) -> string { - for i := len(path)-1; i >= 0 && !is_separator(path[i]); i -= 1 { - if path[i] == '.' { - return path[i:] - } - } - return "" -} - Relative_Error :: enum { None, |