From 2eadf1b0f1ee365804b9f3c52cee3b451ba8d2f2 Mon Sep 17 00:00:00 2001 From: Chip Senkbeil Date: Sat, 24 Feb 2024 20:43:09 -0600 Subject: [PATCH] Introduce checked functions for pushing, joining, and switching encodings (#23) * Implement push_checked on Encoding and Utf8Encoding traits alongside their unix and windows implementations * Implement push_checked for Pathbuf and Utf8PathBuf; add join_checked to Path and Utf8Path * Implement push_checked for TypedPathBuf and Utf8TypedPathBuf * Implement join_checked for TypedPath, Utf8TypedPath, TypedPathBuf, and Utf8TypedPathBuf * Implement with_encoding_checked for non-utf8 and utf8 paths; add is_valid method to component traits and implementations * Implement with_unix_encoding_checked and with_windows_encoding_checked functions * Bump version to 0.8.0 and update changelog & readme to reference new functionality * Add is_valid to Path/Utf8Path --- CHANGELOG.md | 12 ++ Cargo.toml | 2 +- README.md | 67 ++++++- src/common/errors.rs | 31 ++++ src/common/non_utf8.rs | 9 + src/common/non_utf8/components/component.rs | 22 +++ src/common/non_utf8/path.rs | 127 ++++++++++++- src/common/non_utf8/pathbuf.rs | 59 +++++- src/common/utf8.rs | 9 + src/common/utf8/components/component.rs | 54 ++++-- src/common/utf8/path.rs | 131 ++++++++++++- src/common/utf8/pathbuf.rs | 59 +++++- src/typed/non_utf8/path.rs | 58 +++++- src/typed/non_utf8/pathbuf.rs | 118 +++++++++++- src/typed/utf8/path.rs | 67 ++++++- src/typed/utf8/pathbuf.rs | 122 ++++++++++++- src/unix/non_utf8.rs | 148 +++++++++++++++ src/unix/non_utf8/components/component.rs | 25 ++- src/unix/utf8.rs | 125 +++++++++++++ src/unix/utf8/components/component.rs | 27 ++- src/windows/non_utf8.rs | 183 +++++++++++++++++++ src/windows/non_utf8/components/component.rs | 29 ++- src/windows/utf8.rs | 157 ++++++++++++++++ src/windows/utf8/components/component.rs | 29 ++- 24 files changed, 1633 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3ef78..4dee089 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.8.0] - 2024-02-24 + +* Add `push_checked` function, which ensures that any path added to an existing `PathBuf` or `TypedPathBuf` must abide by the following rules: + 1. It cannot be an absolute path. Only relative paths allowed. + 2. In the case of Windows, it cannot start with a prefix like `C:`. + 3. All normal components of the path must contain only valid characters. + 4. If parent directory (..) components are present, they must not result in a path traversal attack (impacting the current path). +* Add `join_checked` function, which ensures that any path joined with an existing path follows the rules of `push_checked` +* Add `with_encoding_checked` function to ensure that the resulting path from an encoding conversion is still valid +* Add `with_unix_encoding_checked` and `with_windows_encoding_checked` functions as shortcuts to `with_encoding_checked` +* Add `is_valid` to `Component` and `Utf8Component` traits alongside `Path` and `Utf8Path` to indicate if a component/path is valid for the given encoding + ## [0.7.1] - 2024-02-15 * Support `wasm` family for compilation diff --git a/Cargo.toml b/Cargo.toml index 2a03d4c..2647bd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "typed-path" description = "Provides typed variants of Path and PathBuf for Unix and Windows" -version = "0.7.1" +version = "0.8.0" edition = "2021" authors = ["Chip Senkbeil "] categories = ["development-tools", "filesystem", "os"] diff --git a/README.md b/README.md index 537fd4a..7d603c1 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Unix and Windows. 
```toml [dependencies] -typed-path = "0.7" +typed-path = "0.8" ``` As of version `0.7`, this library also supports `no_std` environments that @@ -124,6 +124,51 @@ fn main() { } ``` +### Checking paths + +When working with user-defined paths, an additional layer of defense is needed to guard against [path traversal attacks](https://owasp.org/www-community/attacks/Path_Traversal) and other risks. + +To that end, you can use `PathBuf::push_checked` and `Path::join_checked` (and equivalents) to ensure that the paths being created do not alter pre-existing paths in unexpected ways. + +```rust +use typed_path::{CheckedPathError, Path, PathBuf, UnixEncoding}; + +fn main() { + let path = Path::::new("/etc"); + + // A valid path can be joined onto the existing one + assert_eq!(path.join_checked("passwd"), Ok(PathBuf::from("/etc/passwd"))); + + // An invalid path will result in an error + assert_eq!( + path.join_checked("/sneaky/replacement"), + Err(CheckedPathError::UnexpectedRoot) + ); + + let mut path = PathBuf::::from("/etc"); + + // Pushing a relative path that contains parent directory references that cannot be + // resolved within the path is considered an error as this is considered a path + // traversal attack! 
+ assert_eq!( + path.push_checked(".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(path, PathBuf::from("/etc")); + + // Pushing an absolute path will fail with an error + assert_eq!( + path.push_checked("/sneaky/replacement"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(path, PathBuf::from("/etc")); + + // Pushing a relative path that is safe will succeed + assert!(path.push_checked("abc/../def").is_ok()); + assert_eq!(path, PathBuf::from("/etc/abc/../def")); +} +``` + ### Converting between encodings There may be times in which you need to convert between encodings such as when @@ -156,6 +201,26 @@ fn main() { } ``` +Like with pushing and joining paths using *checked* variants, we can also ensure that paths created from changing encodings are still valid: + +```rust +use typed_path::{CheckedPathError, Utf8Path, Utf8UnixEncoding, Utf8WindowsEncoding}; + +fn main() { + // Convert from Unix to Windows + let unix_path = Utf8Path::::new("/tmp/foo.txt"); + let windows_path = unix_path.with_encoding_checked::().unwrap(); + assert_eq!(windows_path, Utf8Path::::new(r"\tmp\foo.txt")); + + // Convert from Unix to Windows will fail if there are characters that are valid in Unix but not in Windows + let unix_path = Utf8Path::::new("/tmp/|invalid|/foo.txt"); + assert_eq!( + unix_path.with_encoding_checked::(), + Err(CheckedPathError::InvalidFilename), + ); +} +``` + ### Typed Paths In the above examples, we were using paths where the encoding (Unix or Windows) diff --git a/src/common/errors.rs b/src/common/errors.rs index 0b04851..ff64a29 100644 --- a/src/common/errors.rs +++ b/src/common/errors.rs @@ -18,3 +18,34 @@ impl fmt::Display for StripPrefixError { #[cfg(feature = "std")] impl std::error::Error for StripPrefixError {} + +/// An error returned when a path violates checked criteria. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum CheckedPathError { + /// When a normal component contains invalid characters for the current encoding. 
+ InvalidFilename, + + /// When a path component that represents a parent directory is provided such that the original + /// path would be escaped to access arbitrary files. + PathTraversalAttack, + + /// When a path component that represents a prefix is provided after the start of the path. + UnexpectedPrefix, + + /// When a path component that represents a root is provided after the start of the path. + UnexpectedRoot, +} + +impl fmt::Display for CheckedPathError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidFilename => write!(f, "path contains invalid filename"), + Self::PathTraversalAttack => write!(f, "path attempts to escape original path"), + Self::UnexpectedPrefix => write!(f, "path contains unexpected prefix"), + Self::UnexpectedRoot => write!(f, "path contains unexpected root"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for CheckedPathError {} diff --git a/src/common/non_utf8.rs b/src/common/non_utf8.rs index d716ba8..175965b 100644 --- a/src/common/non_utf8.rs +++ b/src/common/non_utf8.rs @@ -14,6 +14,7 @@ pub use parser::ParseError; pub use path::*; pub use pathbuf::*; +use crate::common::errors::CheckedPathError; use crate::no_std_compat::*; use crate::private; @@ -33,4 +34,12 @@ pub trait Encoding<'a>: private::Sealed { /// Pushes a byte slice (`path`) onto the an existing path (`current_path`) fn push(current_path: &mut Vec, path: &[u8]); + + /// Like [`Encoding::push`], but enforces several new rules: + /// + /// 1. `path` cannot contain a prefix component. + /// 2. `path` cannot contain a root component. + /// 3. `path` cannot contain invalid filename bytes. + /// 4. `path` cannot contain parent components such that the current path would be escaped. 
+ fn push_checked(current_path: &mut Vec, path: &[u8]) -> Result<(), CheckedPathError>; } diff --git a/src/common/non_utf8/components/component.rs b/src/common/non_utf8/components/component.rs index dfb0b5c..4969d48 100644 --- a/src/common/non_utf8/components/component.rs +++ b/src/common/non_utf8/components/component.rs @@ -64,6 +64,28 @@ pub trait Component<'a>: /// * `UnixComponent::Normal("here.txt")` - `is_current() == false` fn is_current(&self) -> bool; + /// Returns true if this component is valid. A component can only be invalid if it represents a + /// normal component with bytes that are disallowed by the encoding. + /// + /// # Examples + /// + /// ``` + /// use typed_path::{Component, UnixComponent, WindowsComponent}; + /// + /// assert!(UnixComponent::RootDir.is_valid()); + /// assert!(UnixComponent::ParentDir.is_valid()); + /// assert!(UnixComponent::CurDir.is_valid()); + /// assert!(UnixComponent::Normal(b"abc").is_valid()); + /// assert!(!UnixComponent::Normal(b"\0").is_valid()); + /// + /// assert!(WindowsComponent::RootDir.is_valid()); + /// assert!(WindowsComponent::ParentDir.is_valid()); + /// assert!(WindowsComponent::CurDir.is_valid()); + /// assert!(WindowsComponent::Normal(b"abc").is_valid()); + /// assert!(!WindowsComponent::Normal(b"|").is_valid()); + /// ``` + fn is_valid(&self) -> bool; + /// Returns size of component in bytes fn len(&self) -> usize; diff --git a/src/common/non_utf8/path.rs b/src/common/non_utf8/path.rs index 75490cb..1fa7260 100644 --- a/src/common/non_utf8/path.rs +++ b/src/common/non_utf8/path.rs @@ -10,7 +10,9 @@ use core::{cmp, fmt}; pub use display::Display; use crate::no_std_compat::*; -use crate::{Ancestors, Component, Components, Encoding, Iter, PathBuf, StripPrefixError}; +use crate::{ + Ancestors, CheckedPathError, Component, Components, Encoding, Iter, PathBuf, StripPrefixError, +}; /// A slice of a path (akin to [`str`]). 
/// @@ -252,6 +254,25 @@ where !self.is_absolute() } + /// Returns `true` if the path is valid, meaning that all of its components are valid. + /// + /// See [`Component::is_valid`]'s documentation for more details. + /// + /// # Examples + /// + /// ``` + /// use typed_path::{Path, UnixEncoding}; + /// + /// // NOTE: A path cannot be created on its own without a defined encoding + /// assert!(Path::::new("foo.txt").is_valid()); + /// assert!(!Path::::new("foo\0.txt").is_valid()); + /// ``` + /// + /// [`Component::is_valid`]: crate::Component::is_valid + pub fn is_valid(&self) -> bool { + self.components().all(|c| c.is_valid()) + } + /// Returns `true` if the `Path` has a root. /// /// * On Unix ([`UnixPath`]), a path has a root if it begins with `/`. @@ -666,6 +687,35 @@ where buf } + /// Creates an owned [`PathBuf`] with `path` adjoined to `self`, checking the `path` to ensure + /// it is safe to join. _When dealing with user-provided paths, this is the preferred method._ + /// + /// See [`PathBuf::push_checked`] for more details on what it means to adjoin a path safely. + /// + /// # Examples + /// + /// ``` + /// use typed_path::{CheckedPathError, Path, PathBuf, UnixEncoding}; + /// + /// // NOTE: A path cannot be created on its own without a defined encoding + /// let path = Path::::new("/etc"); + /// + /// // A valid path can be joined onto the existing one + /// assert_eq!(path.join_checked("passwd"), Ok(PathBuf::from("/etc/passwd"))); + /// + /// // An invalid path will result in an error + /// assert_eq!(path.join_checked("/sneaky/replacement"), Err(CheckedPathError::UnexpectedRoot)); + /// ``` + pub fn join_checked>>(&self, path: P) -> Result, CheckedPathError> { + self._join_checked(path.as_ref()) + } + + fn _join_checked(&self, path: &Path) -> Result, CheckedPathError> { + let mut buf = self.to_path_buf(); + buf.push_checked(path)?; + Ok(buf) + } + /// Creates an owned [`PathBuf`] like `self` but with the given file name. 
/// /// See [`PathBuf::set_file_name`] for more details. @@ -878,6 +928,81 @@ where } } + /// Like [`with_encoding`], creates an owned [`PathBuf`] like `self` but with a different + /// encoding. Additionally, checks to ensure that the produced path will be valid. + /// + /// # Note + /// + /// As part of the process of converting between encodings, the path will need to be rebuilt. + /// This involves [`pushing and checking`] each component, which may result in differences in + /// the resulting path such as resolving `.` and `..` early or other unexpected side effects. + /// + /// [`pushing and checking`]: PathBuf::push_checked + /// [`with_encoding`]: Path::with_encoding + /// + /// # Examples + /// + /// ``` + /// use typed_path::{CheckedPathError, Path, UnixEncoding, WindowsEncoding}; + /// + /// // Convert from Unix to Windows + /// let unix_path = Path::::new("/tmp/foo.txt"); + /// let windows_path = unix_path.with_encoding_checked::().unwrap(); + /// assert_eq!(windows_path, Path::::new(r"\tmp\foo.txt")); + /// + /// // Converting from Windows to Unix will drop any prefix + /// let windows_path = Path::::new(r"C:\tmp\foo.txt"); + /// let unix_path = windows_path.with_encoding_checked::().unwrap(); + /// assert_eq!(unix_path, Path::::new(r"/tmp/foo.txt")); + /// + /// // Converting from Unix to Windows with invalid filename characters like `|` should fail + /// let unix_path = Path::::new("/|invalid|/foo.txt"); + /// assert_eq!( + /// unix_path.with_encoding_checked::(), + /// Err(CheckedPathError::InvalidFilename), + /// ); + /// + /// // Converting from Unix to Windows with unexpected prefix embedded in path should fail + /// let unix_path = Path::::new("/path/c:/foo.txt"); + /// assert_eq!( + /// unix_path.with_encoding_checked::(), + /// Err(CheckedPathError::UnexpectedPrefix), + /// ); + /// ``` + pub fn with_encoding_checked(&self) -> Result, CheckedPathError> + where + U: for<'enc> Encoding<'enc>, + { + let mut path = PathBuf::new(); + + // For root, 
current, and parent we specially handle to convert to the appropriate type, + // otherwise we attempt to push using the checked variant, which will ensure that the + // destination encoding is respected + for component in self.components() { + if component.is_root() { + path.push( + <<::Components as Components>::Component as Component>::root() + .as_bytes(), + ); + } else if component.is_current() { + path.push( + <<::Components as Components>::Component as Component>::current( + ) + .as_bytes(), + ); + } else if component.is_parent() { + path.push( + <<::Components as Components>::Component as Component>::parent() + .as_bytes(), + ); + } else { + path.push_checked(component.as_bytes())?; + } + } + + Ok(path) + } + /// Converts a [`Box`](Box) into a /// [`PathBuf`] without copying or allocating. pub fn into_path_buf(self: Box>) -> PathBuf { diff --git a/src/common/non_utf8/pathbuf.rs b/src/common/non_utf8/pathbuf.rs index 8cceb5f..7c01203 100644 --- a/src/common/non_utf8/pathbuf.rs +++ b/src/common/non_utf8/pathbuf.rs @@ -9,7 +9,7 @@ use core::str::FromStr; use core::{cmp, fmt}; use crate::no_std_compat::*; -use crate::{Encoding, Iter, Path}; +use crate::{CheckedPathError, Encoding, Iter, Path}; /// An owned, mutable path that mirrors [`std::path::PathBuf`], but operatings using an /// [`Encoding`] to determine how to parse the underlying bytes. @@ -175,6 +175,63 @@ where T::push(&mut self.inner, path.as_ref().as_bytes()); } + /// Like [`PathBuf::push`], extends `self` with `path`, but also checks to ensure that `path` + /// abides by a set of rules. + /// + /// # Rules + /// + /// 1. `path` cannot contain a prefix component. + /// 2. `path` cannot contain a root component. + /// 3. `path` cannot contain invalid filename bytes. + /// 4. `path` cannot contain parent components such that the current path would be escaped. 
+ /// + /// # Examples + /// + /// Pushing a relative path extends the existing path: + /// + /// ``` + /// use typed_path::{PathBuf, UnixEncoding}; + /// + /// // NOTE: A pathbuf cannot be created on its own without a defined encoding + /// let mut path = PathBuf::::from("/tmp"); + /// + /// // Pushing a relative path works like normal + /// assert!(path.push_checked("file.bk").is_ok()); + /// assert_eq!(path, PathBuf::from("/tmp/file.bk")); + /// ``` + /// + /// Pushing a relative path that contains unresolved parent directory references fails + /// with an error: + /// + /// ``` + /// use typed_path::{CheckedPathError, PathBuf, UnixEncoding}; + /// + /// // NOTE: A pathbuf cannot be created on its own without a defined encoding + /// let mut path = PathBuf::::from("/tmp"); + /// + /// // Pushing a relative path that contains parent directory references that cannot be + /// // resolved within the path is considered an error as this is considered a path + /// // traversal attack! + /// assert_eq!(path.push_checked(".."), Err(CheckedPathError::PathTraversalAttack)); + /// assert_eq!(path, PathBuf::from("/tmp")); + /// ``` + /// + /// Pushing an absolute path fails with an error: + /// + /// ``` + /// use typed_path::{CheckedPathError, PathBuf, UnixEncoding}; + /// + /// // NOTE: A pathbuf cannot be created on its own without a defined encoding + /// let mut path = PathBuf::::from("/tmp"); + /// + /// // Pushing an absolute path will fail with an error + /// assert_eq!(path.push_checked("/etc"), Err(CheckedPathError::UnexpectedRoot)); + /// assert_eq!(path, PathBuf::from("/tmp")); + /// ``` + pub fn push_checked>>(&mut self, path: P) -> Result<(), CheckedPathError> { + T::push_checked(&mut self.inner, path.as_ref().as_bytes()) + } + /// Truncates `self` to [`self.parent`]. /// /// Returns `false` and does nothing if [`self.parent`] is [`None`]. 
diff --git a/src/common/utf8.rs b/src/common/utf8.rs index 92cb2ee..89ce704 100644 --- a/src/common/utf8.rs +++ b/src/common/utf8.rs @@ -10,6 +10,7 @@ pub use iter::*; pub use path::*; pub use pathbuf::*; +use crate::common::errors::CheckedPathError; use crate::no_std_compat::*; use crate::private; @@ -29,4 +30,12 @@ pub trait Utf8Encoding<'a>: private::Sealed { /// Pushes a utf8 str (`path`) onto the an existing path (`current_path`) fn push(current_path: &mut String, path: &str); + + /// Like [`Utf8Encoding::push`], but enforces several new rules: + /// + /// 1. `path` cannot contain a prefix component. + /// 2. `path` cannot contain a root component. + /// 3. `path` cannot contain invalid filename characters. + /// 4. `path` cannot contain parent components such that the current path would be escaped. + fn push_checked(current_path: &mut String, path: &str) -> Result<(), CheckedPathError>; } diff --git a/src/common/utf8/components/component.rs b/src/common/utf8/components/component.rs index 8f5fead..f00496f 100644 --- a/src/common/utf8/components/component.rs +++ b/src/common/utf8/components/component.rs @@ -21,10 +21,10 @@ pub trait Utf8Component<'a>: /// /// `/my/../path/./here.txt` has the components on Unix of /// - /// * `UnixComponent::RootDir` - `is_root() == true` - /// * `UnixComponent::ParentDir` - `is_root() == false` - /// * `UnixComponent::CurDir` - `is_root() == false` - /// * `UnixComponent::Normal("here.txt")` - `is_root() == false` + /// * `Utf8UnixComponent::RootDir` - `is_root() == true` + /// * `Utf8UnixComponent::ParentDir` - `is_root() == false` + /// * `Utf8UnixComponent::CurDir` - `is_root() == false` + /// * `Utf8UnixComponent::Normal("here.txt")` - `is_root() == false` fn is_root(&self) -> bool; /// Returns true if this component represents a normal part of the path @@ -33,10 +33,10 @@ pub trait Utf8Component<'a>: /// /// `/my/../path/./here.txt` has the components on Unix of /// - /// * `UnixComponent::RootDir` - `is_normal() == false` 
- /// * `UnixComponent::ParentDir` - `is_normal() == false` - /// * `UnixComponent::CurDir` - `is_normal() == false` - /// * `UnixComponent::Normal("here.txt")` - `is_normal() == true` + /// * `Utf8UnixComponent::RootDir` - `is_normal() == false` + /// * `Utf8UnixComponent::ParentDir` - `is_normal() == false` + /// * `Utf8UnixComponent::CurDir` - `is_normal() == false` + /// * `Utf8UnixComponent::Normal("here.txt")` - `is_normal() == true` fn is_normal(&self) -> bool; /// Returns true if this component represents a relative representation of a parent directory @@ -45,10 +45,10 @@ pub trait Utf8Component<'a>: /// /// `/my/../path/./here.txt` has the components on Unix of /// - /// * `UnixComponent::RootDir` - `is_parent() == false` - /// * `UnixComponent::ParentDir` - `is_parent() == true` - /// * `UnixComponent::CurDir` - `is_parent() == false` - /// * `UnixComponent::Normal("here.txt")` - `is_parent() == false` + /// * `Utf8UnixComponent::RootDir` - `is_parent() == false` + /// * `Utf8UnixComponent::ParentDir` - `is_parent() == true` + /// * `Utf8UnixComponent::CurDir` - `is_parent() == false` + /// * `Utf8UnixComponent::Normal("here.txt")` - `is_parent() == false` fn is_parent(&self) -> bool; /// Returns true if this component represents a relative representation of the current @@ -58,12 +58,34 @@ pub trait Utf8Component<'a>: /// /// `/my/../path/./here.txt` has the components on Unix of /// - /// * `UnixComponent::RootDir` - `is_current() == false` - /// * `UnixComponent::ParentDir` - `is_current() == false` - /// * `UnixComponent::CurDir` - `is_current() == true` - /// * `UnixComponent::Normal("here.txt")` - `is_current() == false` + /// * `Utf8UnixComponent::RootDir` - `is_current() == false` + /// * `Utf8UnixComponent::ParentDir` - `is_current() == false` + /// * `Utf8UnixComponent::CurDir` - `is_current() == true` + /// * `Utf8UnixComponent::Normal("here.txt")` - `is_current() == false` fn is_current(&self) -> bool; + /// Returns true if this component is 
valid. A component can only be invalid if it represents a + /// normal component with characters that are disallowed by the encoding. + /// + /// # Examples + /// + /// ``` + /// use typed_path::{Utf8Component, Utf8UnixComponent, Utf8WindowsComponent}; + /// + /// assert!(Utf8UnixComponent::RootDir.is_valid()); + /// assert!(Utf8UnixComponent::ParentDir.is_valid()); + /// assert!(Utf8UnixComponent::CurDir.is_valid()); + /// assert!(Utf8UnixComponent::Normal("abc").is_valid()); + /// assert!(!Utf8UnixComponent::Normal("\0").is_valid()); + /// + /// assert!(Utf8WindowsComponent::RootDir.is_valid()); + /// assert!(Utf8WindowsComponent::ParentDir.is_valid()); + /// assert!(Utf8WindowsComponent::CurDir.is_valid()); + /// assert!(Utf8WindowsComponent::Normal("abc").is_valid()); + /// assert!(!Utf8WindowsComponent::Normal("|").is_valid()); + /// ``` + fn is_valid(&self) -> bool; + /// Returns size of component in bytes fn len(&self) -> usize; diff --git a/src/common/utf8/path.rs b/src/common/utf8/path.rs index 71809a4..ce01518 100644 --- a/src/common/utf8/path.rs +++ b/src/common/utf8/path.rs @@ -8,8 +8,8 @@ use core::{cmp, fmt}; use crate::no_std_compat::*; use crate::{ - Encoding, Path, StripPrefixError, Utf8Ancestors, Utf8Component, Utf8Components, Utf8Encoding, - Utf8Iter, Utf8PathBuf, + CheckedPathError, Encoding, Path, StripPrefixError, Utf8Ancestors, Utf8Component, + Utf8Components, Utf8Encoding, Utf8Iter, Utf8PathBuf, }; /// A slice of a path (akin to [`str`]). @@ -204,6 +204,25 @@ where !self.is_absolute() } + /// Returns `true` if the path is valid, meaning that all of its components are valid. + /// + /// See [`Utf8Component::is_valid`]'s documentation for more details. 
+ /// + /// # Examples + /// + /// ``` + /// use typed_path::{Utf8Path, Utf8UnixEncoding}; + /// + /// // NOTE: A path cannot be created on its own without a defined encoding + /// assert!(Utf8Path::::new("foo.txt").is_valid()); + /// assert!(!Utf8Path::::new("foo\0.txt").is_valid()); + /// ``` + /// + /// [`Utf8Component::is_valid`]: crate::Utf8Component::is_valid + pub fn is_valid(&self) -> bool { + self.components().all(|c| c.is_valid()) + } + /// Returns `true` if the `Utf8Path` has a root. /// /// * On Unix ([`Utf8UnixPath`]), a path has a root if it begins with `/`. @@ -618,6 +637,40 @@ where buf } + /// Creates an owned [`Utf8PathBuf`] with `path` adjoined to `self`, checking the `path` to + /// ensure it is safe to join. _When dealing with user-provided paths, this is the preferred + /// method._ + /// + /// See [`Utf8PathBuf::push_checked`] for more details on what it means to adjoin a path + /// safely. + /// + /// # Examples + /// + /// ``` + /// use typed_path::{CheckedPathError, Utf8Path, Utf8PathBuf, Utf8UnixEncoding}; + /// + /// // NOTE: A path cannot be created on its own without a defined encoding + /// let path = Utf8Path::::new("/etc"); + /// + /// // A valid path can be joined onto the existing one + /// assert_eq!(path.join_checked("passwd"), Ok(Utf8PathBuf::from("/etc/passwd"))); + /// + /// // An invalid path will result in an error + /// assert_eq!(path.join_checked("/sneaky/replacement"), Err(CheckedPathError::UnexpectedRoot)); + /// ``` + pub fn join_checked>>( + &self, + path: P, + ) -> Result, CheckedPathError> { + self._join_checked(path.as_ref()) + } + + fn _join_checked(&self, path: &Utf8Path) -> Result, CheckedPathError> { + let mut buf = self.to_path_buf(); + buf.push_checked(path)?; + Ok(buf) + } + /// Creates an owned [`Utf8PathBuf`] like `self` but with the given file name. /// /// See [`Utf8PathBuf::set_file_name`] for more details. 
@@ -807,6 +860,80 @@ where } } + /// Like [`with_encoding`], creates an owned [`Utf8PathBuf`] like `self` but with a different + /// encoding. Additionally, checks to ensure that the produced path will be valid. + /// + /// # Note + /// + /// As part of the process of converting between encodings, the path will need to be rebuilt. + /// This involves [`pushing and checking`] each component, which may result in differences in + /// the resulting path such as resolving `.` and `..` early or other unexpected side effects. + /// + /// [`pushing and checking`]: Utf8PathBuf::push_checked + /// [`with_encoding`]: Utf8Path::with_encoding + /// + /// # Examples + /// + /// ``` + /// use typed_path::{CheckedPathError, Utf8Path, Utf8UnixEncoding, Utf8WindowsEncoding}; + /// + /// // Convert from Unix to Windows + /// let unix_path = Utf8Path::::new("/tmp/foo.txt"); + /// let windows_path = unix_path.with_encoding_checked::().unwrap(); + /// assert_eq!(windows_path, Utf8Path::::new(r"\tmp\foo.txt")); + /// + /// // Converting from Windows to Unix will drop any prefix + /// let windows_path = Utf8Path::::new(r"C:\tmp\foo.txt"); + /// let unix_path = windows_path.with_encoding_checked::().unwrap(); + /// assert_eq!(unix_path, Utf8Path::::new(r"/tmp/foo.txt")); + /// + /// // Converting from Unix to Windows with invalid filename characters like `|` should fail + /// let unix_path = Utf8Path::::new("/|invalid|/foo.txt"); + /// assert_eq!( + /// unix_path.with_encoding_checked::(), + /// Err(CheckedPathError::InvalidFilename), + /// ); + /// + /// // Converting from Unix to Windows with unexpected prefix embedded in path should fail + /// let unix_path = Utf8Path::::new("/path/c:/foo.txt"); + /// assert_eq!( + /// unix_path.with_encoding_checked::(), + /// Err(CheckedPathError::UnexpectedPrefix), + /// ); + /// ``` + pub fn with_encoding_checked(&self) -> Result, CheckedPathError> + where + U: for<'enc> Utf8Encoding<'enc>, + { + let mut path = Utf8PathBuf::new(); + + // For root, 
current, and parent we specially handle to convert to the appropriate type, + // otherwise we attempt to push using the checked variant, which will ensure that the + // destination encoding is respected + for component in self.components() { + if component.is_root() { + path.push(< + <::Components as Utf8Components>::Component + as Utf8Component + >::root().as_str()); + } else if component.is_current() { + path.push(< + <::Components as Utf8Components>::Component + as Utf8Component + >::current().as_str()); + } else if component.is_parent() { + path.push(< + <::Components as Utf8Components>::Component + as Utf8Component + >::parent().as_str()); + } else { + path.push_checked(component.as_str())?; + } + } + + Ok(path) + } + /// Converts a [`Box`](Box) into a /// [`Utf8PathBuf`] without copying or allocating. pub fn into_path_buf(self: Box>) -> Utf8PathBuf { diff --git a/src/common/utf8/pathbuf.rs b/src/common/utf8/pathbuf.rs index 07e2abf..fa20f00 100644 --- a/src/common/utf8/pathbuf.rs +++ b/src/common/utf8/pathbuf.rs @@ -10,7 +10,7 @@ use core::str::FromStr; use core::{cmp, fmt}; use crate::no_std_compat::*; -use crate::{Encoding, PathBuf, Utf8Encoding, Utf8Iter, Utf8Path}; +use crate::{CheckedPathError, Encoding, PathBuf, Utf8Encoding, Utf8Iter, Utf8Path}; /// An owned, mutable path that mirrors [`std::path::PathBuf`], but operatings using a /// [`Utf8Encoding`] to determine how to parse the underlying str. @@ -180,6 +180,63 @@ where T::push(&mut self.inner, path.as_ref().as_str()); } + /// Like [`Utf8PathBuf::push`], extends `self` with `path`, but also checks to ensure that + /// `path` abides by a set of rules. + /// + /// # Rules + /// + /// 1. `path` cannot contain a prefix component. + /// 2. `path` cannot contain a root component. + /// 3. `path` cannot contain invalid filename characters. + /// 4. `path` cannot contain parent components such that the current path would be escaped. 
+ /// + /// # Examples + /// + /// Pushing a relative path extends the existing path: + /// + /// ``` + /// use typed_path::{Utf8PathBuf, Utf8UnixEncoding}; + /// + /// // NOTE: A pathbuf cannot be created on its own without a defined encoding + /// let mut path = Utf8PathBuf::::from("/tmp"); + /// + /// // Pushing a relative path works like normal + /// assert!(path.push_checked("file.bk").is_ok()); + /// assert_eq!(path, Utf8PathBuf::from("/tmp/file.bk")); + /// ``` + /// + /// Pushing a relative path that contains unresolved parent directory references fails + /// with an error: + /// + /// ``` + /// use typed_path::{CheckedPathError, Utf8PathBuf, Utf8UnixEncoding}; + /// + /// // NOTE: A pathbuf cannot be created on its own without a defined encoding + /// let mut path = Utf8PathBuf::::from("/tmp"); + /// + /// // Pushing a relative path that contains parent directory references that cannot be + /// // resolved within the path is considered an error as this is considered a path + /// // traversal attack! + /// assert_eq!(path.push_checked(".."), Err(CheckedPathError::PathTraversalAttack)); + /// assert_eq!(path, Utf8PathBuf::from("/tmp")); + /// ``` + /// + /// Pushing an absolute path fails with an error: + /// + /// ``` + /// use typed_path::{CheckedPathError, Utf8PathBuf, Utf8UnixEncoding}; + /// + /// // NOTE: A pathbuf cannot be created on its own without a defined encoding + /// let mut path = Utf8PathBuf::::from("/tmp"); + /// + /// // Pushing an absolute path will fail with an error + /// assert_eq!(path.push_checked("/etc"), Err(CheckedPathError::UnexpectedRoot)); + /// assert_eq!(path, Utf8PathBuf::from("/tmp")); + /// ``` + pub fn push_checked>>(&mut self, path: P) -> Result<(), CheckedPathError> { + T::push_checked(&mut self.inner, path.as_ref().as_str()) + } + /// Truncates `self` to [`self.parent`]. /// /// Returns `false` and does nothing if [`self.parent`] is [`None`]. 
diff --git a/src/typed/non_utf8/path.rs b/src/typed/non_utf8/path.rs index a554abb..a3b0ed3 100644 --- a/src/typed/non_utf8/path.rs +++ b/src/typed/non_utf8/path.rs @@ -7,7 +7,7 @@ use std::path::Path; #[cfg(all(feature = "std", not(target_family = "wasm")))] use std::io; -use crate::common::StripPrefixError; +use crate::common::{CheckedPathError, StripPrefixError}; use crate::convert::TryAsRef; use crate::typed::{PathType, TypedAncestors, TypedComponents, TypedIter, TypedPathBuf}; use crate::unix::UnixPath; @@ -573,6 +573,42 @@ impl<'a> TypedPath<'a> { } } + /// Creates an owned [`TypedPathBuf`] with `path` adjoined to `self`, checking the `path` to + /// ensure it is safe to join. _When dealing with user-provided paths, this is the preferred + /// method._ + /// + /// See [`TypedPathBuf::push_checked`] for more details on what it means to adjoin a path + /// safely. + /// + /// # Difference from Path + /// + /// Unlike [`Path::join_checked`], this implementation only supports types that implement + /// `AsRef<[u8]>` instead of `AsRef`. + /// + /// [`Path::join_checked`]: crate::Path::join_checked + /// + /// # Examples + /// + /// ``` + /// use typed_path::{CheckedPathError, TypedPath, TypedPathBuf}; + /// + /// assert_eq!( + /// TypedPath::derive("/etc").join_checked("passwd"), + /// Ok(TypedPathBuf::from("/etc/passwd")), + /// ); + /// + /// assert_eq!( + /// TypedPath::derive("/etc").join_checked("/sneaky/path"), + /// Err(CheckedPathError::UnexpectedRoot), + /// ); + /// ``` + pub fn join_checked(&self, path: impl AsRef<[u8]>) -> Result { + Ok(match self { + Self::Unix(p) => TypedPathBuf::Unix(p.join_checked(UnixPath::new(&path))?), + Self::Windows(p) => TypedPathBuf::Windows(p.join_checked(WindowsPath::new(&path))?), + }) + } + /// Creates an owned [`TypedPathBuf`] like `self` but with the given file name. /// /// See [`TypedPathBuf::set_file_name`] for more details. 
@@ -741,13 +777,31 @@ impl<'a> TypedPath<'a> { } } - /// Converts this [`TypedPath`] into the Unix variant of [`TypedPathBuf`]. + /// Converts this [`TypedPath`] into the Unix variant of [`TypedPathBuf`], ensuring it is a + /// valid Unix path. + pub fn with_unix_encoding_checked(&self) -> Result { + Ok(match self { + Self::Unix(p) => TypedPathBuf::Unix(p.with_unix_encoding_checked()?), + Self::Windows(p) => TypedPathBuf::Unix(p.with_unix_encoding_checked()?), + }) + } + + /// Converts this [`TypedPath`] into the Windows variant of [`TypedPathBuf`]. pub fn with_windows_encoding(&self) -> TypedPathBuf { match self { Self::Unix(p) => TypedPathBuf::Windows(p.with_windows_encoding()), _ => self.to_path_buf(), } } + + /// Converts this [`TypedPath`] into the Windows variant of [`TypedPathBuf`], ensuring it is a + /// valid Windows path. + pub fn with_windows_encoding_checked(&self) -> Result { + Ok(match self { + Self::Unix(p) => TypedPathBuf::Windows(p.with_windows_encoding_checked()?), + Self::Windows(p) => TypedPathBuf::Windows(p.with_windows_encoding_checked()?), + }) + } } impl<'a> From<&'a [u8]> for TypedPath<'a> { diff --git a/src/typed/non_utf8/pathbuf.rs b/src/typed/non_utf8/pathbuf.rs index 6bad34f..4984463 100644 --- a/src/typed/non_utf8/pathbuf.rs +++ b/src/typed/non_utf8/pathbuf.rs @@ -5,7 +5,7 @@ use core::convert::TryFrom; #[cfg(feature = "std")] use std::{io, path::PathBuf}; -use crate::common::StripPrefixError; +use crate::common::{CheckedPathError, StripPrefixError}; use crate::no_std_compat::*; use crate::typed::{PathType, TypedAncestors, TypedComponents, TypedIter, TypedPath}; use crate::unix::{UnixPath, UnixPathBuf}; @@ -42,7 +42,15 @@ impl TypedPathBuf { } } - /// Converts this [`TypedPathBuf`] into the Unix variant. + /// Converts this [`TypedPathBuf`] into the Unix variant, ensuring it is a valid Unix path. 
+ pub fn with_unix_encoding_checked(&self) -> Result { + Ok(match self { + Self::Unix(p) => TypedPathBuf::Unix(p.with_unix_encoding_checked()?), + Self::Windows(p) => TypedPathBuf::Unix(p.with_unix_encoding_checked()?), + }) + } + + /// Converts this [`TypedPathBuf`] into the Windows variant. pub fn with_windows_encoding(&self) -> TypedPathBuf { match self { Self::Unix(p) => TypedPathBuf::Windows(p.with_windows_encoding()), @@ -50,6 +58,15 @@ impl TypedPathBuf { } } + /// Converts this [`TypedPathBuf`] into the Windows variant, ensuring it is a valid Windows + /// path. + pub fn with_windows_encoding_checked(&self) -> Result { + Ok(match self { + Self::Unix(p) => TypedPathBuf::Windows(p.with_windows_encoding_checked()?), + Self::Windows(p) => TypedPathBuf::Windows(p.with_windows_encoding_checked()?), + }) + } + /// Allocates an empty [`TypedPathBuf`] for the specified path type. /// /// # Examples @@ -161,6 +178,68 @@ impl TypedPathBuf { } } + /// Like [`TypedPathBuf::push`], extends `self` with `path`, but also checks to ensure that + /// `path` abides by a set of rules. + /// + /// # Rules + /// + /// 1. `path` cannot contain a prefix component. + /// 2. `path` cannot contain a root component. + /// 3. `path` cannot contain invalid filename bytes. + /// 4. `path` cannot contain parent components such that the current path would be escaped. + /// + /// # Difference from PathBuf + /// + /// Unlike [`PathBuf::push_checked`], this implementation only supports types that implement + /// `AsRef<[u8]>` instead of `AsRef`. 
+ /// + /// [`PathBuf::push_checked`]: crate::PathBuf::push_checked + /// + /// # Examples + /// + /// Pushing a relative path extends the existing path: + /// + /// ``` + /// use typed_path::TypedPathBuf; + /// + /// let mut path = TypedPathBuf::from_unix("/tmp"); + /// assert!(path.push_checked("file.bk").is_ok()); + /// assert_eq!(path, TypedPathBuf::from_unix("/tmp/file.bk")); + /// ``` + /// + /// Pushing a relative path that contains unresolved parent directory references fails + /// with an error: + /// + /// ``` + /// use typed_path::{CheckedPathError, TypedPathBuf}; + /// + /// let mut path = TypedPathBuf::from_unix("/tmp"); + /// + /// // Pushing a relative path that contains parent directory references that cannot be + /// // resolved within the path is considered an error as this is considered a path + /// // traversal attack! + /// assert_eq!(path.push_checked(".."), Err(CheckedPathError::PathTraversalAttack)); + /// assert_eq!(path, TypedPathBuf::from("/tmp")); + /// ``` + /// + /// Pushing an absolute path fails with an error: + /// + /// ``` + /// use typed_path::{CheckedPathError, TypedPathBuf}; + /// + /// let mut path = TypedPathBuf::from_unix("/tmp"); + /// + /// // Pushing an absolute path will fail with an error + /// assert_eq!(path.push_checked("/etc"), Err(CheckedPathError::UnexpectedRoot)); + /// assert_eq!(path, TypedPathBuf::from_unix("/tmp")); + /// ``` + pub fn push_checked(&mut self, path: impl AsRef<[u8]>) -> Result<(), CheckedPathError> { + match self { + Self::Unix(a) => a.push_checked(UnixPath::new(&path)), + Self::Windows(a) => a.push_checked(WindowsPath::new(&path)), + } + } + /// Truncates `self` to [`self.parent`]. /// /// Returns `false` and does nothing if [`self.parent`] is [`None`]. @@ -780,6 +859,41 @@ impl TypedPathBuf { self.to_path().join(path) } + /// Creates an owned [`TypedPathBuf`] with `path` adjoined to `self`, checking the `path` to + /// ensure it is safe to join. 
_When dealing with user-provided paths, this is the preferred + /// method._ + /// + /// See [`TypedPathBuf::push_checked`] for more details on what it means to adjoin a path + /// safely. + /// + /// # Difference from Path + /// + /// Unlike [`Path::join_checked`], this implementation only supports types that implement + /// `AsRef<[u8]>` instead of `AsRef`. + /// + /// [`Path::join_checked`]: crate::Path::join_checked + /// + /// # Examples + /// + /// ``` + /// use typed_path::{CheckedPathError, TypedPathBuf}; + /// + /// // Valid path will join successfully + /// assert_eq!( + /// TypedPathBuf::from("/etc").join_checked("passwd"), + /// Ok(TypedPathBuf::from("/etc/passwd")), + /// ); + /// + /// // Invalid path will fail to join + /// assert_eq!( + /// TypedPathBuf::from("/etc").join_checked("/sneaky/path"), + /// Err(CheckedPathError::UnexpectedRoot), + /// ); + /// ``` + pub fn join_checked(&self, path: impl AsRef<[u8]>) -> Result { + self.to_path().join_checked(path) + } + /// Creates an owned [`TypedPathBuf`] like `self` but with the given file name. /// /// See [`TypedPathBuf::set_file_name`] for more details. diff --git a/src/typed/utf8/path.rs b/src/typed/utf8/path.rs index bf5dfc7..4f9180a 100644 --- a/src/typed/utf8/path.rs +++ b/src/typed/utf8/path.rs @@ -3,7 +3,7 @@ use core::fmt; #[cfg(feature = "std")] use std::path::Path; -use crate::common::StripPrefixError; +use crate::common::{CheckedPathError, StripPrefixError}; use crate::convert::TryAsRef; use crate::typed::{ PathType, Utf8TypedAncestors, Utf8TypedComponents, Utf8TypedIter, Utf8TypedPathBuf, @@ -504,10 +504,10 @@ impl<'a> Utf8TypedPath<'a> { /// /// # Difference from Path /// - /// Unlike [`Path::join`], this implementation only supports types that implement + /// Unlike [`Utf8Path::join`], this implementation only supports types that implement /// `AsRef` instead of `AsRef`. 
/// - /// [`Path::join`]: crate::Path::join + /// [`Utf8Path::join`]: crate::Utf8Path::join /// /// # Examples /// @@ -526,6 +526,47 @@ impl<'a> Utf8TypedPath<'a> { } } + /// Creates an owned [`Utf8TypedPathBuf`] with `path` adjoined to `self`, checking the `path` to + /// ensure it is safe to join. _When dealing with user-provided paths, this is the preferred + /// method._ + /// + /// See [`Utf8TypedPathBuf::push_checked`] for more details on what it means to adjoin a path + /// safely. + /// + /// # Difference from Path + /// + /// Unlike [`Utf8Path::join_checked`], this implementation only supports types that implement + /// `AsRef` instead of `AsRef`. + /// + /// [`Utf8Path::join_checked`]: crate::Utf8Path::join_checked + /// + /// # Examples + /// + /// ``` + /// use typed_path::{CheckedPathError, Utf8TypedPath, Utf8TypedPathBuf}; + /// + /// assert_eq!( + /// Utf8TypedPath::derive("/etc").join_checked("passwd"), + /// Ok(Utf8TypedPathBuf::from("/etc/passwd")), + /// ); + /// + /// assert_eq!( + /// Utf8TypedPath::derive("/etc").join_checked("/sneaky/path"), + /// Err(CheckedPathError::UnexpectedRoot), + /// ); + /// ``` + pub fn join_checked( + &self, + path: impl AsRef, + ) -> Result { + Ok(match self { + Self::Unix(p) => Utf8TypedPathBuf::Unix(p.join_checked(Utf8UnixPath::new(&path))?), + Self::Windows(p) => { + Utf8TypedPathBuf::Windows(p.join_checked(Utf8WindowsPath::new(&path))?) + } + }) + } + /// Creates an owned [`Utf8TypedPathBuf`] like `self` but with the given file name. /// /// See [`Utf8TypedPathBuf::set_file_name`] for more details. @@ -658,13 +699,31 @@ impl<'a> Utf8TypedPath<'a> { } } - /// Converts this [`Utf8TypedPath`] into the Unix variant of [`Utf8TypedPathBuf`]. + /// Converts this [`Utf8TypedPath`] into the Unix variant of [`Utf8TypedPathBuf`], ensuring it + /// is a valid Unix path. 
+ pub fn with_unix_encoding_checked(&self) -> Result { + Ok(match self { + Self::Unix(p) => Utf8TypedPathBuf::Unix(p.with_unix_encoding_checked()?), + Self::Windows(p) => Utf8TypedPathBuf::Unix(p.with_unix_encoding_checked()?), + }) + } + + /// Converts this [`Utf8TypedPath`] into the Windows variant of [`Utf8TypedPathBuf`]. pub fn with_windows_encoding(&self) -> Utf8TypedPathBuf { match self { Self::Unix(p) => Utf8TypedPathBuf::Windows(p.with_windows_encoding()), _ => self.to_path_buf(), } } + + /// Converts this [`Utf8TypedPath`] into the Windows variant of [`Utf8TypedPathBuf`], ensuring + /// it is a valid Windows path. + pub fn with_windows_encoding_checked(&self) -> Result { + Ok(match self { + Self::Unix(p) => Utf8TypedPathBuf::Windows(p.with_windows_encoding_checked()?), + Self::Windows(p) => Utf8TypedPathBuf::Windows(p.with_windows_encoding_checked()?), + }) + } } impl fmt::Display for Utf8TypedPath<'_> { diff --git a/src/typed/utf8/pathbuf.rs b/src/typed/utf8/pathbuf.rs index 942bac2..7509922 100644 --- a/src/typed/utf8/pathbuf.rs +++ b/src/typed/utf8/pathbuf.rs @@ -5,7 +5,7 @@ use core::fmt; #[cfg(feature = "std")] use std::path::PathBuf; -use crate::common::StripPrefixError; +use crate::common::{CheckedPathError, StripPrefixError}; use crate::no_std_compat::*; use crate::typed::{ PathType, Utf8TypedAncestors, Utf8TypedComponents, Utf8TypedIter, Utf8TypedPath, @@ -44,7 +44,16 @@ impl Utf8TypedPathBuf { } } - /// Converts this [`Utf8TypedPathBuf`] into the Unix variant. + /// Converts this [`Utf8TypedPathBuf`] into the Unix variant, ensuring it is valid as a Unix + /// path. + pub fn with_unix_encoding_checked(&self) -> Result { + Ok(match self { + Self::Unix(p) => Utf8TypedPathBuf::Unix(p.with_unix_encoding_checked()?), + Self::Windows(p) => Utf8TypedPathBuf::Unix(p.with_unix_encoding_checked()?), + }) + } + + /// Converts this [`Utf8TypedPathBuf`] into the Windows variant. 
pub fn with_windows_encoding(&self) -> Utf8TypedPathBuf { match self { Self::Unix(p) => Utf8TypedPathBuf::Windows(p.with_windows_encoding()), @@ -52,6 +61,15 @@ impl Utf8TypedPathBuf { } } + /// Converts this [`Utf8TypedPathBuf`] into the Windows variant, ensuring it is valid as a + /// Windows path. + pub fn with_windows_encoding_checked(&self) -> Result<Utf8TypedPathBuf, CheckedPathError> { + Ok(match self { + Self::Unix(p) => Utf8TypedPathBuf::Windows(p.with_windows_encoding_checked()?), + Self::Windows(p) => Utf8TypedPathBuf::Windows(p.with_windows_encoding_checked()?), + }) + } + /// Allocates an empty [`Utf8TypedPathBuf`] for the specified path type. /// /// # Examples @@ -163,6 +181,68 @@ impl Utf8TypedPathBuf { } } + /// Like [`Utf8TypedPathBuf::push`], extends `self` with `path`, but also checks to ensure that + /// `path` abides by a set of rules. + /// + /// # Rules + /// + /// 1. `path` cannot contain a prefix component. + /// 2. `path` cannot contain a root component. + /// 3. `path` cannot contain invalid filename bytes. + /// 4. `path` cannot contain parent components such that the current path would be escaped. + /// + /// # Difference from Utf8PathBuf + /// + /// Unlike [`Utf8PathBuf::push_checked`], this implementation only supports types that implement + /// `AsRef<str>` instead of `AsRef<Utf8Path>`.
+ /// + /// [`Utf8PathBuf::push_checked`]: crate::Utf8PathBuf::push_checked + /// + /// # Examples + /// + /// Pushing a relative path extends the existing path: + /// + /// ``` + /// use typed_path::Utf8TypedPathBuf; + /// + /// let mut path = Utf8TypedPathBuf::from_unix("/tmp"); + /// assert!(path.push_checked("file.bk").is_ok()); + /// assert_eq!(path, Utf8TypedPathBuf::from_unix("/tmp/file.bk")); + /// ``` + /// + /// Pushing a relative path that contains unresolved parent directory references fails + /// with an error: + /// + /// ``` + /// use typed_path::{CheckedPathError, Utf8TypedPathBuf}; + /// + /// let mut path = Utf8TypedPathBuf::from_unix("/tmp"); + /// + /// // Pushing a relative path that contains parent directory references that cannot be + /// // resolved within the path is considered an error as this is considered a path + /// // traversal attack! + /// assert_eq!(path.push_checked(".."), Err(CheckedPathError::PathTraversalAttack)); + /// assert_eq!(path, Utf8TypedPathBuf::from("/tmp")); + /// ``` + /// + /// Pushing an absolute path fails with an error: + /// + /// ``` + /// use typed_path::{CheckedPathError, Utf8TypedPathBuf}; + /// + /// let mut path = Utf8TypedPathBuf::from_unix("/tmp"); + /// + /// // Pushing an absolute path will fail with an error + /// assert_eq!(path.push_checked("/etc"), Err(CheckedPathError::UnexpectedRoot)); + /// assert_eq!(path, Utf8TypedPathBuf::from_unix("/tmp")); + /// ``` + pub fn push_checked(&mut self, path: impl AsRef<str>) -> Result<(), CheckedPathError> { + match self { + Self::Unix(a) => a.push_checked(Utf8UnixPath::new(&path)), + Self::Windows(a) => a.push_checked(Utf8WindowsPath::new(&path)), + } + } + /// Truncates `self` to [`self.parent`]. /// /// Returns `false` and does nothing if [`self.parent`] is [`None`]. @@ -740,6 +820,44 @@ impl Utf8TypedPathBuf { self.to_path().join(path) } + /// Creates an owned [`Utf8TypedPathBuf`] with `path` adjoined to `self`, checking the `path` + /// to ensure it is safe to join.
_When dealing with user-provided paths, this is the preferred + /// method._ + /// + /// See [`Utf8TypedPathBuf::push_checked`] for more details on what it means to adjoin a path + /// safely. + /// + /// # Difference from Path + /// + /// Unlike [`Utf8Path::join_checked`], this implementation only supports types that implement + /// `AsRef` instead of `AsRef`. + /// + /// [`Utf8Path::join_checked`]: crate::Utf8Path::join_checked + /// + /// # Examples + /// + /// ``` + /// use typed_path::{CheckedPathError, Utf8TypedPathBuf}; + /// + /// // Valid path will join successfully + /// assert_eq!( + /// Utf8TypedPathBuf::from("/etc").join_checked("passwd"), + /// Ok(Utf8TypedPathBuf::from("/etc/passwd")), + /// ); + /// + /// // Invalid path will fail to join + /// assert_eq!( + /// Utf8TypedPathBuf::from("/etc").join_checked("/sneaky/path"), + /// Err(CheckedPathError::UnexpectedRoot), + /// ); + /// ``` + pub fn join_checked( + &self, + path: impl AsRef, + ) -> Result { + self.to_path().join_checked(path) + } + /// Creates an owned [`Utf8TypedPathBuf`] like `self` but with the given file name. /// /// See [`Utf8TypedPathBuf::set_file_name`] for more details. diff --git a/src/unix/non_utf8.rs b/src/unix/non_utf8.rs index 5a4060d..cb462cf 100644 --- a/src/unix/non_utf8.rs +++ b/src/unix/non_utf8.rs @@ -6,6 +6,7 @@ use core::hash::Hasher; pub use components::*; use super::constants::*; +use crate::common::CheckedPathError; use crate::no_std_compat::*; use crate::typed::{TypedPath, TypedPathBuf}; use crate::{private, Components, Encoding, Path, PathBuf}; @@ -87,6 +88,35 @@ impl<'a> Encoding<'a> for UnixEncoding { current_path.extend_from_slice(path); } + + fn push_checked(current_path: &mut Vec, path: &[u8]) -> Result<(), CheckedPathError> { + // As we scan through path components, we maintain a count of normal components that + // have not been popped off as a result of a parent component. 
If we ever reach a + // parent component without any preceding normal components remaining, this violates + // pushing onto our path and represents a path traversal attack. + let mut normal_cnt = 0; + for component in UnixPath::new(path).components() { + match component { + UnixComponent::RootDir => return Err(CheckedPathError::UnexpectedRoot), + UnixComponent::ParentDir if normal_cnt == 0 => { + return Err(CheckedPathError::PathTraversalAttack) + } + UnixComponent::ParentDir => normal_cnt -= 1, + UnixComponent::Normal(bytes) => { + for b in bytes { + if DISALLOWED_FILENAME_BYTES.contains(b) { + return Err(CheckedPathError::InvalidFilename); + } + } + normal_cnt += 1; + } + _ => continue, + } + } + + Self::push(current_path, path); + Ok(()) + } } impl fmt::Debug for UnixEncoding { @@ -125,6 +155,14 @@ where pub fn with_unix_encoding(&self) -> PathBuf { self.with_encoding() } + + /// Creates an owned [`PathBuf`] like `self` but using [`UnixEncoding`], ensuring it is a valid + /// Unix path. + /// + /// See [`Path::with_encoding_checked`] for more information. 
+ pub fn with_unix_encoding_checked(&self) -> Result, CheckedPathError> { + self.with_encoding_checked() + } } impl UnixPath { @@ -176,4 +214,114 @@ mod tests { UnixEncoding::push(&mut current_path, b"abc"); assert_eq!(current_path, b"some/path/abc"); } + + #[test] + fn push_checked_should_fail_if_providing_an_absolute_path() { + // Empty current path will fail when pushing an absolute path + let mut current_path = vec![]; + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"/abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, b""); + + // Non-empty relative current path will fail when pushing an absolute path + let mut current_path = b"some/path".to_vec(); + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"/abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, b"some/path"); + + // Non-empty absolute current path will fail when pushing an absolute path + let mut current_path = b"/some/path/".to_vec(); + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"/abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, b"/some/path/"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_with_disallowed_filename_bytes() { + // Empty current path will fail when pushing a path containing disallowed filename bytes + let mut current_path = vec![]; + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"some/inva\0lid/path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, b""); + + // Non-empty relative current path will fail when pushing a path containing disallowed + // filename bytes + let mut current_path = b"some/path".to_vec(); + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"some/inva\0lid/path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, b"some/path"); + + // Non-empty absolute current path will fail when pushing a path containing disallowed + // filename bytes + let mut 
current_path = b"/some/path/".to_vec(); + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"some/inva\0lid/path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, b"/some/path/"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_that_would_escape_the_current_path() { + // Empty current path will fail when pushing a path that would escape + let mut current_path = vec![]; + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, b""); + + // Non-empty relative current path will fail when pushing a path that would escape + let mut current_path = b"some/path".to_vec(); + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, b"some/path"); + + // Non-empty absolute current path will fail when pushing a path that would escape + let mut current_path = b"/some/path/".to_vec(); + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, b"/some/path/"); + } + + #[test] + fn push_checked_should_append_path_to_current_path_with_a_separator_if_does_not_violate_rules() + { + // Pushing a path that contains parent dirs, but does not escape the current path, + // should succeed + let mut current_path = vec![]; + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"abc/../def/."), + Ok(()), + ); + assert_eq!(current_path, b"abc/../def/."); + + let mut current_path = b"some/path".to_vec(); + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"abc/../def/."), + Ok(()), + ); + assert_eq!(current_path, b"some/path/abc/../def/."); + + let mut current_path = b"/some/path/".to_vec(); + assert_eq!( + UnixEncoding::push_checked(&mut current_path, b"abc/../def/."), + Ok(()), + ); + assert_eq!(current_path, b"/some/path/abc/../def/."); + } } diff --git 
a/src/unix/non_utf8/components/component.rs b/src/unix/non_utf8/components/component.rs index dd72f32..8dd6114 100644 --- a/src/unix/non_utf8/components/component.rs +++ b/src/unix/non_utf8/components/component.rs @@ -1,4 +1,4 @@ -use crate::unix::constants::{CURRENT_DIR, PARENT_DIR, SEPARATOR_STR}; +use crate::unix::constants::{CURRENT_DIR, DISALLOWED_FILENAME_BYTES, PARENT_DIR, SEPARATOR_STR}; use crate::unix::UnixComponents; use crate::{private, Component, Encoding, ParseError, Path}; @@ -122,6 +122,29 @@ impl<'a> Component<'a> for UnixComponent<'a> { matches!(self, Self::CurDir) } + /// Returns true if this component is valid. + /// + /// A component can only be invalid if it represents a normal component with bytes that are + /// disallowed by the encoding. + /// + /// # Examples + /// + /// ``` + /// use typed_path::{Component, UnixComponent}; + /// + /// assert!(UnixComponent::RootDir.is_valid()); + /// assert!(UnixComponent::ParentDir.is_valid()); + /// assert!(UnixComponent::CurDir.is_valid()); + /// assert!(UnixComponent::Normal(b"abc").is_valid()); + /// assert!(!UnixComponent::Normal(b"\0").is_valid()); + /// ``` + fn is_valid(&self) -> bool { + match self { + Self::RootDir | Self::ParentDir | Self::CurDir => true, + Self::Normal(bytes) => !bytes.iter().any(|b| DISALLOWED_FILENAME_BYTES.contains(b)), + } + } + fn len(&self) -> usize { self.as_bytes().len() } diff --git a/src/unix/utf8.rs b/src/unix/utf8.rs index 31e88a1..b79e147 100644 --- a/src/unix/utf8.rs +++ b/src/unix/utf8.rs @@ -5,6 +5,7 @@ use core::hash::Hasher; pub use components::*; +use crate::common::CheckedPathError; use crate::no_std_compat::*; use crate::typed::{Utf8TypedPath, Utf8TypedPathBuf}; use crate::{private, Encoding, UnixEncoding, Utf8Encoding, Utf8Path, Utf8PathBuf}; @@ -41,6 +42,10 @@ impl<'a> Utf8Encoding<'a> for Utf8UnixEncoding { UnixEncoding::push(current_path.as_mut_vec(), path.as_bytes()); } } + + fn push_checked(current_path: &mut String, path: &str) -> Result<(), 
CheckedPathError> { + unsafe { UnixEncoding::push_checked(current_path.as_mut_vec(), path.as_bytes()) } + } } impl fmt::Debug for Utf8UnixEncoding { @@ -79,6 +84,16 @@ where pub fn with_unix_encoding(&self) -> Utf8PathBuf { self.with_encoding() } + + /// Creates an owned [`Utf8PathBuf`] like `self` but using [`Utf8UnixEncoding`], ensuring it is + /// a valid Unix path. + /// + /// See [`Utf8Path::with_encoding_checked`] for more information. + pub fn with_unix_encoding_checked( + &self, + ) -> Result, CheckedPathError> { + self.with_encoding_checked() + } } impl Utf8UnixPath { @@ -130,4 +145,114 @@ mod tests { Utf8UnixEncoding::push(&mut current_path, "abc"); assert_eq!(current_path, "some/path/abc"); } + + #[test] + fn push_checked_should_fail_if_providing_an_absolute_path() { + // Empty current path will fail when pushing an absolute path + let mut current_path = String::new(); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "/abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, ""); + + // Non-empty relative current path will fail when pushing an absolute path + let mut current_path = String::from("some/path"); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "/abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, "some/path"); + + // Non-empty absolute current path will fail when pushing an absolute path + let mut current_path = String::from("/some/path/"); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "/abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, "/some/path/"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_with_disallowed_filename_characters() { + // Empty current path will fail when pushing a path containing disallowed filename chars + let mut current_path = String::new(); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "some/inva\0lid/path"), + 
Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, ""); + + // Non-empty relative current path will fail when pushing a path containing disallowed + // filename bytes + let mut current_path = String::from("some/path"); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "some/inva\0lid/path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, "some/path"); + + // Non-empty absolute current path will fail when pushing a path containing disallowed + // filename bytes + let mut current_path = String::from("/some/path/"); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "some/inva\0lid/path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, "/some/path/"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_that_would_escape_the_current_path() { + // Empty current path will fail when pushing a path that would escape + let mut current_path = String::new(); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, ".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, ""); + + // Non-empty relative current path will fail when pushing a path that would escape + let mut current_path = String::from("some/path"); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, ".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, "some/path"); + + // Non-empty absolute current path will fail when pushing a path that would escape + let mut current_path = String::from("/some/path/"); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, ".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, "/some/path/"); + } + + #[test] + fn push_checked_should_append_path_to_current_path_with_a_separator_if_does_not_violate_rules() + { + // Pushing a path that contains parent dirs, but does not escape the current path, + // should succeed + let mut current_path = 
String::new(); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "abc/../def/."), + Ok(()), + ); + assert_eq!(current_path, "abc/../def/."); + + let mut current_path = String::from("some/path"); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "abc/../def/."), + Ok(()), + ); + assert_eq!(current_path, "some/path/abc/../def/."); + + let mut current_path = String::from("/some/path/"); + assert_eq!( + Utf8UnixEncoding::push_checked(&mut current_path, "abc/../def/."), + Ok(()), + ); + assert_eq!(current_path, "/some/path/abc/../def/."); + } } diff --git a/src/unix/utf8/components/component.rs b/src/unix/utf8/components/component.rs index 80e374c..ca81449 100644 --- a/src/unix/utf8/components/component.rs +++ b/src/unix/utf8/components/component.rs @@ -1,7 +1,9 @@ use core::fmt; use core::str::Utf8Error; -use crate::unix::constants::{CURRENT_DIR_STR, PARENT_DIR_STR, SEPARATOR_STR}; +use crate::unix::constants::{ + CURRENT_DIR_STR, DISALLOWED_FILENAME_CHARS, PARENT_DIR_STR, SEPARATOR_STR, +}; use crate::unix::{UnixComponent, Utf8UnixComponents}; use crate::{private, ParseError, Utf8Component, Utf8Encoding, Utf8Path}; @@ -203,6 +205,29 @@ impl<'a> Utf8Component<'a> for Utf8UnixComponent<'a> { matches!(self, Self::CurDir) } + /// Returns true if this component is valid. + /// + /// A component can only be invalid if it represents a normal component with characters that + /// are disallowed by the encoding. 
+ /// + /// # Examples + /// + /// ``` + /// use typed_path::{Utf8Component, Utf8UnixComponent}; + /// + /// assert!(Utf8UnixComponent::RootDir.is_valid()); + /// assert!(Utf8UnixComponent::ParentDir.is_valid()); + /// assert!(Utf8UnixComponent::CurDir.is_valid()); + /// assert!(Utf8UnixComponent::Normal("abc").is_valid()); + /// assert!(!Utf8UnixComponent::Normal("\0").is_valid()); + /// ``` + fn is_valid(&self) -> bool { + match self { + Self::RootDir | Self::ParentDir | Self::CurDir => true, + Self::Normal(s) => !s.chars().any(|c| DISALLOWED_FILENAME_CHARS.contains(&c)), + } + } + fn len(&self) -> usize { self.as_str().len() } diff --git a/src/windows/non_utf8.rs b/src/windows/non_utf8.rs index 0d3ed80..c569b9d 100644 --- a/src/windows/non_utf8.rs +++ b/src/windows/non_utf8.rs @@ -6,6 +6,7 @@ use core::hash::{Hash, Hasher}; pub use components::*; use super::constants::*; +use crate::common::CheckedPathError; use crate::no_std_compat::*; use crate::typed::{TypedPath, TypedPathBuf}; use crate::{private, Component, Components, Encoding, Path, PathBuf}; @@ -192,6 +193,36 @@ impl<'a> Encoding<'a> for WindowsEncoding { current_path.extend_from_slice(path); } } + + fn push_checked(current_path: &mut Vec, path: &[u8]) -> Result<(), CheckedPathError> { + // As we scan through path components, we maintain a count of normal components that + // have not been popped off as a result of a parent component. If we ever reach a + // parent component without any preceding normal components remaining, this violates + // pushing onto our path and represents a path traversal attack. 
+ let mut normal_cnt = 0; + for component in WindowsPath::new(path).components() { + match component { + WindowsComponent::Prefix(_) => return Err(CheckedPathError::UnexpectedPrefix), + WindowsComponent::RootDir => return Err(CheckedPathError::UnexpectedRoot), + WindowsComponent::ParentDir if normal_cnt == 0 => { + return Err(CheckedPathError::PathTraversalAttack) + } + WindowsComponent::ParentDir => normal_cnt -= 1, + WindowsComponent::Normal(bytes) => { + for b in bytes { + if DISALLOWED_FILENAME_BYTES.contains(b) { + return Err(CheckedPathError::InvalidFilename); + } + } + normal_cnt += 1; + } + _ => continue, + } + } + + Self::push(current_path, path); + Ok(()) + } } impl fmt::Debug for WindowsEncoding { @@ -230,6 +261,16 @@ where pub fn with_windows_encoding(&self) -> PathBuf { self.with_encoding() } + + /// Creates an owned [`PathBuf`] like `self` but using [`WindowsEncoding`], ensuring it is a + /// valid Windows path. + /// + /// See [`Path::with_encoding_checked`] for more information. 
+ pub fn with_windows_encoding_checked( + &self, + ) -> Result, CheckedPathError> { + self.with_encoding_checked() + } } impl WindowsPath { @@ -241,3 +282,145 @@ impl WindowsPath { TypedPathBuf::from_windows(self) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn push_checked_should_fail_if_providing_an_absolute_path() { + // Empty current path will fail when pushing an absolute path + let mut current_path = vec![]; + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"\abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, b""); + + // Non-empty relative current path will fail when pushing an absolute path + let mut current_path = br"some\path".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"\abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, br"some\path"); + + // Non-empty absolute current path will fail when pushing an absolute path + let mut current_path = br"\some\path\".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"\abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, br"\some\path\"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_with_an_embedded_prefix() { + // Empty current path will fail when pushing a path with a prefix + let mut current_path = vec![]; + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"C:abc"), + Err(CheckedPathError::UnexpectedPrefix) + ); + assert_eq!(current_path, b""); + + // Non-empty relative current path will fail when pushing a path with a prefix + let mut current_path = br"some\path".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"C:abc"), + Err(CheckedPathError::UnexpectedPrefix) + ); + assert_eq!(current_path, br"some\path"); + + // Non-empty absolute current path will fail when pushing a path with a prefix + let mut current_path = br"\some\path\".to_vec(); + assert_eq!( + 
WindowsEncoding::push_checked(&mut current_path, br"C:abc"), + Err(CheckedPathError::UnexpectedPrefix) + ); + assert_eq!(current_path, br"\some\path\"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_with_disallowed_filename_bytes() { + // Empty current path will fail when pushing a path containing disallowed filename bytes + let mut current_path = vec![]; + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"some\inva|lid\path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, b""); + + // Non-empty relative current path will fail when pushing a path containing disallowed + // filename bytes + let mut current_path = br"some\path".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"some\inva|lid\path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, br"some\path"); + + // Non-empty absolute current path will fail when pushing a path containing disallowed + // filename bytes + let mut current_path = br"\some\path\".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"some\inva|lid\path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, br"\some\path\"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_that_would_escape_the_current_path() { + // Empty current path will fail when pushing a path that would escape + let mut current_path = vec![]; + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, b".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, b""); + + // Non-empty relative current path will fail when pushing a path that would escape + let mut current_path = br"some\path".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, b".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, br"some\path"); + + // Non-empty absolute current path will fail when pushing a path that would escape + let mut 
current_path = br"\some\path\".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, b".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, br"\some\path\"); + } + + #[test] + fn push_checked_should_append_path_to_current_path_with_a_separator_if_does_not_violate_rules() + { + // Pushing a path that contains parent dirs, but does not escape the current path, + // should succeed + let mut current_path = vec![]; + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"abc\..\def\."), + Ok(()), + ); + assert_eq!(current_path, br"abc\..\def\."); + + let mut current_path = br"some\path".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"abc\..\def\."), + Ok(()), + ); + assert_eq!(current_path, br"some\path\abc\..\def\."); + + let mut current_path = br"\some\path\".to_vec(); + assert_eq!( + WindowsEncoding::push_checked(&mut current_path, br"abc\..\def\."), + Ok(()), + ); + assert_eq!(current_path, br"\some\path\abc\..\def\."); + } +} diff --git a/src/windows/non_utf8/components/component.rs b/src/windows/non_utf8/components/component.rs index 5eb2dd3..bd8215d 100644 --- a/src/windows/non_utf8/components/component.rs +++ b/src/windows/non_utf8/components/component.rs @@ -3,7 +3,9 @@ use core::convert::TryFrom; pub use prefix::{WindowsPrefix, WindowsPrefixComponent}; -use crate::windows::constants::{CURRENT_DIR, PARENT_DIR, SEPARATOR_STR}; +use crate::windows::constants::{ + CURRENT_DIR, DISALLOWED_FILENAME_BYTES, PARENT_DIR, SEPARATOR_STR, +}; use crate::windows::WindowsComponents; use crate::{private, Component, Encoding, ParseError, Path}; @@ -169,6 +171,31 @@ impl<'a> Component<'a> for WindowsComponent<'a> { matches!(self, Self::CurDir) } + /// Returns true if this component is valid. + /// + /// A component can only be invalid if it represents a normal component with bytes that are + /// disallowed by the encoding. 
+ /// + /// # Examples + /// + /// ``` + /// use typed_path::{Component, WindowsComponent}; + /// use std::convert::TryFrom; + /// + /// assert!(WindowsComponent::try_from("c:").unwrap().is_valid()); + /// assert!(WindowsComponent::RootDir.is_valid()); + /// assert!(WindowsComponent::ParentDir.is_valid()); + /// assert!(WindowsComponent::CurDir.is_valid()); + /// assert!(WindowsComponent::Normal(b"abc").is_valid()); + /// assert!(!WindowsComponent::Normal(b"|").is_valid()); + /// ``` + fn is_valid(&self) -> bool { + match self { + Self::Prefix(_) | Self::RootDir | Self::ParentDir | Self::CurDir => true, + Self::Normal(bytes) => !bytes.iter().any(|b| DISALLOWED_FILENAME_BYTES.contains(b)), + } + } + fn len(&self) -> usize { self.as_bytes().len() } diff --git a/src/windows/utf8.rs b/src/windows/utf8.rs index 2797578..35d8fa5 100644 --- a/src/windows/utf8.rs +++ b/src/windows/utf8.rs @@ -5,6 +5,7 @@ use core::hash::Hasher; pub use components::*; +use crate::common::CheckedPathError; use crate::no_std_compat::*; use crate::typed::{Utf8TypedPath, Utf8TypedPathBuf}; use crate::{private, Encoding, Utf8Encoding, Utf8Path, Utf8PathBuf, WindowsEncoding}; @@ -41,6 +42,10 @@ impl<'a> Utf8Encoding<'a> for Utf8WindowsEncoding { WindowsEncoding::push(current_path.as_mut_vec(), path.as_bytes()); } } + + fn push_checked(current_path: &mut String, path: &str) -> Result<(), CheckedPathError> { + unsafe { WindowsEncoding::push_checked(current_path.as_mut_vec(), path.as_bytes()) } + } } impl fmt::Debug for Utf8WindowsEncoding { @@ -79,6 +84,16 @@ where pub fn with_windows_encoding(&self) -> Utf8PathBuf<Utf8WindowsEncoding> { self.with_encoding() } + + /// Creates an owned [`Utf8PathBuf`] like `self` but using [`Utf8WindowsEncoding`], ensuring it + /// is a valid Windows path. + /// + /// See [`Utf8Path::with_encoding_checked`] for more information.
+ pub fn with_windows_encoding_checked( + &self, + ) -> Result<Utf8PathBuf<Utf8WindowsEncoding>, CheckedPathError> { + self.with_encoding_checked() + } } impl Utf8WindowsPath { @@ -90,3 +105,145 @@ impl Utf8WindowsPath { Utf8TypedPathBuf::from_windows(self) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn push_checked_should_fail_if_providing_an_absolute_path() { + // Empty current path will fail when pushing an absolute path + let mut current_path = String::new(); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"\abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, ""); + + // Non-empty relative current path will fail when pushing an absolute path + let mut current_path = String::from(r"some\path"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"\abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, r"some\path"); + + // Non-empty absolute current path will fail when pushing an absolute path + let mut current_path = String::from(r"\some\path\"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"\abc"), + Err(CheckedPathError::UnexpectedRoot) + ); + assert_eq!(current_path, r"\some\path\"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_with_an_embedded_prefix() { + // Empty current path will fail when pushing a path with a prefix + let mut current_path = String::new(); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"C:abc"), + Err(CheckedPathError::UnexpectedPrefix) + ); + assert_eq!(current_path, ""); + + // Non-empty relative current path will fail when pushing a path with a prefix + let mut current_path = String::from(r"some\path"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"C:abc"), + Err(CheckedPathError::UnexpectedPrefix) + ); + assert_eq!(current_path, r"some\path"); + + // Non-empty absolute current path will fail when pushing a path with a prefix + let mut current_path =
String::from(r"\some\path\"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"C:abc"), + Err(CheckedPathError::UnexpectedPrefix) + ); + assert_eq!(current_path, r"\some\path\"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_with_disallowed_filename_bytes() { + // Empty current path will fail when pushing a path containing disallowed filename bytes + let mut current_path = String::new(); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"some\inva|lid\path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, ""); + + // Non-empty relative current path will fail when pushing a path containing disallowed + // filename bytes + let mut current_path = String::from(r"some\path"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"some\inva|lid\path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, r"some\path"); + + // Non-empty absolute current path will fail when pushing a path containing disallowed + // filename bytes + let mut current_path = String::from(r"\some\path\"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"some\inva|lid\path"), + Err(CheckedPathError::InvalidFilename) + ); + assert_eq!(current_path, r"\some\path\"); + } + + #[test] + fn push_checked_should_fail_if_providing_a_path_that_would_escape_the_current_path() { + // Empty current path will fail when pushing a path that would escape + let mut current_path = String::new(); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, ".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, ""); + + // Non-empty relative current path will fail when pushing a path that would escape + let mut current_path = String::from(r"some\path"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, ".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, r"some\path"); + + // Non-empty 
absolute current path will fail when pushing a path that would escape + let mut current_path = String::from(r"\some\path\"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, ".."), + Err(CheckedPathError::PathTraversalAttack) + ); + assert_eq!(current_path, r"\some\path\"); + } + + #[test] + fn push_checked_should_append_path_to_current_path_with_a_separator_if_does_not_violate_rules() + { + // Pushing a path that contains parent dirs, but does not escape the current path, + // should succeed + let mut current_path = String::new(); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"abc\..\def\."), + Ok(()), + ); + assert_eq!(current_path, r"abc\..\def\."); + + let mut current_path = String::from(r"some\path"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"abc\..\def\."), + Ok(()), + ); + assert_eq!(current_path, r"some\path\abc\..\def\."); + + let mut current_path = String::from(r"\some\path\"); + assert_eq!( + Utf8WindowsEncoding::push_checked(&mut current_path, r"abc\..\def\."), + Ok(()), + ); + assert_eq!(current_path, r"\some\path\abc\..\def\."); + } +} diff --git a/src/windows/utf8/components/component.rs b/src/windows/utf8/components/component.rs index 1defa2e..fe10876 100644 --- a/src/windows/utf8/components/component.rs +++ b/src/windows/utf8/components/component.rs @@ -5,7 +5,9 @@ use core::str::Utf8Error; pub use prefix::{Utf8WindowsPrefix, Utf8WindowsPrefixComponent}; -use crate::windows::constants::{CURRENT_DIR_STR, PARENT_DIR_STR, SEPARATOR_STR}; +use crate::windows::constants::{ + CURRENT_DIR_STR, DISALLOWED_FILENAME_CHARS, PARENT_DIR_STR, SEPARATOR_STR, +}; use crate::windows::{Utf8WindowsComponents, WindowsComponent}; use crate::{private, ParseError, Utf8Component, Utf8Encoding, Utf8Path}; @@ -250,6 +252,31 @@ impl<'a> Utf8Component<'a> for Utf8WindowsComponent<'a> { matches!(self, Self::CurDir) } + /// Returns true if this component is valid. 
+ /// + /// A component can only be invalid if it represents a normal component with characters that + /// are disallowed by the encoding. + /// + /// # Examples + /// + /// ``` + /// use typed_path::{Utf8Component, Utf8WindowsComponent}; + /// use std::convert::TryFrom; + /// + /// assert!(Utf8WindowsComponent::try_from("c:").unwrap().is_valid()); + /// assert!(Utf8WindowsComponent::RootDir.is_valid()); + /// assert!(Utf8WindowsComponent::ParentDir.is_valid()); + /// assert!(Utf8WindowsComponent::CurDir.is_valid()); + /// assert!(Utf8WindowsComponent::Normal("abc").is_valid()); + /// assert!(!Utf8WindowsComponent::Normal("|").is_valid()); + /// ``` + fn is_valid(&self) -> bool { + match self { + Self::Prefix(_) | Self::RootDir | Self::ParentDir | Self::CurDir => true, + Self::Normal(s) => !s.chars().any(|c| DISALLOWED_FILENAME_CHARS.contains(&c)), + } + } + fn len(&self) -> usize { self.as_str().len() }