diff --git a/library/alloc/src/ffi/mod.rs b/library/alloc/src/ffi/mod.rs
index 4f9dc40a3cfc9..0880e8a340f4e 100644
--- a/library/alloc/src/ffi/mod.rs
+++ b/library/alloc/src/ffi/mod.rs
@@ -89,3 +89,19 @@ pub use self::c_str::{FromVecWithNulError, IntoStringError, NulError};
#[unstable(feature = "c_str_module", issue = "112134")]
pub mod c_str;
+
+#[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+)]
+#[doc(hidden)]
+pub mod os_str;
+
+#[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+)]
+#[doc(hidden)]
+pub mod wtf8;
diff --git a/library/alloc/src/ffi/os_str.rs b/library/alloc/src/ffi/os_str.rs
new file mode 100644
index 0000000000000..521b0366df24a
--- /dev/null
+++ b/library/alloc/src/ffi/os_str.rs
@@ -0,0 +1,1272 @@
+//! The [`OsStr`] and [`OsString`] types and associated utilities.
+
+#[cfg(test)]
+mod tests;
+
+use core::ffi::os_str::{OsStr, Slice};
+use core::hash::{Hash, Hasher};
+use core::{cmp, fmt, ops};
+
+use crate::borrow::{Borrow, Cow, ToOwned};
+use crate::boxed::Box;
+use crate::collections::TryReserveError;
+use crate::rc::Rc;
+use crate::str::FromStr;
+use crate::string::String;
+use crate::sync::Arc;
+use crate::vec::Vec;
+
+mod private {
+ /// Sealing trait. Because it is unreachable from outside the crate,
+ /// outside crates cannot implement our extension traits, which
+ /// allows adding more trait methods to them in the future.
+ #[unstable(feature = "sealed", issue = "none")]
+ pub trait Sealed {}
+}
+
+// Exactly one of `os_str_ext_windows` / `os_str_ext_unix` is compiled in,
+// chosen by the `target_os` test below.
+#[cfg(any(target_os = "windows", target_os = "uefi"))]
+#[stable(feature = "rust1", since = "1.0.0")]
+pub mod os_str_ext_windows;
+
+#[cfg(not(any(target_os = "windows", target_os = "uefi")))]
+#[stable(feature = "rust1", since = "1.0.0")]
+pub mod os_str_ext_unix;
+
+// `Buf` is the backing buffer type of `OsString`: it is `wtf8::Buf` on
+// Windows and UEFI targets and `bytes::Buf` on every other target.
+#[cfg(any(target_os = "windows", target_os = "uefi"))]
+mod wtf8;
+#[cfg(any(target_os = "windows", target_os = "uefi"))]
+#[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+)]
+#[doc(hidden)]
+pub use wtf8::Buf;
+
+#[cfg(not(any(target_os = "windows", target_os = "uefi")))]
+mod bytes;
+#[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+)]
+#[cfg(not(any(target_os = "windows", target_os = "uefi")))]
+#[doc(hidden)]
+pub use bytes::Buf;
+
+/// A type that can represent owned, mutable platform-native strings, but is
+/// cheaply inter-convertible with Rust strings.
+///
+/// The need for this type arises from the fact that:
+///
+/// * On Unix systems, strings are often arbitrary sequences of non-zero
+/// bytes, in many cases interpreted as UTF-8.
+///
+/// * On Windows, strings are often arbitrary sequences of non-zero 16-bit
+/// values, interpreted as UTF-16 when it is valid to do so.
+///
+/// * In Rust, strings are always valid UTF-8, which may contain zeros.
+///
+/// `OsString` and [`OsStr`] bridge this gap by simultaneously representing Rust
+/// and platform-native string values, and in particular allowing a Rust string
+/// to be converted into an "OS" string with no cost if possible. A consequence
+/// of this is that `OsString` instances are *not* `NUL` terminated; in order
+/// to pass one to, e.g., a Unix system call, you should create a [`CStr`].
+///
+/// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: the former
+/// in each pair are owned strings; the latter are borrowed
+/// references.
+///
+/// Note, `OsString` and [`OsStr`] internally do not necessarily hold strings in
+/// the form native to the platform: while on Unix, strings are stored as a
+/// sequence of 8-bit values, on Windows, where strings are 16-bit value based
+/// as just discussed, strings are also actually stored as a sequence of 8-bit
+/// values, encoded in a less-strict variant of UTF-8. This is useful to
+/// understand when handling capacity and length values.
+///
+/// # Capacity of `OsString`
+///
+/// Capacity uses units of UTF-8 bytes for OS strings which were created from valid unicode, and
+/// uses units of bytes in an unspecified encoding for other contents. On a given target, all
+/// `OsString` and `OsStr` values use the same units for capacity, so the following will work:
+/// ```
+/// use std::ffi::{OsStr, OsString};
+///
+/// fn concat_os_strings(a: &OsStr, b: &OsStr) -> OsString {
+/// let mut ret = OsString::with_capacity(a.len() + b.len()); // This will allocate
+/// ret.push(a); // This will not allocate further
+/// ret.push(b); // This will not allocate further
+/// ret
+/// }
+/// ```
+///
+/// # Creating an `OsString`
+///
+/// **From a Rust string**: `OsString` implements
+/// <code>[From]<[String]></code>, so you can use <code>my_string.[into]\()</code> to
+/// create an `OsString` from a normal Rust string.
+///
+/// **From slices:** Just like you can start with an empty Rust
+/// [`String`] and then [`String::push_str`] some <code>&[str]</code>
+/// sub-string slices into it, you can create an empty `OsString` with
+/// the [`OsString::new`] method and then push string slices into it with the
+/// [`OsString::push`] method.
+///
+/// # Extracting a borrowed reference to the whole OS string
+///
+/// You can use the [`OsString::as_os_str`] method to get an <code>&[OsStr]</code> from
+/// an `OsString`; this is effectively a borrowed reference to the
+/// whole string.
+///
+/// # Conversions
+///
+/// See the [module's toplevel documentation about conversions][conversions] for a discussion on
+/// the traits which `OsString` implements for [conversions] from/to native representations.
+///
+/// [`CStr`]: crate::ffi::CStr
+/// [conversions]: super#conversions
+/// [into]: Into::into
+#[cfg_attr(not(test), rustc_diagnostic_item = "OsString")]
+#[stable(feature = "rust1", since = "1.0.0")]
+pub struct OsString {
+ // Backing buffer; `Buf` is `wtf8::Buf` on Windows/UEFI and `bytes::Buf` elsewhere.
+ inner: Buf,
+}
+
+/// Implements the crate-private `Sealed` marker for `OsString`; this
+/// allows extension traits within `std`.
+#[unstable(feature = "sealed", issue = "none")]
+impl private::Sealed for OsString {}
+
+impl OsString {
+    /// Wraps an already-built platform buffer ([`Buf`]) in an [`OsString`].
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[must_use]
+    #[inline]
+    #[doc(hidden)]
+    pub fn from_inner(inner: Buf) -> Self {
+        OsString { inner }
+    }
+
+    /// Creates an `OsString` with no contents.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsString;
+    ///
+    /// let os_string = OsString::new();
+    /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[must_use]
+    #[inline]
+    pub fn new() -> OsString {
+        // An empty `String` is the starting point for the buffer.
+        let empty = String::new();
+        Self { inner: Buf::from_string(empty) }
+    }
+
+ /// Converts bytes to an `OsString` without checking that the bytes contains
+ /// valid [`OsStr`]-encoded data.
+ ///
+ /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
+ /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
+ /// ASCII.
+ ///
+ /// See the [module's toplevel documentation about conversions][conversions] for safe,
+ /// cross-platform [conversions] from/to native representations.
+ ///
+ /// # Safety
+ ///
+ /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
+ /// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same Rust version
+ /// built for the same target platform. For example, reconstructing an `OsString` from bytes sent
+ /// over the network or stored in a file will likely violate these safety rules.
+ ///
+ /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be
+ /// split either immediately before or immediately after any valid non-empty UTF-8 substring.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// let os_str = OsStr::new("Mary had a little lamb");
+ /// let bytes = os_str.as_encoded_bytes();
+ /// let words = bytes.split(|b| *b == b' ');
+ /// let words: Vec<&OsStr> = words.map(|word| {
+ /// // SAFETY:
+ /// // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes`
+ /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
+ /// unsafe { OsStr::from_encoded_bytes_unchecked(word) }
+ /// }).collect();
+ /// ```
+ ///
+ /// [conversions]: super#conversions
+    #[inline]
+    #[stable(feature = "os_str_bytes", since = "1.74.0")]
+    pub unsafe fn from_encoded_bytes_unchecked(bytes: Vec<u8>) -> Self {
+        // SAFETY: the caller promises that `bytes` meets the encoding contract
+        // in this function's `# Safety` section; it is forwarded unchanged.
+        OsString { inner: unsafe { Buf::from_encoded_bytes_unchecked(bytes) } }
+    }
+
+    /// Borrows the whole string as an [`OsStr`] slice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::{OsString, OsStr};
+    ///
+    /// let os_string = OsString::from("foo");
+    /// let os_str = OsStr::new("foo");
+    /// assert_eq!(os_string.as_os_str(), os_str);
+    /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[must_use]
+    #[inline]
+    pub fn as_os_str(&self) -> &OsStr {
+        // Explicit form of the deref coercion from `&OsString` to `&OsStr`.
+        &**self
+    }
+
+    /// Converts the `OsString` into a byte vector. To convert the byte vector back into an
+    /// `OsString`, use the [`OsString::from_encoded_bytes_unchecked`] function.
+    ///
+    /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
+    /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
+    /// ASCII.
+    ///
+    /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should
+    /// be treated as opaque and only comparable within the same Rust version built for the same
+    /// target platform. For example, sending the bytes over the network or storing it in a file
+    /// will likely result in incompatible data. See [`OsString`] for more encoding details
+    /// and [`std::ffi`] for platform-specific, specified conversions.
+    ///
+    /// [`std::ffi`]: crate::ffi
+    #[inline]
+    #[stable(feature = "os_str_bytes", since = "1.74.0")]
+    pub fn into_encoded_bytes(self) -> Vec<u8> {
+        self.inner.into_encoded_bytes()
+    }
+
+ /// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
+ ///
+ /// On failure, ownership of the original `OsString` is returned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// let os_string = OsString::from("foo");
+ /// let string = os_string.into_string();
+ /// assert_eq!(string, Ok(String::from("foo")));
+ /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[inline]
+    pub fn into_string(self) -> Result<String, OsString> {
+        // On failure the buffer is handed back, so re-wrap it for the caller.
+        self.inner.into_string().map_err(|buf| OsString { inner: buf })
+    }
+
+    /// Extends the string with the given <code>&[OsStr]</code> slice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsString;
+    ///
+    /// let mut os_string = OsString::from("foo");
+    /// os_string.push("bar");
+    /// assert_eq!(&os_string, "foobar");
+    /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[inline]
+    #[rustc_confusables("append", "put")]
+    pub fn push<T: AsRef<OsStr>>(&mut self, s: T) {
+        // Borrow the argument as an `OsStr` and append its underlying slice.
+        self.inner.push_slice(s.as_ref().as_inner())
+    }
+
+    /// Builds an empty `OsString` with room for at least `capacity` length units.
+    ///
+    /// At least `capacity` length units of other OS strings can then be
+    /// pushed without reallocating. This method is allowed to allocate for
+    /// more units than `capacity`. If `capacity` is 0, the string will not
+    /// allocate.
+    ///
+    /// See the main `OsString` documentation for information about encoding and capacity units.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsString;
+    ///
+    /// let mut os_string = OsString::with_capacity(10);
+    /// let capacity = os_string.capacity();
+    ///
+    /// // This push is done without reallocating
+    /// os_string.push("foo");
+    ///
+    /// assert_eq!(capacity, os_string.capacity());
+    /// ```
+    #[stable(feature = "osstring_simple_functions", since = "1.9.0")]
+    #[must_use]
+    #[inline]
+    pub fn with_capacity(capacity: usize) -> OsString {
+        let inner = Buf::with_capacity(capacity);
+        OsString { inner }
+    }
+
+    /// Empties the `OsString`, leaving it with zero length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsString;
+    ///
+    /// let mut os_string = OsString::from("foo");
+    /// assert_eq!(&os_string, "foo");
+    ///
+    /// os_string.clear();
+    /// assert_eq!(&os_string, "");
+    /// ```
+    #[stable(feature = "osstring_simple_functions", since = "1.9.0")]
+    #[inline]
+    pub fn clear(&mut self) {
+        self.inner.clear();
+    }
+
+ /// Returns the capacity this `OsString` can hold without reallocating.
+ ///
+ /// See the main `OsString` documentation for information about encoding and capacity units.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// let os_string = OsString::with_capacity(10);
+ /// assert!(os_string.capacity() >= 10);
+ /// ```
+ #[stable(feature = "osstring_simple_functions", since = "1.9.0")]
+ #[must_use]
+ #[inline]
+ pub fn capacity(&self) -> usize {
+ self.inner.capacity()
+ }
+
+ /// Reserves capacity for at least `additional` more length units
+ /// in the given `OsString`. Does nothing if the capacity is
+ /// already sufficient.
+ ///
+ /// The collection may reserve more space to speculatively avoid frequent reallocations.
+ ///
+ /// See the main `OsString` documentation for information about encoding and capacity units.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// let mut s = OsString::new();
+ /// s.reserve(10);
+ /// assert!(s.capacity() >= 10);
+ /// ```
+ #[stable(feature = "osstring_simple_functions", since = "1.9.0")]
+ #[inline]
+ pub fn reserve(&mut self, additional: usize) {
+ self.inner.reserve(additional)
+ }
+
+ /// Tries to reserve capacity for at least `additional` more length units
+ /// in the given `OsString`. The string may reserve more space to speculatively avoid
+ /// frequent reallocations. After calling `try_reserve`, capacity will be
+ /// greater than or equal to `self.len() + additional` if it returns `Ok(())`.
+ /// Does nothing if capacity is already sufficient. This method preserves
+ /// the contents even if an error occurs.
+ ///
+ /// See the main `OsString` documentation for information about encoding and capacity units.
+ ///
+ /// # Errors
+ ///
+ /// If the capacity overflows, or the allocator reports a failure, then an error
+ /// is returned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::{OsStr, OsString};
+ /// use std::collections::TryReserveError;
+ ///
+ /// fn process_data(data: &str) -> Result<OsString, TryReserveError> {
+ /// let mut s = OsString::new();
+ ///
+ /// // Pre-reserve the memory, exiting if we can't
+ /// s.try_reserve(OsStr::new(data).len())?;
+ ///
+ /// // Now we know this can't OOM in the middle of our complex work
+ /// s.push(data);
+ ///
+ /// Ok(s)
+ /// }
+ /// # process_data("123").expect("why is the test harness OOMing on 3 bytes?");
+ /// ```
+ #[stable(feature = "try_reserve_2", since = "1.63.0")]
+ #[inline]
+ pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
+ self.inner.try_reserve(additional)
+ }
+
+ /// Reserves the minimum capacity for at least `additional` more length units
+ /// in the given `OsString`. Does nothing if the capacity is
+ /// already sufficient.
+ ///
+ /// Note that the allocator may give the collection more space than it
+ /// requests. Therefore, capacity cannot be relied upon to be precisely
+ /// minimal. Prefer [`reserve`] if future insertions are expected.
+ ///
+ /// [`reserve`]: OsString::reserve
+ ///
+ /// See the main `OsString` documentation for information about encoding and capacity units.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// let mut s = OsString::new();
+ /// s.reserve_exact(10);
+ /// assert!(s.capacity() >= 10);
+ /// ```
+ #[stable(feature = "osstring_simple_functions", since = "1.9.0")]
+ #[inline]
+ pub fn reserve_exact(&mut self, additional: usize) {
+ self.inner.reserve_exact(additional)
+ }
+
+ /// Tries to reserve the minimum capacity for at least `additional`
+ /// more length units in the given `OsString`. After calling
+ /// `try_reserve_exact`, capacity will be greater than or equal to
+ /// `self.len() + additional` if it returns `Ok(())`.
+ /// Does nothing if the capacity is already sufficient.
+ ///
+ /// Note that the allocator may give the `OsString` more space than it
+ /// requests. Therefore, capacity cannot be relied upon to be precisely
+ /// minimal. Prefer [`try_reserve`] if future insertions are expected.
+ ///
+ /// [`try_reserve`]: OsString::try_reserve
+ ///
+ /// See the main `OsString` documentation for information about encoding and capacity units.
+ ///
+ /// # Errors
+ ///
+ /// If the capacity overflows, or the allocator reports a failure, then an error
+ /// is returned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::{OsStr, OsString};
+ /// use std::collections::TryReserveError;
+ ///
+ /// fn process_data(data: &str) -> Result<OsString, TryReserveError> {
+ /// let mut s = OsString::new();
+ ///
+ /// // Pre-reserve the memory, exiting if we can't
+ /// s.try_reserve_exact(OsStr::new(data).len())?;
+ ///
+ /// // Now we know this can't OOM in the middle of our complex work
+ /// s.push(data);
+ ///
+ /// Ok(s)
+ /// }
+ /// # process_data("123").expect("why is the test harness OOMing on 3 bytes?");
+ /// ```
+ #[stable(feature = "try_reserve_2", since = "1.63.0")]
+ #[inline]
+ pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
+ self.inner.try_reserve_exact(additional)
+ }
+
+ /// Shrinks the capacity of the `OsString` to match its length.
+ ///
+ /// See the main `OsString` documentation for information about encoding and capacity units.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// let mut s = OsString::from("foo");
+ ///
+ /// s.reserve(100);
+ /// assert!(s.capacity() >= 100);
+ ///
+ /// s.shrink_to_fit();
+ /// assert_eq!(3, s.capacity());
+ /// ```
+ #[stable(feature = "osstring_shrink_to_fit", since = "1.19.0")]
+ #[inline]
+ pub fn shrink_to_fit(&mut self) {
+ self.inner.shrink_to_fit()
+ }
+
+ /// Shrinks the capacity of the `OsString` with a lower bound.
+ ///
+ /// The capacity will remain at least as large as both the length
+ /// and the supplied value.
+ ///
+ /// If the current capacity is less than the lower limit, this is a no-op.
+ ///
+ /// See the main `OsString` documentation for information about encoding and capacity units.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// let mut s = OsString::from("foo");
+ ///
+ /// s.reserve(100);
+ /// assert!(s.capacity() >= 100);
+ ///
+ /// s.shrink_to(10);
+ /// assert!(s.capacity() >= 10);
+ /// s.shrink_to(0);
+ /// assert!(s.capacity() >= 3);
+ /// ```
+ #[inline]
+ #[stable(feature = "shrink_to", since = "1.56.0")]
+ pub fn shrink_to(&mut self, min_capacity: usize) {
+ self.inner.shrink_to(min_capacity)
+ }
+
+    /// Converts this `OsString` into a boxed [`OsStr`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::{OsString, OsStr};
+    ///
+    /// let s = OsString::from("hello");
+    ///
+    /// let b: Box<OsStr> = s.into_boxed_os_str();
+    /// ```
+    #[must_use = "`self` will be dropped if the result is not used"]
+    #[stable(feature = "into_boxed_os_str", since = "1.20.0")]
+    pub fn into_boxed_os_str(self) -> Box<OsStr> {
+        let rw = Box::into_raw(self.inner.into_box()) as *mut OsStr;
+        // SAFETY: `rw` comes straight from `Box::into_raw`, so it is uniquely owned.
+        // NOTE(review): the cast assumes `OsStr` is layout-compatible with the boxed
+        // `Slice` (both are declared in `core::ffi::os_str`, not visible here) — confirm.
+        unsafe { Box::from_raw(rw) }
+    }
+
+    /// Consumes the `OsString` and leaks it, handing back a mutable reference
+    /// to its contents, `&'a mut OsStr`.
+    ///
+    /// The caller has free choice over the returned lifetime, including 'static.
+    /// Indeed, this function is ideally used for data that lives for the remainder of
+    /// the program’s life, as dropping the returned reference will cause a memory leak.
+    ///
+    /// It does not reallocate or shrink the `OsString`, so the leaked allocation may include
+    /// unused capacity that is not part of the returned slice. If you want to discard excess
+    /// capacity, call [`into_boxed_os_str`], and then [`Box::leak`] instead.
+    /// However, keep in mind that trimming the capacity may result in a reallocation and copy.
+    ///
+    /// [`into_boxed_os_str`]: Self::into_boxed_os_str
+    #[unstable(feature = "os_string_pathbuf_leak", issue = "125965")]
+    #[inline]
+    pub fn leak<'a>(self) -> &'a mut OsStr {
+        let leaked = self.inner.leak();
+        OsStr::from_inner_mut(leaked)
+    }
+
+ /// Provides plumbing to core `Vec::truncate` by forwarding to the inner buffer.
+ /// A better-behaved alternative to allowing outer types
+ /// full mutable access to the core `Vec`.
+ // NOTE(review): `allow(unused)` presumably because not every target calls this — confirm.
+ #[inline]
+ #[allow(unused)]
+ pub(crate) fn truncate(&mut self, len: usize) {
+ self.inner.truncate(len);
+ }
+
+ /// Provides plumbing to core `Vec::extend_from_slice` by forwarding to the inner buffer.
+ /// A better-behaved alternative to allowing outer types
+ /// full mutable access to the core `Vec`.
+ // NOTE(review): `allow(unused)` presumably because not every target calls this — confirm.
+ #[inline]
+ #[allow(unused)]
+ pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
+ self.inner.extend_from_slice(other);
+ }
+}
+
+impl OsStr {
+ /// Returns a copy of this string where each character is mapped to its
+ /// ASCII upper case equivalent.
+ ///
+ /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To uppercase the value in-place, use [`OsStr::make_ascii_uppercase`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ /// let s = OsString::from("Grüße, Jürgen ❤");
+ ///
+ /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase());
+ /// ```
+    #[rustc_allow_incoherent_impl]
+    #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase`"]
+    #[stable(feature = "osstring_ascii", since = "1.53.0")]
+    pub fn to_ascii_uppercase(&self) -> OsString {
+        // Upper-case a copy of the underlying slice, then wrap the new buffer.
+        let upper = self.as_inner().to_ascii_uppercase();
+        OsString::from_inner(upper)
+    }
+
+ /// Returns a copy of this string where each character is mapped to its
+ /// ASCII lower case equivalent.
+ ///
+ /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To lowercase the value in-place, use [`OsStr::make_ascii_lowercase`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ /// let s = OsString::from("Grüße, Jürgen ❤");
+ ///
+ /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase());
+ /// ```
+    #[rustc_allow_incoherent_impl]
+    #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase`"]
+    #[stable(feature = "osstring_ascii", since = "1.53.0")]
+    pub fn to_ascii_lowercase(&self) -> OsString {
+        // Lower-case a copy of the underlying slice, then wrap the new buffer.
+        let lower = self.as_inner().to_ascii_lowercase();
+        OsString::from_inner(lower)
+    }
+
+    /// Converts a <code>[Box]<[OsStr]></code> into an [`OsString`] without copying or allocating.
+    #[rustc_allow_incoherent_impl]
+    #[stable(feature = "into_boxed_os_str", since = "1.20.0")]
+    #[must_use = "`self` will be dropped if the result is not used"]
+    pub fn into_os_string(self: Box<OsStr>) -> OsString {
+        // SAFETY: the pointer comes straight from `Box::into_raw`, so it is uniquely owned.
+        // NOTE(review): the cast assumes `OsStr` is layout-compatible with `Slice`
+        // (both are declared in `core::ffi::os_str`, not visible here) — confirm.
+        let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut Slice) };
+        OsString { inner: Buf::from_box(boxed) }
+    }
+
+ /// Converts an `OsStr` to a <code>[Cow]<[str]></code>.
+ ///
+ /// Any non-Unicode sequences are replaced with
+ /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD].
+ ///
+ /// [U+FFFD]: crate::char::REPLACEMENT_CHARACTER
+ ///
+ /// # Examples
+ ///
+ /// Calling `to_string_lossy` on an `OsStr` with invalid unicode:
+ ///
+ /// ```
+ /// // Note, due to differences in how Unix and Windows represent strings,
+ /// // we are forced to complicate this example, setting up example `OsStr`s
+ /// // with different source data and via different platform extensions.
+ /// // Understand that in reality you could end up with such example invalid
+ /// // sequences simply through collecting user command line arguments, for
+ /// // example.
+ ///
+ /// #[cfg(unix)] {
+ /// use std::ffi::OsStr;
+ /// use std::os::unix::ffi::OsStrExt;
+ ///
+ /// // Here, the values 0x66 and 0x6f correspond to 'f' and 'o'
+ /// // respectively. The value 0x80 is a lone continuation byte, invalid
+ /// // in a UTF-8 sequence.
+ /// let source = [0x66, 0x6f, 0x80, 0x6f];
+ /// let os_str = OsStr::from_bytes(&source[..]);
+ ///
+ /// assert_eq!(os_str.to_string_lossy(), "fo�o");
+ /// }
+ /// #[cfg(windows)] {
+ /// use std::ffi::OsString;
+ /// use std::os::windows::prelude::*;
+ ///
+ /// // Here the values 0x0066 and 0x006f correspond to 'f' and 'o'
+ /// // respectively. The value 0xD800 is a lone surrogate half, invalid
+ /// // in a UTF-16 sequence.
+ /// let source = [0x0066, 0x006f, 0xD800, 0x006f];
+ /// let os_string = OsString::from_wide(&source[..]);
+ /// let os_str = os_string.as_os_str();
+ ///
+ /// assert_eq!(os_str.to_string_lossy(), "fo�o");
+ /// }
+ /// ```
+    #[rustc_allow_incoherent_impl]
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[must_use = "this returns the result of the operation, \
+ without modifying the original"]
+    #[inline]
+    pub fn to_string_lossy(&self) -> Cow<'_, str> {
+        // The lossy conversion itself lives on the platform slice type.
+        let slice = self.as_inner();
+        slice.to_string_lossy()
+    }
+
+ /// Copies the slice into an owned [`OsString`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::{OsStr, OsString};
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// let os_string = os_str.to_os_string();
+ /// assert_eq!(os_string, OsString::from("foo"));
+ /// ```
+    #[rustc_allow_incoherent_impl]
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[must_use = "this returns the result of the operation, \
+ without modifying the original"]
+    #[inline]
+    pub fn to_os_string(&self) -> OsString {
+        // Copy the underlying slice into an owned buffer and wrap it.
+        let owned = self.as_inner().to_owned();
+        OsString { inner: owned }
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl From<String> for OsString {
+    /// Converts a [`String`] into an [`OsString`].
+    ///
+    /// This conversion does not allocate or copy memory.
+    #[inline]
+    fn from(s: String) -> OsString {
+        OsString { inner: Buf::from_string(s) }
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T: ?Sized + AsRef<OsStr>> From<&T> for OsString {
+    /// Copies any value implementing <code>[AsRef]<[OsStr]></code>
+    /// into a newly allocated [`OsString`].
+    fn from(s: &T) -> OsString {
+        s.as_ref().to_os_string()
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ops::Index<ops::RangeFull> for OsString {
+    type Output = OsStr;
+
+    /// Borrows the whole string (`&s[..]`) as an [`OsStr`].
+    #[inline]
+    fn index(&self, _index: ops::RangeFull) -> &OsStr {
+        OsStr::from_inner(self.inner.as_slice())
+    }
+}
+
+#[stable(feature = "mut_osstr", since = "1.44.0")]
+impl ops::IndexMut<ops::RangeFull> for OsString {
+    /// Mutably borrows the whole string (`&mut s[..]`) as an [`OsStr`].
+    #[inline]
+    fn index_mut(&mut self, _index: ops::RangeFull) -> &mut OsStr {
+        OsStr::from_inner_mut(self.inner.as_mut_slice())
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ops::Deref for OsString {
+    type Target = OsStr;
+
+    /// Dereferences to the full-range index of the string.
+    #[inline]
+    fn deref(&self) -> &OsStr {
+        ops::Index::index(self, ..)
+    }
+}
+
+#[stable(feature = "mut_osstr", since = "1.44.0")]
+impl ops::DerefMut for OsString {
+    /// Mutably dereferences to the full-range index of the string.
+    #[inline]
+    fn deref_mut(&mut self) -> &mut OsStr {
+        ops::IndexMut::index_mut(self, ..)
+    }
+}
+
+#[stable(feature = "osstring_default", since = "1.9.0")]
+impl Default for OsString {
+    /// Returns an empty `OsString`, exactly as [`OsString::new`] does.
+    #[inline]
+    fn default() -> OsString {
+        Self::new()
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Clone for OsString {
+    #[inline]
+    fn clone(&self) -> Self {
+        let inner = self.inner.clone();
+        Self { inner }
+    }
+
+    /// Overwrites `self` with a copy of `source`'s contents.
+    ///
+    /// Prefer this over assigning `source.clone()` to `self`:
+    /// it avoids reallocation if possible.
+    #[inline]
+    fn clone_from(&mut self, source: &Self) {
+        self.inner.clone_from(&source.inner);
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl fmt::Debug for OsString {
+    /// Formats exactly like the borrowed `OsStr` does.
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(self.as_os_str(), formatter)
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl PartialEq for OsString {
+    /// Compares the two strings through their borrowed `OsStr` views.
+    #[inline]
+    fn eq(&self, other: &OsString) -> bool {
+        self.as_os_str() == other.as_os_str()
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl PartialEq<str> for OsString {
+    /// Compares the borrowed `OsStr` view of `self` with a `str`.
+    #[inline]
+    fn eq(&self, other: &str) -> bool {
+        &**self == other
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl PartialEq<OsString> for str {
+    /// Mirror of `OsString == str`, with the operands swapped.
+    #[inline]
+    fn eq(&self, other: &OsString) -> bool {
+        &**other == self
+    }
+}
+
+#[stable(feature = "os_str_str_ref_eq", since = "1.29.0")]
+impl PartialEq<&str> for OsString {
+    /// Compares the `OsStr` contents with the referenced `str`.
+    #[inline]
+    fn eq(&self, other: &&str) -> bool {
+        *self.as_os_str() == **other
+    }
+}
+
+#[stable(feature = "os_str_str_ref_eq", since = "1.29.0")]
+impl<'a> PartialEq<OsString> for &'a str {
+    /// Mirror of `OsString == &str`, with the operands swapped.
+    #[inline]
+    fn eq(&self, other: &OsString) -> bool {
+        **other == **self
+    }
+}
+
+// Marker impl only; the comparison itself is the `PartialEq` impl, which defers to `OsStr`.
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Eq for OsString {}
+
+// Every comparison is forwarded to the borrowed `OsStr` views.
+#[stable(feature = "rust1", since = "1.0.0")]
+impl PartialOrd for OsString {
+    #[inline]
+    fn partial_cmp(&self, other: &OsString) -> Option<cmp::Ordering> {
+        (&**self).partial_cmp(&**other)
+    }
+    #[inline]
+    fn lt(&self, other: &OsString) -> bool {
+        &**self < &**other
+    }
+    #[inline]
+    fn le(&self, other: &OsString) -> bool {
+        &**self <= &**other
+    }
+    #[inline]
+    fn gt(&self, other: &OsString) -> bool {
+        &**self > &**other
+    }
+    #[inline]
+    fn ge(&self, other: &OsString) -> bool {
+        &**self >= &**other
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl PartialOrd<str> for OsString {
+    /// Orders the borrowed `OsStr` view of `self` against a `str`.
+    #[inline]
+    fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
+        (&**self).partial_cmp(other)
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Ord for OsString {
+    /// Total ordering, taken from the borrowed `OsStr` views.
+    #[inline]
+    fn cmp(&self, other: &OsString) -> cmp::Ordering {
+        self.as_os_str().cmp(other.as_os_str())
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Hash for OsString {
+    /// Hashes through the borrowed `OsStr` view.
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        (&**self).hash(state)
+    }
+}
+
+#[stable(feature = "os_string_fmt_write", since = "1.64.0")]
+impl fmt::Write for OsString {
+ /// Appends `s` to the string via [`OsString::push`].
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ // `push` returns nothing, so there is no error to report here.
+ self.push(s);
+ Ok(())
+ }
+}
+
+#[stable(feature = "box_from_os_str", since = "1.17.0")]
+impl From<&OsStr> for Box<OsStr> {
+    /// Copies the string into a newly allocated <code>[Box]<[OsStr]></code>.
+    #[inline]
+    fn from(s: &OsStr) -> Box<OsStr> {
+        let rw = Box::into_raw(s.as_inner().into_box()) as *mut OsStr;
+        // SAFETY: `rw` comes straight from `Box::into_raw`, so it is uniquely owned.
+        // NOTE(review): the cast assumes `OsStr` is layout-compatible with `Slice`
+        // (declared in `core::ffi::os_str`, not visible here) — confirm.
+        unsafe { Box::from_raw(rw) }
+    }
+}
+
+#[stable(feature = "box_from_cow", since = "1.45.0")]
+impl From<Cow<'_, OsStr>> for Box<OsStr> {
+    /// Converts a `Cow<'a, OsStr>` into a <code>[Box]<[OsStr]></code>,
+    /// by copying the contents if they are borrowed.
+    #[inline]
+    fn from(cow: Cow<'_, OsStr>) -> Box<OsStr> {
+        match cow {
+            // Borrowed data goes through `From<&OsStr>`, owned data through `From<OsString>`.
+            Cow::Borrowed(s) => Box::from(s),
+            Cow::Owned(s) => Box::from(s),
+        }
+    }
+}
+
+#[stable(feature = "os_string_from_box", since = "1.18.0")]
+impl From<Box<OsStr>> for OsString {
+    /// Converts a <code>[Box]<[OsStr]></code> into an [`OsString`] without copying or
+    /// allocating.
+    #[inline]
+    fn from(boxed: Box<OsStr>) -> OsString {
+        boxed.into_os_string()
+    }
+}
+
+#[stable(feature = "box_from_os_string", since = "1.20.0")]
+impl From<OsString> for Box<OsStr> {
+    /// Converts an [`OsString`] into a <code>[Box]<[OsStr]></code> without copying or allocating.
+    #[inline]
+    fn from(s: OsString) -> Box<OsStr> {
+        s.into_boxed_os_str()
+    }
+}
+
+#[stable(feature = "more_box_slice_clone", since = "1.29.0")]
+impl Clone for Box<OsStr> {
+    /// Clones by copying into an `OsString` and boxing that copy.
+    #[inline]
+    fn clone(&self) -> Self {
+        self.to_os_string().into_boxed_os_str()
+    }
+}
+
+#[stable(feature = "shared_from_slice2", since = "1.24.0")]
+impl From<OsString> for Arc<OsStr> {
+    /// Converts an [`OsString`] into an <code>[Arc]<[OsStr]></code> by moving the [`OsString`]
+    /// data into a new [`Arc`] buffer.
+    #[inline]
+    fn from(s: OsString) -> Arc<OsStr> {
+        let arc = s.inner.into_arc();
+        // SAFETY: the pointer comes straight from `Arc::into_raw`.
+        // NOTE(review): the cast assumes `OsStr` is layout-compatible with `Slice`
+        // (declared in `core::ffi::os_str`, not visible here) — confirm.
+        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const OsStr) }
+    }
+}
+
+#[stable(feature = "shared_from_slice2", since = "1.24.0")]
+impl From<&OsStr> for Arc<OsStr> {
+    /// Copies the string into a newly allocated <code>[Arc]<[OsStr]></code>.
+    #[inline]
+    fn from(s: &OsStr) -> Arc<OsStr> {
+        let arc = s.as_inner().into_arc();
+        // SAFETY: the pointer comes straight from `Arc::into_raw`.
+        // NOTE(review): the cast assumes `OsStr` is layout-compatible with `Slice`
+        // (declared in `core::ffi::os_str`, not visible here) — confirm.
+        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const OsStr) }
+    }
+}
+
+#[stable(feature = "shared_from_slice2", since = "1.24.0")]
+impl From<OsString> for Rc<OsStr> {
+    /// Converts an [`OsString`] into an <code>[Rc]<[OsStr]></code> by moving the [`OsString`]
+    /// data into a new [`Rc`] buffer.
+    #[inline]
+    fn from(s: OsString) -> Rc<OsStr> {
+        let rc = s.inner.into_rc();
+        // SAFETY: the pointer comes straight from `Rc::into_raw`.
+        // NOTE(review): the cast assumes `OsStr` is layout-compatible with `Slice`
+        // (declared in `core::ffi::os_str`, not visible here) — confirm.
+        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const OsStr) }
+    }
+}
+
+#[stable(feature = "shared_from_slice2", since = "1.24.0")]
+impl From<&OsStr> for Rc<OsStr> {
+    /// Copies the string into a newly allocated <code>[Rc]<[OsStr]></code>.
+    #[inline]
+    fn from(s: &OsStr) -> Rc<OsStr> {
+        let rc = s.as_inner().into_rc();
+        // SAFETY: the pointer comes straight from `Rc::into_raw`.
+        // NOTE(review): the cast assumes `OsStr` is layout-compatible with `Slice`
+        // (declared in `core::ffi::os_str`, not visible here) — confirm.
+        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const OsStr) }
+    }
+}
+
+#[stable(feature = "cow_from_osstr", since = "1.28.0")]
+impl<'a> From<OsString> for Cow<'a, OsStr> {
+    /// Moves the string into a [`Cow::Owned`].
+    #[inline]
+    fn from(s: OsString) -> Cow<'a, OsStr> {
+        Cow::Owned(s)
+    }
+}
+
+#[stable(feature = "cow_from_osstr", since = "1.28.0")]
+impl<'a> From<&'a OsStr> for Cow<'a, OsStr> {
+    /// Wraps the borrowed string in a [`Cow::Borrowed`]; nothing is copied.
+    #[inline]
+    fn from(s: &'a OsStr) -> Cow<'a, OsStr> {
+        Self::Borrowed(s)
+    }
+}
+
+#[stable(feature = "cow_from_osstr", since = "1.28.0")]
+impl<'a> From<&'a OsString> for Cow<'a, OsStr> {
+    /// Borrows the owned string as an `OsStr` and wraps that borrow in a
+    /// [`Cow::Borrowed`]; nothing is copied.
+    #[inline]
+    fn from(s: &'a OsString) -> Cow<'a, OsStr> {
+        let borrowed: &'a OsStr = s.as_os_str();
+        Self::Borrowed(borrowed)
+    }
+}
+
+#[stable(feature = "osstring_from_cow_osstr", since = "1.28.0")]
+impl<'a> From<Cow<'a, OsStr>> for OsString {
+    /// Converts a `Cow<'a, OsStr>` into an [`OsString`],
+    /// by copying the contents if they are borrowed.
+    #[inline]
+    fn from(s: Cow<'a, OsStr>) -> Self {
+        s.into_owned()
+    }
+}
+
+#[stable(feature = "box_default_extra", since = "1.17.0")]
+impl Default for Box<OsStr> {
+    /// Creates an empty boxed `OsStr` from the platform's empty boxed `Slice`.
+    #[inline]
+    fn default() -> Box<OsStr> {
+        let rw = Box::into_raw(Slice::empty_box()) as *mut OsStr;
+        // SAFETY: `rw` comes straight from `Box::into_raw`, so ownership is handed
+        // back exactly once. The cast relies on `OsStr` having the same layout as
+        // `Slice`. NOTE(review): confirm `OsStr` is a transparent wrapper in `core`.
+        unsafe { Box::from_raw(rw) }
+    }
+}
+
+// Generates symmetric `PartialEq`/`PartialOrd` impls between `$lhs` and `$rhs`.
+// Every comparison is forwarded to the `OsStr` implementation, so both operands
+// must coerce to `&OsStr`. The `'a`/`'b` lifetimes are declared on each impl so
+// that invocations may mention them in their type arguments; invocations that
+// do not use them are covered by `#[allow(unused_lifetimes)]`.
+macro_rules! impl_cmp {
+    ($lhs:ty, $rhs: ty) => {
+        #[allow(unused_lifetimes)]
+        #[stable(feature = "cmp_os_str", since = "1.8.0")]
+        impl<'a, 'b> PartialEq<$rhs> for $lhs {
+            #[inline]
+            fn eq(&self, other: &$rhs) -> bool {
+                <OsStr as PartialEq>::eq(self, other)
+            }
+        }
+
+        #[allow(unused_lifetimes)]
+        #[stable(feature = "cmp_os_str", since = "1.8.0")]
+        impl<'a, 'b> PartialEq<$lhs> for $rhs {
+            #[inline]
+            fn eq(&self, other: &$lhs) -> bool {
+                <OsStr as PartialEq>::eq(self, other)
+            }
+        }
+
+        #[allow(unused_lifetimes)]
+        #[stable(feature = "cmp_os_str", since = "1.8.0")]
+        impl<'a, 'b> PartialOrd<$rhs> for $lhs {
+            #[inline]
+            fn partial_cmp(&self, other: &$rhs) -> Option<cmp::Ordering> {
+                <OsStr as PartialOrd>::partial_cmp(self, other)
+            }
+        }
+
+        #[allow(unused_lifetimes)]
+        #[stable(feature = "cmp_os_str", since = "1.8.0")]
+        impl<'a, 'b> PartialOrd<$lhs> for $rhs {
+            #[inline]
+            fn partial_cmp(&self, other: &$lhs) -> Option<cmp::Ordering> {
+                <OsStr as PartialOrd>::partial_cmp(self, other)
+            }
+        }
+    };
+}
+
+impl_cmp!(OsString, OsStr);
+impl_cmp!(OsString, &'a OsStr);
+impl_cmp!(Cow<'a, OsStr>, OsStr);
+impl_cmp!(Cow<'a, OsStr>, &'b OsStr);
+impl_cmp!(Cow<'a, OsStr>, OsString);
+
+#[unstable(feature = "slice_concat_ext", issue = "27747")]
+impl<S: Borrow<OsStr>> crate::slice::Join<&OsStr> for [S] {
+    type Output = OsString;
+
+    /// Concatenates the elements of `slice`, inserting `sep` between each pair.
+    ///
+    /// An empty slice yields an empty [`OsString`]; a single element yields a
+    /// copy of that element with no separator.
+    fn join(slice: &Self, sep: &OsStr) -> OsString {
+        let Some((first, suffix)) = slice.split_first() else {
+            return OsString::new();
+        };
+        // Start from an owned copy of the first element and append to it in place.
+        let first_owned = first.borrow().to_owned();
+        suffix.iter().fold(first_owned, |mut a, b| {
+            a.push(sep);
+            a.push(b.borrow());
+            a
+        })
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Borrow<OsStr> for OsString {
+    /// Borrows the whole string as an [`OsStr`].
+    #[inline]
+    fn borrow(&self) -> &OsStr {
+        &self[..]
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ToOwned for OsStr {
+ type Owned = OsString;
+ /// Creates an owned [`OsString`] by delegating to `to_os_string`.
+ #[inline]
+ fn to_owned(&self) -> OsString {
+ self.to_os_string()
+ }
+ /// Overwrites `target` with the contents of `self` by forwarding to the inner
+ /// slice's `clone_into`, which is handed `target`'s existing buffer.
+ #[inline]
+ fn clone_into(&self, target: &mut OsString) {
+ self.as_inner().clone_into(&mut target.inner)
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl AsRef<OsStr> for OsString {
+    /// Returns the string as an [`OsStr`] through deref coercion.
+    #[inline]
+    fn as_ref(&self) -> &OsStr {
+        self
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl AsRef<OsStr> for String {
+    /// Views the string as an [`OsStr`] by delegating to the `AsRef` impl of `str`.
+    #[inline]
+    fn as_ref(&self) -> &OsStr {
+        (&**self).as_ref()
+    }
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+#[doc(hidden)]
+impl From<Buf> for OsString {
+    /// Wraps a platform buffer in an [`OsString`] without copying.
+    #[inline]
+    fn from(buf: Buf) -> OsString {
+        OsString { inner: buf }
+    }
+}
+
+// Implemented as `From` rather than a hand-written `Into`: the blanket impl in
+// `core` still provides `OsString: Into<Buf>`, so existing `.into()` callers
+// keep working and `Buf::from(os_string)` becomes available as well.
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+#[doc(hidden)]
+impl From<OsString> for Buf {
+    /// Unwraps an [`OsString`] into its platform buffer without copying.
+    #[inline]
+    fn from(s: OsString) -> Buf {
+        s.inner
+    }
+}
+
+#[stable(feature = "osstring_from_str", since = "1.45.0")]
+impl FromStr for OsString {
+    type Err = core::convert::Infallible;
+
+    /// Builds an [`OsString`] from `s`; this conversion always returns `Ok`.
+    #[inline]
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(OsString::from(s))
+    }
+}
+
+#[stable(feature = "osstring_extend", since = "1.52.0")]
+impl Extend<OsString> for OsString {
+    /// Appends every string yielded by `iter`, in order.
+    #[inline]
+    fn extend<T: IntoIterator<Item = OsString>>(&mut self, iter: T) {
+        for s in iter {
+            self.push(&s);
+        }
+    }
+}
+
+#[stable(feature = "osstring_extend", since = "1.52.0")]
+impl<'a> Extend<&'a OsStr> for OsString {
+    /// Appends every borrowed string yielded by `iter`, in order.
+    #[inline]
+    fn extend<T: IntoIterator<Item = &'a OsStr>>(&mut self, iter: T) {
+        for s in iter {
+            self.push(s);
+        }
+    }
+}
+
+#[stable(feature = "osstring_extend", since = "1.52.0")]
+impl<'a> Extend<Cow<'a, OsStr>> for OsString {
+    /// Appends every string yielded by `iter`, in order, whether borrowed or owned.
+    #[inline]
+    fn extend<T: IntoIterator<Item = Cow<'a, OsStr>>>(&mut self, iter: T) {
+        for s in iter {
+            self.push(&s);
+        }
+    }
+}
+
+#[stable(feature = "osstring_extend", since = "1.52.0")]
+impl FromIterator<OsString> for OsString {
+    /// Concatenates all strings yielded by `iter`; an empty iterator gives an
+    /// empty [`OsString`].
+    #[inline]
+    fn from_iter<I: IntoIterator<Item = OsString>>(iter: I) -> Self {
+        let mut iterator = iter.into_iter();
+
+        // Because we're iterating over `OsString`s, we can avoid at least
+        // one allocation by getting the first string from the iterator
+        // and appending to it all the subsequent strings.
+        match iterator.next() {
+            None => OsString::new(),
+            Some(mut buf) => {
+                buf.extend(iterator);
+                buf
+            }
+        }
+    }
+}
+
+#[stable(feature = "osstring_extend", since = "1.52.0")]
+impl<'a> FromIterator<&'a OsStr> for OsString {
+    /// Concatenates all borrowed strings yielded by `iter` into a new [`OsString`].
+    #[inline]
+    fn from_iter<I: IntoIterator<Item = &'a OsStr>>(iter: I) -> Self {
+        let mut buf = Self::new();
+        for s in iter {
+            buf.push(s);
+        }
+        buf
+    }
+}
+
+#[stable(feature = "osstring_extend", since = "1.52.0")]
+impl<'a> FromIterator<Cow<'a, OsStr>> for OsString {
+    /// Concatenates all strings yielded by `iter`; an empty iterator gives an
+    /// empty [`OsString`].
+    #[inline]
+    fn from_iter<I: IntoIterator<Item = Cow<'a, OsStr>>>(iter: I) -> Self {
+        let mut iterator = iter.into_iter();
+
+        // If the first item is already owned, reuse its buffer as the
+        // accumulator and append the remaining strings to it. A borrowed
+        // first item has to be copied into a fresh `OsString` first.
+        match iterator.next() {
+            None => OsString::new(),
+            Some(Cow::Owned(mut buf)) => {
+                buf.extend(iterator);
+                buf
+            }
+            Some(Cow::Borrowed(buf)) => {
+                let mut buf = OsString::from(buf);
+                buf.extend(iterator);
+                buf
+            }
+        }
+    }
+}
diff --git a/library/alloc/src/ffi/os_str/bytes.rs b/library/alloc/src/ffi/os_str/bytes.rs
new file mode 100644
index 0000000000000..55fe072c7be6e
--- /dev/null
+++ b/library/alloc/src/ffi/os_str/bytes.rs
@@ -0,0 +1,435 @@
+#![allow(missing_docs)]
+#![allow(missing_debug_implementations)]
+
+//! The underlying OsString/OsStr implementation on Unix and many other
+//! systems: just a `Vec<u8>`/`[u8]`.
+
+use core::ffi::os_str::Slice;
+use core::{fmt, mem, str};
+
+use crate::borrow::{Cow, ToOwned};
+use crate::boxed::Box;
+use crate::collections::TryReserveError;
+use crate::rc::Rc;
+use crate::string::String;
+use crate::sync::Arc;
+use crate::vec::Vec;
+
+#[cfg(test)]
+mod tests;
+
+/// Owned OS string storage for the byte-based implementation: a plain byte vector.
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+#[derive(Hash)]
+#[repr(transparent)]
+pub struct Buf {
+    pub inner: Vec<u8>,
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl fmt::Debug for Buf {
+    /// Formats the buffer exactly as its borrowed `Slice` view is formatted.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let slice = self.as_slice();
+        fmt::Debug::fmt(slice, f)
+    }
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl fmt::Display for Buf {
+    /// Displays the buffer exactly as its borrowed `Slice` view is displayed.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let slice = self.as_slice();
+        fmt::Display::fmt(slice, f)
+    }
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl Clone for Buf {
+    /// Clones the underlying byte vector into a new `Buf`.
+    #[inline]
+    fn clone(&self) -> Self {
+        let inner = self.inner.clone();
+        Self { inner }
+    }
+
+    /// Forwards to the vector's `clone_from`, passing it the existing `inner`.
+    #[inline]
+    fn clone_from(&mut self, source: &Self) {
+        self.inner.clone_from(&source.inner);
+    }
+}
+
+// Implemented as `From` rather than a hand-written `Into`: the blanket impl in
+// `core` still provides `Buf: Into<Vec<u8>>`, so existing `.into()` callers
+// keep working.
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl From<Buf> for Vec<u8> {
+    /// Unwraps the buffer into its byte vector without copying.
+    fn from(buf: Buf) -> Vec<u8> {
+        buf.inner
+    }
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl AsRef<[u8]> for Buf {
+    /// Borrows the stored bytes as a slice.
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        self.inner.as_slice()
+    }
+}
+
+impl Buf {
+    /// Consumes the buffer and returns its bytes without copying.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_encoded_bytes(self) -> Vec<u8> {
+        self.inner
+    }
+
+    /// Wraps `s` in a `Buf` without inspecting it.
+    ///
+    /// # Safety
+    ///
+    /// This implementation stores `s` as-is and performs no check.
+    /// NOTE(review): the function is presumably `unsafe` so that it shares a
+    /// signature with the WTF-8 implementation; callers should uphold the
+    /// contract of the public `from_encoded_bytes_unchecked` API — confirm.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self {
+        Self { inner: s }
+    }
+
+    /// Converts a `String` into a `Buf`, reusing its byte vector.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn from_string(s: String) -> Buf {
+        Buf { inner: s.into_bytes() }
+    }
+
+    /// Creates an empty buffer backed by `Vec::with_capacity(capacity)`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn with_capacity(capacity: usize) -> Buf {
+        Buf { inner: Vec::with_capacity(capacity) }
+    }
+
+    /// Forwards to `Vec::clear`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn clear(&mut self) {
+        self.inner.clear()
+    }
+
+    /// Forwards to `Vec::capacity`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn capacity(&self) -> usize {
+        self.inner.capacity()
+    }
+
+    /// Forwards to `Vec::reserve`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn reserve(&mut self, additional: usize) {
+        self.inner.reserve(additional)
+    }
+
+    /// Forwards to `Vec::try_reserve`, returning its error unchanged.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
+        self.inner.try_reserve(additional)
+    }
+
+    /// Forwards to `Vec::reserve_exact`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn reserve_exact(&mut self, additional: usize) {
+        self.inner.reserve_exact(additional)
+    }
+
+    /// Forwards to `Vec::try_reserve_exact`, returning its error unchanged.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
+        self.inner.try_reserve_exact(additional)
+    }
+
+    /// Forwards to `Vec::shrink_to_fit`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn shrink_to_fit(&mut self) {
+        self.inner.shrink_to_fit()
+    }
+
+    /// Forwards to `Vec::shrink_to`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn shrink_to(&mut self, min_capacity: usize) {
+        self.inner.shrink_to(min_capacity)
+    }
+
+    /// Borrows the contents as a `Slice`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn as_slice(&self) -> &Slice {
+        // SAFETY: Slice just wraps [u8],
+        // and &*self.inner is &[u8], therefore
+        // transmuting &[u8] to &Slice is safe.
+        unsafe { mem::transmute(&*self.inner) }
+    }
+
+    /// Mutably borrows the contents as a `Slice`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn as_mut_slice(&mut self) -> &mut Slice {
+        // SAFETY: Slice just wraps [u8],
+        // and &mut *self.inner is &mut [u8], therefore
+        // transmuting &mut [u8] to &mut Slice is safe.
+        unsafe { mem::transmute(&mut *self.inner) }
+    }
+
+    /// Converts the buffer into a `String` if its bytes are valid UTF-8.
+    ///
+    /// On failure the original bytes are handed back unchanged inside `Err`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn into_string(self) -> Result<String, Buf> {
+        String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() })
+    }
+
+    /// Appends the bytes of `s` to the end of the buffer.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn push_slice(&mut self, s: &Slice) {
+        self.inner.extend_from_slice(&s.inner)
+    }
+
+    /// Leaks the buffer via `Vec::leak` and returns the contents as a `Slice`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn leak<'a>(self) -> &'a mut Slice {
+        // SAFETY: Slice just wraps [u8] (see `as_slice`), so the leaked
+        // `&mut [u8]` can be reinterpreted as `&mut Slice`.
+        unsafe { mem::transmute(self.inner.leak()) }
+    }
+
+    /// Converts the buffer into a boxed `Slice` via `Vec::into_boxed_slice`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_box(self) -> Box<Slice> {
+        // SAFETY: Slice just wraps [u8] (see `as_slice`), so `Box<[u8]>`
+        // can be reinterpreted as `Box<Slice>`.
+        unsafe { mem::transmute(self.inner.into_boxed_slice()) }
+    }
+
+    /// Converts a boxed `Slice` back into a growable buffer.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn from_box(boxed: Box<Slice>) -> Buf {
+        // SAFETY: Slice just wraps [u8] (see `as_slice`), so `Box<Slice>`
+        // can be reinterpreted as `Box<[u8]>`.
+        let inner: Box<[u8]> = unsafe { mem::transmute(boxed) };
+        Buf { inner: inner.into_vec() }
+    }
+
+    /// Builds an `Arc<Slice>` from the contents via `Slice::into_arc`.
+    ///
+    /// Despite the `into_` prefix this takes `&self`, so the buffer itself is
+    /// left untouched.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_arc(&self) -> Arc<Slice> {
+        self.as_slice().into_arc()
+    }
+
+    /// Builds an `Rc<Slice>` from the contents via `Slice::into_rc`.
+    ///
+    /// Despite the `into_` prefix this takes `&self`, so the buffer itself is
+    /// left untouched.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_rc(&self) -> Rc<Slice> {
+        self.as_slice().into_rc()
+    }
+
+    /// Provides plumbing to core `Vec::truncate`.
+    /// More well behaving alternative to allowing outer types
+    /// full mutable access to the core `Vec`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub(crate) fn truncate(&mut self, len: usize) {
+        self.inner.truncate(len);
+    }
+
+    /// Provides plumbing to core `Vec::extend_from_slice`.
+    /// More well behaving alternative to allowing outer types
+    /// full mutable access to the core `Vec`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
+        self.inner.extend_from_slice(other);
+    }
+}
+
+impl Slice {
+    /// Converts the bytes to a string with `String::from_utf8_lossy`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn to_string_lossy(&self) -> Cow<'_, str> {
+        String::from_utf8_lossy(&self.inner)
+    }
+
+    /// Copies the bytes into a new owned `Buf`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn to_owned(&self) -> Buf {
+        Buf { inner: self.inner.to_vec() }
+    }
+
+    /// Overwrites `buf` with these bytes by forwarding to the byte slice's
+    /// `clone_into`, which is handed `buf`'s existing vector.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn clone_into(&self, buf: &mut Buf) {
+        self.inner.clone_into(&mut buf.inner)
+    }
+
+    /// Copies the bytes into a newly allocated boxed `Slice`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_box(&self) -> Box<Slice> {
+        let boxed: Box<[u8]> = self.inner.into();
+        // SAFETY: relies on `Slice` being a plain wrapper around `[u8]`, as
+        // stated in `Buf::as_slice`, so `Box<[u8]>` can be reinterpreted.
+        unsafe { mem::transmute(boxed) }
+    }
+
+    /// Creates an empty boxed `Slice`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn empty_box() -> Box<Slice> {
+        let boxed: Box<[u8]> = Default::default();
+        // SAFETY: relies on `Slice` being a plain wrapper around `[u8]`, as
+        // stated in `Buf::as_slice`, so `Box<[u8]>` can be reinterpreted.
+        unsafe { mem::transmute(boxed) }
+    }
+
+    /// Copies the bytes into a newly allocated `Arc<Slice>`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_arc(&self) -> Arc<Slice> {
+        let arc: Arc<[u8]> = Arc::from(&self.inner);
+        // SAFETY: the pointer comes straight from `Arc::into_raw`; the cast
+        // relies on `Slice` being a plain wrapper around `[u8]`.
+        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) }
+    }
+
+    /// Copies the bytes into a newly allocated `Rc<Slice>`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_rc(&self) -> Rc<Slice> {
+        let rc: Rc<[u8]> = Rc::from(&self.inner);
+        // SAFETY: the pointer comes straight from `Rc::into_raw`; the cast
+        // relies on `Slice` being a plain wrapper around `[u8]`.
+        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) }
+    }
+
+    /// Returns an owned copy with the byte slice's `to_ascii_lowercase` applied.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn to_ascii_lowercase(&self) -> Buf {
+        Buf { inner: self.inner.to_ascii_lowercase() }
+    }
+
+    /// Returns an owned copy with the byte slice's `to_ascii_uppercase` applied.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn to_ascii_uppercase(&self) -> Buf {
+        Buf { inner: self.inner.to_ascii_uppercase() }
+    }
+}
diff --git a/library/alloc/src/ffi/os_str/os_str_ext_unix.rs b/library/alloc/src/ffi/os_str/os_str_ext_unix.rs
new file mode 100644
index 0000000000000..fd8891624b8f3
--- /dev/null
+++ b/library/alloc/src/ffi/os_str/os_str_ext_unix.rs
@@ -0,0 +1,35 @@
+//! [`OsStringExt`] for unix.
+
+use super::{private, Buf, OsString};
+use crate::vec::Vec;
+
+/// Platform-specific extensions to [`OsString`].
+///
+/// This trait is sealed: it cannot be implemented outside the standard library.
+/// This is so that future additional methods are not breaking changes.
+#[stable(feature = "rust1", since = "1.0.0")]
+pub trait OsStringExt: private::Sealed {
+    /// Creates an [`OsString`] from a byte vector.
+    ///
+    /// See the module documentation for an example.
+    #[stable(feature = "rust1", since = "1.0.0")]
+    fn from_vec(vec: Vec<u8>) -> Self;
+
+    /// Yields the underlying byte vector of this [`OsString`].
+    ///
+    /// See the module documentation for an example.
+    #[stable(feature = "rust1", since = "1.0.0")]
+    fn into_vec(self) -> Vec<u8>;
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl OsStringExt for OsString {
+    /// Wraps `vec` in a `Buf` and then in an [`OsString`]; the vector is moved, not copied.
+    #[inline]
+    fn from_vec(vec: Vec<u8>) -> OsString {
+        From::from(Buf { inner: vec })
+    }
+    /// Moves the byte vector out of the string's inner `Buf`.
+    #[inline]
+    fn into_vec(self) -> Vec<u8> {
+        self.inner.inner
+    }
+}
diff --git a/library/alloc/src/ffi/os_str/os_str_ext_windows.rs b/library/alloc/src/ffi/os_str/os_str_ext_windows.rs
new file mode 100644
index 0000000000000..fa2e0a8d25f26
--- /dev/null
+++ b/library/alloc/src/ffi/os_str/os_str_ext_windows.rs
@@ -0,0 +1,38 @@
+//! [`OsStringExt`] for windows.
+
+use super::{private, Buf, OsString};
+use crate::ffi::wtf8::Wtf8Buf;
+
+/// Windows-specific extensions to [`OsString`].
+///
+/// This trait is sealed: it cannot be implemented outside the standard library.
+/// This is so that future additional methods are not breaking changes.
+// NOTE(review): `OsStrExt` is not imported in this module; confirm that the
+// intra-doc link to `OsStrExt::encode_wide` below still resolves from here.
+#[stable(feature = "rust1", since = "1.0.0")]
+pub trait OsStringExt: private::Sealed {
+ /// Creates an `OsString` from a potentially ill-formed UTF-16 slice of
+ /// 16-bit code units.
+ ///
+ /// This is lossless: calling [`OsStrExt::encode_wide`] on the resulting string
+ /// will always return the original code units.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ /// use std::os::windows::prelude::*;
+ ///
+ /// // UTF-16 encoding for "Unicode".
+ /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065];
+ ///
+ /// let string = OsString::from_wide(&source[..]);
+ /// ```
+ #[stable(feature = "rust1", since = "1.0.0")]
+ fn from_wide(wide: &[u16]) -> Self;
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl OsStringExt for OsString {
+    /// Decodes `wide` with `Wtf8Buf::from_wide` and wraps the result in an [`OsString`].
+    fn from_wide(wide: &[u16]) -> OsString {
+        let inner = Wtf8Buf::from_wide(wide);
+        OsString::from(Buf { inner })
+    }
+}
diff --git a/library/std/src/ffi/os_str/tests.rs b/library/alloc/src/ffi/os_str/tests.rs
similarity index 80%
rename from library/std/src/ffi/os_str/tests.rs
rename to library/alloc/src/ffi/os_str/tests.rs
index 67147934b4db3..5faac75621f29 100644
--- a/library/std/src/ffi/os_str/tests.rs
+++ b/library/alloc/src/ffi/os_str/tests.rs
@@ -185,49 +185,6 @@ fn into_rc() {
assert_eq!(&*arc2, os_str);
}
-#[test]
-fn slice_encoded_bytes() {
- let os_str = OsStr::new("123θგ🦀");
- // ASCII
- let digits = os_str.slice_encoded_bytes(..3);
- assert_eq!(digits, "123");
- let three = os_str.slice_encoded_bytes(2..3);
- assert_eq!(three, "3");
- // 2-byte UTF-8
- let theta = os_str.slice_encoded_bytes(3..5);
- assert_eq!(theta, "θ");
- // 3-byte UTF-8
- let gani = os_str.slice_encoded_bytes(5..8);
- assert_eq!(gani, "გ");
- // 4-byte UTF-8
- let crab = os_str.slice_encoded_bytes(8..);
- assert_eq!(crab, "🦀");
-}
-
-#[test]
-#[should_panic]
-fn slice_out_of_bounds() {
- let crab = OsStr::new("🦀");
- let _ = crab.slice_encoded_bytes(..5);
-}
-
-#[test]
-#[should_panic]
-fn slice_mid_char() {
- let crab = OsStr::new("🦀");
- let _ = crab.slice_encoded_bytes(..2);
-}
-
-#[cfg(unix)]
-#[test]
-#[should_panic(expected = "byte index 1 is not an OsStr boundary")]
-fn slice_invalid_data() {
- use crate::os::unix::ffi::OsStrExt;
-
- let os_string = OsStr::from_bytes(b"\xFF\xFF");
- let _ = os_string.slice_encoded_bytes(1..);
-}
-
#[cfg(unix)]
#[test]
#[should_panic(expected = "byte index 1 is not an OsStr boundary")]
@@ -288,18 +245,3 @@ fn slice_surrogate_edge() {
assert_eq!(post_crab.slice_encoded_bytes(..4), "🦀");
assert_eq!(post_crab.slice_encoded_bytes(4..), surrogate);
}
-
-#[test]
-fn clone_to_uninit() {
- let a = OsStr::new("hello.txt");
-
- let mut storage = vec![MaybeUninit::::uninit(); size_of_val::(a)];
- unsafe { a.clone_to_uninit(ptr::from_mut::<[_]>(storage.as_mut_slice()) as *mut OsStr) };
- assert_eq!(a.as_encoded_bytes(), unsafe { MaybeUninit::slice_assume_init_ref(&storage) });
-
- let mut b: Box = OsStr::new("world.exe").into();
- assert_eq!(size_of_val::(a), size_of_val::(&b));
- assert_ne!(a, &*b);
- unsafe { a.clone_to_uninit(ptr::from_mut::(&mut b)) };
- assert_eq!(a, &*b);
-}
diff --git a/library/alloc/src/ffi/os_str/wtf8.rs b/library/alloc/src/ffi/os_str/wtf8.rs
new file mode 100644
index 0000000000000..004ba6ca9094f
--- /dev/null
+++ b/library/alloc/src/ffi/os_str/wtf8.rs
@@ -0,0 +1,416 @@
+#![allow(missing_docs)]
+#![allow(missing_debug_implementations)]
+
+//! The underlying OsString/OsStr implementation on Windows is a
+//! wrapper around the "WTF-8" encoding; see the `wtf8` module for more.
+use core::ffi::os_str::Slice;
+use core::ffi::wtf8::Wtf8;
+use core::{fmt, mem};
+
+use crate::borrow::Cow;
+use crate::boxed::Box;
+use crate::collections::TryReserveError;
+use crate::ffi::wtf8::Wtf8Buf;
+use crate::rc::Rc;
+use crate::string::String;
+use crate::sync::Arc;
+use crate::vec::Vec;
+
+/// Owned OS string storage for the WTF-8 based implementation; wraps a `Wtf8Buf`.
+#[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+)]
+#[derive(Clone, Hash)]
+pub struct Buf {
+ pub inner: Wtf8Buf,
+}
+
+// Implemented as `From` rather than a hand-written `Into`: the blanket impl in
+// `core` still provides `Buf: Into<Wtf8Buf>`, so existing `.into()` callers
+// keep working.
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl From<Buf> for Wtf8Buf {
+    /// Unwraps the buffer into its inner `Wtf8Buf` without copying.
+    fn from(buf: Buf) -> Wtf8Buf {
+        buf.inner
+    }
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl From<Wtf8Buf> for Buf {
+    /// Wraps a `Wtf8Buf` in a `Buf` without copying.
+    fn from(inner: Wtf8Buf) -> Self {
+        Buf { inner }
+    }
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl AsRef<Wtf8> for Buf {
+    /// Borrows the contents as a `Wtf8` slice.
+    #[inline]
+    fn as_ref(&self) -> &Wtf8 {
+        &self.inner
+    }
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl fmt::Debug for Buf {
+    /// Formats the buffer exactly as its borrowed `Slice` view is formatted.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let slice = self.as_slice();
+        fmt::Debug::fmt(slice, f)
+    }
+}
+
+#[unstable(
+    feature = "os_str_internals",
+    reason = "internal details of the implementation of os str",
+    issue = "none"
+)]
+impl fmt::Display for Buf {
+    /// Displays the buffer exactly as its borrowed `Slice` view is displayed.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let slice = self.as_slice();
+        fmt::Display::fmt(slice, f)
+    }
+}
+
+impl Buf {
+    /// Consumes the buffer and returns its encoded bytes via `Wtf8Buf::into_bytes`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_encoded_bytes(self) -> Vec<u8> {
+        self.inner.into_bytes()
+    }
+
+    /// Wraps `s` in a `Buf` without validating it.
+    ///
+    /// # Safety
+    ///
+    /// The bytes are passed unchecked to `Wtf8Buf::from_bytes_unchecked`, so
+    /// the caller must uphold that function's requirements.
+    /// NOTE(review): presumably `s` must be well-formed WTF-8 — confirm.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self {
+        // SAFETY: the caller upholds the contract documented above.
+        unsafe { Self { inner: Wtf8Buf::from_bytes_unchecked(s) } }
+    }
+
+    /// Creates an empty buffer via `Wtf8Buf::with_capacity`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn with_capacity(capacity: usize) -> Buf {
+        Buf { inner: Wtf8Buf::with_capacity(capacity) }
+    }
+
+    /// Forwards to `Wtf8Buf::clear`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn clear(&mut self) {
+        self.inner.clear()
+    }
+
+    /// Forwards to `Wtf8Buf::capacity`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn capacity(&self) -> usize {
+        self.inner.capacity()
+    }
+
+    /// Converts a `String` into a `Buf` via `Wtf8Buf::from_string`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn from_string(s: String) -> Buf {
+        Buf { inner: Wtf8Buf::from_string(s) }
+    }
+
+    /// Borrows the contents as a `Slice`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn as_slice(&self) -> &Slice {
+        // SAFETY: Slice is just a wrapper for Wtf8,
+        // and self.inner.as_slice() returns &Wtf8.
+        // Therefore, transmuting &Wtf8 to &Slice is safe.
+        unsafe { mem::transmute(self.inner.as_slice()) }
+    }
+
+    /// Mutably borrows the contents as a `Slice`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn as_mut_slice(&mut self) -> &mut Slice {
+        // SAFETY: Slice is just a wrapper for Wtf8,
+        // and self.inner.as_mut_slice() returns &mut Wtf8.
+        // Therefore, transmuting &mut Wtf8 to &mut Slice is safe.
+        // Additionally, care should be taken to ensure the slice
+        // is always valid Wtf8.
+        unsafe { mem::transmute(self.inner.as_mut_slice()) }
+    }
+
+    /// Converts the buffer into a `String` via `Wtf8Buf::into_string`.
+    ///
+    /// On failure the rejected buffer is handed back inside `Err`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn into_string(self) -> Result<String, Buf> {
+        self.inner.into_string().map_err(|buf| Buf { inner: buf })
+    }
+
+    /// Appends `s` to the end of the buffer via `Wtf8Buf::push_wtf8`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn push_slice(&mut self, s: &Slice) {
+        self.inner.push_wtf8(&s.inner)
+    }
+
+    /// Forwards to `Wtf8Buf::reserve`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn reserve(&mut self, additional: usize) {
+        self.inner.reserve(additional)
+    }
+
+    /// Forwards to `Wtf8Buf::try_reserve`, returning its error unchanged.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
+        self.inner.try_reserve(additional)
+    }
+
+    /// Forwards to `Wtf8Buf::reserve_exact`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn reserve_exact(&mut self, additional: usize) {
+        self.inner.reserve_exact(additional)
+    }
+
+    /// Forwards to `Wtf8Buf::try_reserve_exact`, returning its error unchanged.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
+        self.inner.try_reserve_exact(additional)
+    }
+
+    /// Forwards to `Wtf8Buf::shrink_to_fit`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn shrink_to_fit(&mut self) {
+        self.inner.shrink_to_fit()
+    }
+
+    /// Forwards to `Wtf8Buf::shrink_to`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn shrink_to(&mut self, min_capacity: usize) {
+        self.inner.shrink_to(min_capacity)
+    }
+
+    /// Leaks the buffer via `Wtf8Buf::leak` and returns the contents as a `Slice`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn leak<'a>(self) -> &'a mut Slice {
+        // SAFETY: Slice is just a wrapper for Wtf8 (see `as_slice`), so the
+        // leaked `&mut Wtf8` can be reinterpreted as `&mut Slice`.
+        unsafe { mem::transmute(self.inner.leak()) }
+    }
+
+    /// Converts the buffer into a boxed `Slice` via `Wtf8Buf::into_box`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_box(self) -> Box<Slice> {
+        // SAFETY: Slice is just a wrapper for Wtf8 (see `as_slice`), so
+        // `Box<Wtf8>` can be reinterpreted as `Box<Slice>`.
+        unsafe { mem::transmute(self.inner.into_box()) }
+    }
+
+    /// Converts a boxed `Slice` back into a growable buffer via `Wtf8Buf::from_box`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn from_box(boxed: Box<Slice>) -> Buf {
+        // SAFETY: Slice is just a wrapper for Wtf8 (see `as_slice`), so
+        // `Box<Slice>` can be reinterpreted as `Box<Wtf8>`.
+        let inner: Box<Wtf8> = unsafe { mem::transmute(boxed) };
+        Buf { inner: Wtf8Buf::from_box(inner) }
+    }
+
+    /// Builds an `Arc<Slice>` from the contents via `Slice::into_arc`.
+    ///
+    /// Despite the `into_` prefix this takes `&self`, so the buffer itself is
+    /// left untouched.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_arc(&self) -> Arc<Slice> {
+        self.as_slice().into_arc()
+    }
+
+    /// Builds an `Rc<Slice>` from the contents via `Slice::into_rc`.
+    ///
+    /// Despite the `into_` prefix this takes `&self`, so the buffer itself is
+    /// left untouched.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_rc(&self) -> Rc<Slice> {
+        self.as_slice().into_rc()
+    }
+
+    /// Provides plumbing to the inner `Wtf8Buf`'s `truncate`.
+    /// More well behaving alternative to allowing outer types
+    /// full mutable access to the inner buffer.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub(crate) fn truncate(&mut self, len: usize) {
+        self.inner.truncate(len);
+    }
+
+    /// Provides plumbing to the inner `Wtf8Buf`'s `extend_from_slice`.
+    /// More well behaving alternative to allowing outer types
+    /// full mutable access to the inner buffer.
+    ///
+    /// NOTE(review): `other` is raw bytes; presumably the caller must keep the
+    /// buffer well-formed WTF-8 — confirm against `Wtf8Buf::extend_from_slice`.
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
+        self.inner.extend_from_slice(other);
+    }
+}
+
+impl Slice {
+    /// Converts the contents to a string via `Wtf8::to_string_lossy`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn to_string_lossy(&self) -> Cow<'_, str> {
+        self.inner.to_string_lossy()
+    }
+
+    /// Creates an owned `Buf` from the contents via the inner `to_owned`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn to_owned(&self) -> Buf {
+        Buf { inner: self.inner.to_owned() }
+    }
+
+    /// Overwrites `buf` with these contents by forwarding to the inner
+    /// `clone_into`, which is handed `buf`'s existing `Wtf8Buf`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn clone_into(&self, buf: &mut Buf) {
+        self.inner.clone_into(&mut buf.inner)
+    }
+
+    /// Builds a boxed `Slice` from the contents via `Wtf8::into_box`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_box(&self) -> Box<Slice> {
+        // SAFETY: relies on `Slice` being just a wrapper for `Wtf8`, as stated
+        // in `Buf::as_slice`, so `Box<Wtf8>` can be reinterpreted.
+        unsafe { mem::transmute(self.inner.into_box()) }
+    }
+
+    /// Creates an empty boxed `Slice` via `Wtf8::empty_box`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    pub fn empty_box() -> Box<Slice> {
+        // SAFETY: relies on `Slice` being just a wrapper for `Wtf8`, as stated
+        // in `Buf::as_slice`, so `Box<Wtf8>` can be reinterpreted.
+        unsafe { mem::transmute(Wtf8::empty_box()) }
+    }
+
+    /// Builds an `Arc<Slice>` from the contents via `Wtf8::into_arc`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_arc(&self) -> Arc<Slice> {
+        let arc = self.inner.into_arc();
+        // SAFETY: the pointer comes straight from `Arc::into_raw`; the cast
+        // relies on `Slice` being just a wrapper for `Wtf8`.
+        unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) }
+    }
+
+    /// Builds an `Rc<Slice>` from the contents via `Wtf8::into_rc`.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn into_rc(&self) -> Rc<Slice> {
+        let rc = self.inner.into_rc();
+        // SAFETY: the pointer comes straight from `Rc::into_raw`; the cast
+        // relies on `Slice` being just a wrapper for `Wtf8`.
+        unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) }
+    }
+
+    /// Returns an owned copy with the inner `to_ascii_lowercase` applied.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn to_ascii_lowercase(&self) -> Buf {
+        Buf { inner: self.inner.to_ascii_lowercase() }
+    }
+
+    /// Returns an owned copy with the inner `to_ascii_uppercase` applied.
+    #[rustc_allow_incoherent_impl]
+    #[unstable(
+        feature = "os_str_internals",
+        reason = "internal details of the implementation of os str",
+        issue = "none"
+    )]
+    #[inline]
+    pub fn to_ascii_uppercase(&self) -> Buf {
+        Buf { inner: self.inner.to_ascii_uppercase() }
+    }
+}
diff --git a/library/alloc/src/ffi/wtf8.rs b/library/alloc/src/ffi/wtf8.rs
new file mode 100644
index 0000000000000..c9d9f0dc0c0fb
--- /dev/null
+++ b/library/alloc/src/ffi/wtf8.rs
@@ -0,0 +1,703 @@
+#![allow(missing_docs)]
+#![allow(missing_debug_implementations)]
+
+//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/).
+//!
+//! This library uses Rust’s type system to maintain
+//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed),
+//! like the `String` and `&str` types do for UTF-8.
+//!
+//! Since [WTF-8 must not be used
+//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience),
+//! this library deliberately does not provide access to the underlying bytes
+//! of WTF-8 strings,
+//! nor can it decode WTF-8 from arbitrary bytes.
+//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points.
+
+// this module is imported from @SimonSapin's repo and has tons of dead code on
+// unix (it's mostly used on windows), so don't worry about dead code here.
+#![allow(dead_code)]
+
+#[cfg(test)]
+mod tests;
+
+use core::char::encode_utf8_raw;
+use core::ffi::wtf8::*;
+use core::hash::{Hash, Hasher};
+use core::{fmt, mem, ops, str};
+
+use crate::borrow::{Cow, ToOwned};
+use crate::boxed::Box;
+use crate::collections::TryReserveError;
+use crate::rc::Rc;
+use crate::string::String;
+use crate::sync::Arc;
+use crate::vec::Vec;
+
+ /// An owned, growable string of well-formed WTF-8 data.
+ ///
+ /// Similar to `String`, but can additionally contain surrogate code points
+ /// if they’re not in a surrogate pair.
+ ///
+ /// Note that the derived comparison traits compare every field, so the
+ /// `is_known_utf8` flag takes part in `==` and ordering alongside the bytes.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[derive(Eq, PartialEq, Ord, PartialOrd, Clone)]
+ pub struct Wtf8Buf {
+     /// The encoded contents of the string.
+     bytes: Vec<u8>,
+
+     /// Do we know that `bytes` holds a valid UTF-8 encoding? We can easily
+     /// know this if we're constructed from a `String` or `&str`.
+     ///
+     /// It is possible for `bytes` to have valid UTF-8 without this being
+     /// set, such as when we're concatenating `&Wtf8`'s and surrogates become
+     /// paired, as we don't bother to rescan the entire string.
+     is_known_utf8: bool,
+ }
+
+ /// Dereferencing a `Wtf8Buf` borrows it as a `Wtf8` slice.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ impl ops::Deref for Wtf8Buf {
+ type Target = Wtf8;
+
+ // Delegates to `as_slice`.
+ fn deref(&self) -> &Wtf8 {
+ self.as_slice()
+ }
+ }
+
+ /// Mutably dereferencing a `Wtf8Buf` borrows it as a mutable `Wtf8` slice.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ impl ops::DerefMut for Wtf8Buf {
+ // Delegates to `as_mut_slice`.
+ fn deref_mut(&mut self) -> &mut Wtf8 {
+ self.as_mut_slice()
+ }
+ }
+
+ /// Format the string with double quotes,
+ /// and surrogates as `\u{…}` escapes with hexadecimal digits.
+ /// Example: `"a\u{d800}"` for a string with code points [U+0061, U+D800]
+ ///
+ /// The formatting itself is delegated to the `Debug` impl of the
+ /// dereferenced `Wtf8` slice.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ impl fmt::Debug for Wtf8Buf {
+ #[inline]
+ fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Debug::fmt(&**self, formatter)
+ }
+ }
+
+impl Wtf8Buf {
+ /// Creates a new, empty WTF-8 string.
+ ///
+ /// The result starts out flagged as known UTF-8.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn new() -> Wtf8Buf {
+ Wtf8Buf { bytes: Vec::new(), is_known_utf8: true }
+ }
+
+ /// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes.
+ ///
+ /// The result starts out flagged as known UTF-8.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn with_capacity(capacity: usize) -> Wtf8Buf {
+ Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
+ }
+
+ /// Creates a WTF-8 string from a WTF-8 byte vec.
+ ///
+ /// The vec is taken over as-is, without copying, and the result is not
+ /// flagged as known UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// The byte vec is not checked for valid WTF-8, so the caller must ensure
+ /// that `value` holds well-formed WTF-8.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[inline]
+ pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
+     Wtf8Buf { bytes: value, is_known_utf8: false }
+ }
+
+ /// Creates a WTF-8 string from a UTF-8 `String`.
+ ///
+ /// This takes ownership of the `String` and does not copy.
+ ///
+ /// Since WTF-8 is a superset of UTF-8, this always succeeds.
+ /// The result is flagged as known UTF-8.
+ #[inline]
+ pub fn from_string(string: String) -> Wtf8Buf {
+ Wtf8Buf { bytes: string.into_bytes(), is_known_utf8: true }
+ }
+
+ /// Creates a WTF-8 string from a UTF-8 `&str` slice.
+ ///
+ /// This copies the content of the slice.
+ ///
+ /// Since WTF-8 is a superset of UTF-8, this always succeeds.
+ /// The result is flagged as known UTF-8.
+ #[inline]
+ pub fn from_str(str: &str) -> Wtf8Buf {
+ Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()), is_known_utf8: true }
+ }
+
+ /// Empties the string, keeping the allocation.
+ ///
+ /// An empty string is trivially UTF-8, so the flag is set again.
+ pub fn clear(&mut self) {
+     self.is_known_utf8 = true;
+     self.bytes.clear();
+ }
+
+ /// Builds a WTF-8 string out of 16-bit code units that may be ill-formed UTF-16.
+ ///
+ /// The conversion is lossless: `.encode_wide()` on the result
+ /// yields the original code units again.
+ pub fn from_wide(v: &[u16]) -> Wtf8Buf {
+     let mut buf = Wtf8Buf::with_capacity(v.len());
+     for decoded in char::decode_utf16(v.iter().copied()) {
+         let unpaired = match decoded {
+             Ok(scalar) => {
+                 buf.push_char(scalar);
+                 continue;
+             }
+             Err(error) => error.unpaired_surrogate(),
+         };
+         // From here on the string holds an unpaired surrogate.
+         buf.is_known_utf8 = false;
+         // SAFETY: a surrogate always lies within the code point range.
+         let code_point = unsafe { CodePoint::from_u32_unchecked(unpaired as u32) };
+         // `decode_utf16` has already combined every real pair, so the
+         // WTF-8 concatenation check can be skipped.
+         buf.push_code_point_unchecked(code_point);
+     }
+     buf
+ }
+
+ /// Encodes `code_point` and appends the encoded bytes (copied from `String::push`).
+ ///
+ /// Neither the WTF-8 concatenation check nor the `is_known_utf8`
+ /// bookkeeping is performed here; callers are responsible for both.
+ fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
+     // Four bytes is the longest encoding of a single code point.
+     let mut scratch = [0; 4];
+     let encoded = encode_utf8_raw(code_point.to_u32(), &mut scratch);
+     self.bytes.extend_from_slice(encoded)
+ }
+
+ /// Borrows the whole buffer as a `Wtf8` slice.
+ #[inline]
+ pub fn as_slice(&self) -> &Wtf8 {
+ // The bytes are handed over without re-validation.
+ unsafe { Wtf8::from_bytes_unchecked(&self.bytes) }
+ }
+
+ /// Mutably borrows the whole buffer as a `Wtf8` slice.
+ #[inline]
+ pub fn as_mut_slice(&mut self) -> &mut Wtf8 {
+ // Safety: `Wtf8` doesn't expose any way to mutate the bytes that would
+ // cause them to change from well-formed UTF-8 to ill-formed UTF-8,
+ // which would break the assumptions of the `is_known_utf8` field.
+ unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) }
+ }
+
+ /// Reserves capacity for at least `additional` more bytes to be inserted
+ /// in the given `Wtf8Buf`.
+ /// The collection may reserve more space to avoid frequent reallocations.
+ ///
+ /// This forwards to `reserve` on the underlying byte vector.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the new capacity overflows `usize`.
+ #[inline]
+ pub fn reserve(&mut self, additional: usize) {
+ self.bytes.reserve(additional)
+ }
+
+ /// Tries to reserve capacity for at least `additional` more length units
+ /// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid
+ /// frequent reallocations. After calling `try_reserve`, capacity will be
+ /// greater than or equal to `self.len() + additional`. Does nothing if
+ /// capacity is already sufficient. This method preserves the contents even
+ /// if an error occurs.
+ ///
+ /// This forwards to `try_reserve` on the underlying byte vector.
+ ///
+ /// # Errors
+ ///
+ /// If the capacity overflows, or the allocator reports a failure, then an error
+ /// is returned.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
+ self.bytes.try_reserve(additional)
+ }
+
+ /// Reserves capacity for `additional` more bytes by forwarding to
+ /// `reserve_exact` on the underlying byte vector.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn reserve_exact(&mut self, additional: usize) {
+ self.bytes.reserve_exact(additional)
+ }
+
+ /// Tries to reserve the minimum capacity for exactly `additional`
+ /// length units in the given `Wtf8Buf`. After calling
+ /// `try_reserve_exact`, capacity will be greater than or equal to
+ /// `self.len() + additional` if it returns `Ok(())`.
+ /// Does nothing if the capacity is already sufficient.
+ ///
+ /// Note that the allocator may give the `Wtf8Buf` more space than it
+ /// requests. Therefore, capacity can not be relied upon to be precisely
+ /// minimal. Prefer [`try_reserve`] if future insertions are expected.
+ ///
+ /// This forwards to `try_reserve_exact` on the underlying byte vector.
+ ///
+ /// [`try_reserve`]: Wtf8Buf::try_reserve
+ ///
+ /// # Errors
+ ///
+ /// If the capacity overflows, or the allocator reports a failure, then an error
+ /// is returned.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
+ self.bytes.try_reserve_exact(additional)
+ }
+
+ /// Shrinks the capacity by forwarding to `shrink_to_fit` on the
+ /// underlying byte vector.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn shrink_to_fit(&mut self) {
+ self.bytes.shrink_to_fit()
+ }
+
+ /// Shrinks the capacity with a lower bound of `min_capacity` by
+ /// forwarding to `shrink_to` on the underlying byte vector.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn shrink_to(&mut self, min_capacity: usize) {
+ self.bytes.shrink_to(min_capacity)
+ }
+
+ /// Consumes the buffer, leaks its byte vector and returns the leaked
+ /// bytes as a mutable `Wtf8` slice.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn leak<'a>(self) -> &'a mut Wtf8 {
+ // The leaked bytes are handed over without re-validation.
+ unsafe { Wtf8::from_mut_bytes_unchecked(self.bytes.leak()) }
+ }
+
+ /// Returns the number of bytes that this string buffer can hold without reallocating.
+ ///
+ /// This is the capacity of the underlying byte vector.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn capacity(&self) -> usize {
+ self.bytes.capacity()
+ }
+
+ /// Append a UTF-8 slice at the end of the string.
+ ///
+ /// The `is_known_utf8` flag is left untouched by this method.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn push_str(&mut self, other: &str) {
+ self.bytes.extend_from_slice(other.as_bytes())
+ }
+
+ /// Append a WTF-8 slice at the end of the string.
+ ///
+ /// This replaces newly paired surrogates at the boundary
+ /// with a supplementary code point,
+ /// like concatenating ill-formed UTF-16 strings effectively would.
+ ///
+ /// When no pair forms at the boundary and `other` contains a surrogate,
+ /// the string is no longer flagged as known UTF-8.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[inline]
+ pub fn push_wtf8(&mut self, other: &Wtf8) {
+     match ((&*self).final_lead_surrogate(), other.initial_trail_surrogate()) {
+         // Replace newly paired surrogates by a supplementary code point.
+         (Some(lead), Some(trail)) => {
+             // A surrogate occupies three bytes on either side of the seam.
+             let len_without_lead_surrogate = self.len() - 3;
+             self.bytes.truncate(len_without_lead_surrogate);
+             let other_without_trail_surrogate = &other.as_bytes()[3..];
+             // 4 bytes for the supplementary code point
+             self.bytes.reserve(4 + other_without_trail_surrogate.len());
+             self.push_char(decode_surrogate_pair(lead, trail));
+             self.bytes.extend_from_slice(other_without_trail_surrogate);
+         }
+         _ => {
+             // If we'll be pushing a string containing a surrogate, we may
+             // no longer have UTF-8. The scan of `other` is only needed while
+             // the flag is still set; once it is cleared the outcome cannot
+             // change, so skip the linear search in that case.
+             if self.is_known_utf8 && other.next_surrogate(0).is_some() {
+                 self.is_known_utf8 = false;
+             }
+
+             self.bytes.extend_from_slice(other.as_bytes());
+         }
+     }
+ }
+
+ /// Append a Unicode scalar value at the end of the string.
+ ///
+ /// The `char` is converted to a `CodePoint` and pushed without the
+ /// surrogate-pairing check.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn push_char(&mut self, c: char) {
+ self.push_code_point_unchecked(CodePoint::from_char(c))
+ }
+
+ /// Appends a single code point to the end of the string.
+ ///
+ /// If the string ends in a lead surrogate and `code_point` is a trail
+ /// surrogate, the two are merged into one supplementary code point,
+ /// like concatenating ill-formed UTF-16 strings effectively would.
+ /// Pushing any other surrogate clears the `is_known_utf8` flag.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[inline]
+ pub fn push(&mut self, code_point: CodePoint) {
+     match code_point.to_trail_surrogate() {
+         Some(trail) => {
+             if let Some(lead) = (&*self).final_lead_surrogate() {
+                 // Drop the three-byte lead surrogate and write the pair
+                 // as a single supplementary code point instead.
+                 let keep = self.len() - 3;
+                 self.bytes.truncate(keep);
+                 self.push_char(decode_surrogate_pair(lead, trail));
+                 return;
+             }
+
+             // An unpaired trail surrogate is about to be stored.
+             self.is_known_utf8 = false;
+         }
+         None => {
+             if code_point.to_lead_surrogate().is_some() {
+                 // A lead surrogate is about to be stored.
+                 self.is_known_utf8 = false;
+             }
+         }
+     }
+
+     // Nothing was paired at the boundary; append as-is.
+     self.push_code_point_unchecked(code_point)
+ }
+
+ /// Shortens a string to the specified length.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `new_len` > current length,
+ /// or if `new_len` is not a code point boundary.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn truncate(&mut self, new_len: usize) {
+ // Refuse to cut through the middle of an encoded code point.
+ assert!(is_code_point_boundary(self, new_len));
+ self.bytes.truncate(new_len)
+ }
+
+ /// Consumes the WTF-8 string and returns its underlying vec of bytes.
+ ///
+ /// This does not copy the data and cannot fail.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[inline]
+ pub fn into_bytes(self) -> Vec<u8> {
+     self.bytes
+ }
+
+ /// Consumes the WTF-8 string and tries to convert it to UTF-8.
+ ///
+ /// This does not copy the data.
+ ///
+ /// # Errors
+ ///
+ /// If the contents are not well-formed UTF-8
+ /// (that is, if the string contains surrogates),
+ /// the original WTF-8 string is returned instead.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ pub fn into_string(self) -> Result<String, Wtf8Buf> {
+     // The flag lets us skip the scan; when it is unset the bytes may still
+     // be UTF-8, so fall back to searching for a surrogate.
+     if self.is_known_utf8 || self.next_surrogate(0).is_none() {
+         // SAFETY: the bytes are either known to be UTF-8 or contain no surrogate.
+         Ok(unsafe { String::from_utf8_unchecked(self.bytes) })
+     } else {
+         Err(self)
+     }
+ }
+
+ /// Consumes the WTF-8 string and converts it lossily to UTF-8.
+ ///
+ /// The existing allocation is reused: no data is copied, but every surrogate
+ /// is overwritten in place with `"\u{FFFD}"` (the replacement character “�”).
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ pub fn into_string_lossy(mut self) -> String {
+     // Only scan when the contents are not already known to be UTF-8.
+     if !self.is_known_utf8 {
+         let mut cursor = 0;
+         while let Some((start, _)) = self.next_surrogate(cursor) {
+             // A surrogate takes three bytes; resume scanning right after it.
+             cursor = start + 3;
+             self.bytes[start..cursor].copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
+         }
+     }
+     unsafe { String::from_utf8_unchecked(self.bytes) }
+ }
+
+ /// Converts this `Wtf8Buf` into a boxed `Wtf8`.
+ ///
+ /// The `is_known_utf8` flag is dropped; only the bytes are kept.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[inline]
+ pub fn into_box(self) -> Box<Wtf8> {
+     // SAFETY: relies on `Wtf8` being `repr(transparent)`.
+     unsafe { mem::transmute(self.bytes.into_boxed_slice()) }
+ }
+
+ /// Converts a `Box<Wtf8>` into a `Wtf8Buf`.
+ ///
+ /// The contents are not scanned, so the result is not flagged as known UTF-8.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ pub fn from_box(boxed: Box<Wtf8>) -> Wtf8Buf {
+     // SAFETY: NOTE(review) mirrors `into_box`, which relies on `Wtf8` being
+     // `repr(transparent)` — confirm.
+     let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) };
+     Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false }
+ }
+
+ /// Provides plumbing to core `Vec::extend_from_slice`.
+ /// More well behaving alternative to allowing outer types
+ /// full mutable access to the core `Vec`.
+ ///
+ /// The appended bytes are not inspected, so the string is no longer
+ /// flagged as known UTF-8 afterwards.
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
+ self.bytes.extend_from_slice(other);
+ self.is_known_utf8 = false;
+ }
+}
+
+ /// Creates a new WTF-8 string from an iterator of code points.
+ ///
+ /// This replaces surrogate code point pairs with supplementary code points,
+ /// like concatenating ill-formed UTF-16 strings effectively would.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ impl FromIterator<CodePoint> for Wtf8Buf {
+     fn from_iter<T: IntoIterator<Item = CodePoint>>(iter: T) -> Wtf8Buf {
+         // Start empty and let `Extend` do the pairing-aware pushes.
+         let mut string = Wtf8Buf::new();
+         string.extend(iter);
+         string
+     }
+ }
+
+ /// Append code points from an iterator to the string.
+ ///
+ /// This replaces surrogate code point pairs with supplementary code points,
+ /// like concatenating ill-formed UTF-16 strings effectively would.
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ impl Extend<CodePoint> for Wtf8Buf {
+     fn extend<T: IntoIterator<Item = CodePoint>>(&mut self, iter: T) {
+         let iterator = iter.into_iter();
+         let (low, _high) = iterator.size_hint();
+         // Lower bound of one byte per code point (ASCII only)
+         self.bytes.reserve(low);
+         // Each code point goes through `push`, which handles surrogate pairing.
+         iterator.for_each(move |code_point| self.push(code_point));
+     }
+
+     #[inline]
+     fn extend_one(&mut self, code_point: CodePoint) {
+         self.push(code_point);
+     }
+
+     #[inline]
+     fn extend_reserve(&mut self, additional: usize) {
+         // Lower bound of one byte per code point (ASCII only)
+         self.bytes.reserve(additional);
+     }
+ }
+
+impl Wtf8 {
+ /// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`.
+ ///
+ /// The bytes are copied without being scanned, so the result is not
+ /// flagged as known UTF-8.
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ pub fn to_owned(&self) -> Wtf8Buf {
+ Wtf8Buf { bytes: self.as_bytes().to_vec(), is_known_utf8: false }
+ }
+
+ /// Converts the string to UTF-8, losing any surrogates along the way.
+ ///
+ /// When the contents hold no surrogate they are borrowed unchanged as a
+ /// `&str`. Otherwise an owned copy is built in which every surrogate is
+ /// replaced with `"\u{FFFD}"` (the replacement character “�”).
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ pub fn to_string_lossy(&self) -> Cow<'_, str> {
+     let source = self.as_bytes();
+     let first = match self.next_surrogate(0) {
+         Some((at, _)) => at,
+         // No surrogate anywhere: the bytes can be borrowed as-is.
+         None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(source) }),
+     };
+
+     let replacement = UTF8_REPLACEMENT_CHARACTER.as_bytes();
+     let mut lossy = Vec::with_capacity(self.len());
+     lossy.extend_from_slice(&source[..first]);
+     lossy.extend_from_slice(replacement);
+
+     // Each surrogate is three bytes long; continue right behind it.
+     let mut resume = first + 3;
+     while let Some((at, _)) = self.next_surrogate(resume) {
+         lossy.extend_from_slice(&source[resume..at]);
+         lossy.extend_from_slice(replacement);
+         resume = at + 3;
+     }
+     lossy.extend_from_slice(&source[resume..]);
+     Cow::Owned(unsafe { String::from_utf8_unchecked(lossy) })
+ }
+
+ /// Copies this slice's bytes into `buf`, replacing its previous contents.
+ ///
+ /// The bytes are not scanned, so `buf` is no longer flagged as known UTF-8.
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ pub fn clone_into(&self, buf: &mut Wtf8Buf) {
+ buf.is_known_utf8 = false;
+ self.as_bytes().clone_into(&mut buf.bytes);
+ }
+
+ /// Boxes this `Wtf8`.
+ ///
+ /// The bytes are copied into a new boxed slice.
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[inline]
+ pub fn into_box(&self) -> Box<Wtf8> {
+     let boxed: Box<[u8]> = self.as_bytes().into();
+     // SAFETY: NOTE(review) assumes `Wtf8` is `repr(transparent)` over
+     // `[u8]`, as `Wtf8Buf::into_box` states — confirm.
+     unsafe { mem::transmute(boxed) }
+ }
+
+ /// Creates a boxed, empty `Wtf8`.
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ pub fn empty_box() -> Box<Wtf8> {
+     let boxed: Box<[u8]> = Default::default();
+     // SAFETY: NOTE(review) assumes `Wtf8` is `repr(transparent)` over
+     // `[u8]`, as `Wtf8Buf::into_box` states — confirm.
+     unsafe { mem::transmute(boxed) }
+ }
+
+ /// Copies this `Wtf8` into a new `Arc<Wtf8>`.
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[inline]
+ pub fn into_arc(&self) -> Arc<Wtf8> {
+     let arc: Arc<[u8]> = Arc::from(self.as_bytes());
+     // SAFETY: NOTE(review) the pointer cast assumes `Wtf8` has the same
+     // layout as `[u8]` — confirm.
+     unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Wtf8) }
+ }
+
+ /// Copies this `Wtf8` into a new `Rc<Wtf8>`.
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+     feature = "os_str_internals",
+     reason = "internal details of the implementation of os str",
+     issue = "none"
+ )]
+ #[inline]
+ pub fn into_rc(&self) -> Rc<Wtf8> {
+     let rc: Rc<[u8]> = Rc::from(self.as_bytes());
+     // SAFETY: NOTE(review) the pointer cast assumes `Wtf8` has the same
+     // layout as `[u8]` — confirm.
+     unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Wtf8) }
+ }
+
+ /// Returns an owned copy built from the ASCII-lowercased bytes.
+ ///
+ /// The result is not flagged as known UTF-8.
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn to_ascii_lowercase(&self) -> Wtf8Buf {
+ Wtf8Buf { bytes: self.as_bytes().to_ascii_lowercase(), is_known_utf8: false }
+ }
+
+ /// Returns an owned copy built from the ASCII-uppercased bytes.
+ ///
+ /// The result is not flagged as known UTF-8.
+ #[rustc_allow_incoherent_impl]
+ #[unstable(
+ feature = "os_str_internals",
+ reason = "internal details of the implementation of os str",
+ issue = "none"
+ )]
+ #[inline]
+ pub fn to_ascii_uppercase(&self) -> Wtf8Buf {
+ Wtf8Buf { bytes: self.as_bytes().to_ascii_uppercase(), is_known_utf8: false }
+ }
+}
+
+ /// Hashes the raw bytes followed by a single `0xfe` byte.
+ ///
+ /// The `is_known_utf8` flag is not fed into the hasher.
+ impl Hash for Wtf8Buf {
+     #[inline]
+     fn hash<H: Hasher>(&self, state: &mut H) {
+         state.write(&self.bytes);
+         // NOTE(review): presumably a terminator in the spirit of the one
+         // `str` hashing appends — confirm the choice of 0xfe.
+         0xfeu8.hash(state)
+     }
+ }
diff --git a/library/alloc/src/ffi/wtf8/tests.rs b/library/alloc/src/ffi/wtf8/tests.rs
new file mode 100644
index 0000000000000..8f9c11a1c9ea8
--- /dev/null
+++ b/library/alloc/src/ffi/wtf8/tests.rs
@@ -0,0 +1,616 @@
+use super::*;
+
+ // A freshly created buffer holds no bytes.
+ #[test]
+ fn wtf8buf_new() {
+ assert_eq!(Wtf8Buf::new().bytes, b"");
+ }
+
+ // `from_str` stores the UTF-8 bytes of the input unchanged.
+ #[test]
+ fn wtf8buf_from_str() {
+ assert_eq!(Wtf8Buf::from_str("").bytes, b"");
+ assert_eq!(Wtf8Buf::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ }
+
+ // `from_string` stores the UTF-8 bytes of the input unchanged.
+ #[test]
+ fn wtf8buf_from_string() {
+ assert_eq!(Wtf8Buf::from_string(String::from("")).bytes, b"");
+ assert_eq!(Wtf8Buf::from_string(String::from("aé 💩")).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ }
+
+ // `from_wide` decodes proper surrogate pairs, keeps unpaired surrogates as
+ // three-byte sequences, and only clears `is_known_utf8` in the latter case.
+ #[test]
+ fn wtf8buf_from_wide() {
+ let buf = Wtf8Buf::from_wide(&[]);
+ assert_eq!(buf.bytes, b"");
+ assert!(buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xDCA9]);
+ assert_eq!(buf.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]);
+ assert_eq!(buf.bytes, b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xD800]);
+ assert_eq!(buf.bytes, b"\xED\xA0\x80");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDBFF]);
+ assert_eq!(buf.bytes, b"\xED\xAF\xBF");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDC00]);
+ assert_eq!(buf.bytes, b"\xED\xB0\x80");
+ assert!(!buf.is_known_utf8);
+
+ let buf = Wtf8Buf::from_wide(&[0xDFFF]);
+ assert_eq!(buf.bytes, b"\xED\xBF\xBF");
+ assert!(!buf.is_known_utf8);
+ }
+
+ // Pushing a `&str` appends its bytes and keeps the buffer known-UTF-8.
+ #[test]
+ fn wtf8buf_push_str() {
+ let mut string = Wtf8Buf::new();
+ assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
+
+ string.push_str("aé 💩");
+ assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
+ }
+
+ // Pushing a `char` appends its UTF-8 encoding and keeps the buffer known-UTF-8.
+ #[test]
+ fn wtf8buf_push_char() {
+ let mut string = Wtf8Buf::from_str("aé ");
+ assert_eq!(string.bytes, b"a\xC3\xA9 ");
+ assert!(string.is_known_utf8);
+
+ string.push_char('💩');
+ assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
+ }
+
+ // Exercises `push` with scalar values and with every lead/trail surrogate
+ // arrangement: only an adjacent lead+trail pair is merged into one code point.
+ #[test]
+ fn wtf8buf_push() {
+ let mut string = Wtf8Buf::from_str("aé ");
+ assert_eq!(string.bytes, b"a\xC3\xA9 ");
+ assert!(string.is_known_utf8);
+
+ string.push(CodePoint::from_char('💩'));
+ assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
+
+ // Shorthand for building a code point from a raw value.
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD83D)); // lead
+ assert!(!string.is_known_utf8);
+ string.push(c(0xDCA9)); // trail
+ assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD83D)); // lead
+ assert!(!string.is_known_utf8);
+ string.push(c(0x20)); // not surrogate
+ string.push(c(0xDCA9)); // trail
+ assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD800)); // lead
+ assert!(!string.is_known_utf8);
+ string.push(c(0xDBFF)); // lead
+ assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD800)); // lead
+ assert!(!string.is_known_utf8);
+ string.push(c(0xE000)); // not surrogate
+ assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xD7FF)); // not surrogate
+ assert!(string.is_known_utf8);
+ string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
+ assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0x61)); // not surrogate, < 3 bytes
+ assert!(string.is_known_utf8);
+ string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
+ assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
+
+ let mut string = Wtf8Buf::new();
+ string.push(c(0xDC00)); // trail
+ assert!(!string.is_known_utf8);
+ assert_eq!(string.bytes, b"\xED\xB0\x80");
+ }
+
+ // Exercises `push_wtf8` with UTF-8 input and with every lead/trail surrogate
+ // arrangement at the seam between the two strings.
+ #[test]
+ fn wtf8buf_push_wtf8() {
+ let mut string = Wtf8Buf::from_str("aé");
+ assert_eq!(string.bytes, b"a\xC3\xA9");
+ string.push_wtf8(Wtf8::from_str(" 💩"));
+ assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
+
+ // Shorthand for viewing raw bytes as a `Wtf8` slice.
+ fn w(v: &[u8]) -> &Wtf8 {
+ unsafe { Wtf8::from_bytes_unchecked(v) }
+ }
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
+ string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
+ assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
+ string.push_wtf8(w(b" ")); // not surrogate
+ string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
+ assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
+ assert!(!string.is_known_utf8);
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xA0\x80")); // lead
+ string.push_wtf8(w(b"\xED\xAF\xBF")); // lead
+ assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
+ assert!(!string.is_known_utf8);
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xA0\x80")); // lead
+ string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate
+ assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
+ assert!(!string.is_known_utf8);
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
+
+ let mut string = Wtf8Buf::new();
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ assert_eq!(string.bytes, b"\xED\xB0\x80");
+ assert!(!string.is_known_utf8);
+ }
+
+ // Truncating at code point boundaries shortens the bytes and keeps the
+ // buffer known-UTF-8.
+ #[test]
+ fn wtf8buf_truncate() {
+ let mut string = Wtf8Buf::from_str("aé");
+ assert!(string.is_known_utf8);
+
+ string.truncate(3);
+ assert_eq!(string.bytes, b"a\xC3\xA9");
+ assert!(string.is_known_utf8);
+
+ string.truncate(1);
+ assert_eq!(string.bytes, b"a");
+ assert!(string.is_known_utf8);
+
+ string.truncate(0);
+ assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
+ }
+
+ // Truncating just after or just before a four-byte code point is allowed.
+ #[test]
+ fn wtf8buf_truncate_around_non_bmp() {
+ let mut string = Wtf8Buf::from_str("💩");
+ assert!(string.is_known_utf8);
+
+ string.truncate(4);
+ assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9");
+ assert!(string.is_known_utf8);
+
+ string.truncate(0);
+ assert_eq!(string.bytes, b"");
+ assert!(string.is_known_utf8);
+ }
+
+ // Truncating in the middle of the two-byte `é` must panic.
+ #[test]
+ #[should_panic]
+ fn wtf8buf_truncate_fail_code_point_boundary() {
+ let mut string = Wtf8Buf::from_str("aé");
+ string.truncate(2);
+ }
+
+ // Truncating to a length beyond the three stored bytes must panic.
+ #[test]
+ #[should_panic]
+ fn wtf8buf_truncate_fail_longer() {
+ let mut string = Wtf8Buf::from_str("aé");
+ string.truncate(4);
+ }
+
+ // Truncating after the third byte of a four-byte code point must panic.
+ #[test]
+ #[should_panic]
+ fn wtf8buf_truncate_splitting_non_bmp3() {
+ let mut string = Wtf8Buf::from_str("💩");
+ assert!(string.is_known_utf8);
+ string.truncate(3);
+ }
+
+ // Truncating after the second byte of a four-byte code point must panic.
+ #[test]
+ #[should_panic]
+ fn wtf8buf_truncate_splitting_non_bmp2() {
+ let mut string = Wtf8Buf::from_str("💩");
+ assert!(string.is_known_utf8);
+ string.truncate(2);
+ }
+
+ // Truncating after the first byte of a four-byte code point must panic.
+ #[test]
+ #[should_panic]
+ fn wtf8buf_truncate_splitting_non_bmp1() {
+ let mut string = Wtf8Buf::from_str("💩");
+ assert!(string.is_known_utf8);
+ string.truncate(1);
+ }
+
+ // `into_string` succeeds for UTF-8 contents and hands the buffer back
+ // unchanged once it contains a surrogate.
+ #[test]
+ fn wtf8buf_into_string() {
+ let mut string = Wtf8Buf::from_str("aé 💩");
+ assert!(string.is_known_utf8);
+ assert_eq!(string.clone().into_string(), Ok(String::from("aé 💩")));
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert!(!string.is_known_utf8);
+ assert_eq!(string.clone().into_string(), Err(string));
+ }
+
+ // `into_string_lossy` returns UTF-8 contents as-is and swaps a surrogate
+ // for the replacement character.
+ #[test]
+ fn wtf8buf_into_string_lossy() {
+ let mut string = Wtf8Buf::from_str("aé 💩");
+ assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩"));
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩�"));
+ }
+
+ // Collecting code points pairs adjacent lead/trail surrogates and leaves
+ // every other arrangement as unpaired three-byte sequences.
+ #[test]
+ fn wtf8buf_from_iterator() {
+     // Collects raw code point values into a `Wtf8Buf`.
+     fn f(values: &[u32]) -> Wtf8Buf {
+         values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>()
+     }
+     assert_eq!(
+         f(&[0x61, 0xE9, 0x20, 0x1F4A9]),
+         Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true }
+     );
+
+     // Only the bytes are compared here: equality also looks at the flag.
+     assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
+     assert_eq!(
+         f(&[0xD83D, 0x20, 0xDCA9]),
+         Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         f(&[0xD800, 0xDBFF]),
+         Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         f(&[0xD800, 0xE000]),
+         Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         f(&[0xD7FF, 0xDC00]),
+         Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         f(&[0x61, 0xDC00]),
+         Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(f(&[0xDC00]), Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false });
+ }
+
+ // Extending a buffer pairs a trailing lead surrogate with a leading trail
+ // surrogate and leaves every other arrangement unpaired.
+ #[test]
+ fn wtf8buf_extend() {
+     // Collects `initial`, then extends the result with `extended`.
+     fn e(initial: &[u32], extended: &[u32]) -> Wtf8Buf {
+         fn c(value: &u32) -> CodePoint {
+             CodePoint::from_u32(*value).unwrap()
+         }
+         let mut string = initial.iter().map(c).collect::<Wtf8Buf>();
+         string.extend(extended.iter().map(c));
+         string
+     }
+
+     assert_eq!(
+         e(&[0x61, 0xE9], &[0x20, 0x1F4A9]),
+         Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true }
+     );
+
+     // Only the bytes are compared here: equality also looks at the flag.
+     assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
+     assert_eq!(
+         e(&[0xD83D, 0x20], &[0xDCA9]),
+         Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         e(&[0xD800], &[0xDBFF]),
+         Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         e(&[0xD800], &[0xE000]),
+         Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         e(&[0xD7FF], &[0xDC00]),
+         Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         e(&[0x61], &[0xDC00]),
+         Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false }
+     );
+     assert_eq!(
+         e(&[], &[0xDC00]),
+         Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false }
+     );
+ }
+
+ // `Debug` output quotes the string, escapes control characters and prints
+ // the surrogate as `\u{d800}`.
+ #[test]
+ fn wtf8buf_show() {
+ let mut string = Wtf8Buf::from_str("a\té \u{7f}💩\r");
+ string.push(CodePoint::from_u32(0xD800).unwrap());
+ assert_eq!(format!("{string:?}"), "\"a\\té \\u{7f}\u{1f4a9}\\r\\u{d800}\"");
+ }
+
+ // `as_slice` yields the same `Wtf8` as building the slice from the `&str` directly.
+ #[test]
+ fn wtf8buf_as_slice() {
+ assert_eq!(Wtf8Buf::from_str("aé").as_slice(), Wtf8::from_str("aé"));
+ }
+
+ // For this surrogate-free input, `Debug` output matches that of the original `&str`.
+ #[test]
+ fn wtf8buf_show_str() {
+ let text = "a\té 💩\r";
+ let string = Wtf8Buf::from_str(text);
+ assert_eq!(format!("{text:?}"), format!("{string:?}"));
+ }
+
+#[test]
+fn wtf8_code_points() {
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
+ fn cp(string: &Wtf8Buf) -> Vec