diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 3bddc3772e600..c2ea457f46b14 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -253,6 +253,7 @@ pub mod slice; pub mod str; pub mod time; +pub mod needle; pub mod unicode; /* Async */
diff --git a/library/core/src/needle/ext.rs b/library/core/src/needle/ext.rs
new file mode 100644
index 0000000000000..ce53acbd43295
--- /dev/null
+++ b/library/core/src/needle/ext.rs
@@ -0,0 +1,957 @@
+//! Extension functions which can be applied on any pairs of [`Haystack`]/[`Needle`].
+
+use super::haystack::{Hay, Haystack, Span};
+use super::needle::{
+    Consumer, DoubleEndedConsumer, DoubleEndedSearcher, Needle, ReverseConsumer, ReverseSearcher,
+    Searcher,
+};
+use crate::fmt;
+use crate::iter::FusedIterator;
+use crate::ops::Range;
+
+macro_rules! generate_clone_and_debug {
+    ($name:ident, $field:tt) => {
+        impl<H, S> Clone for $name<H, S>
+        where
+            H: Haystack + Clone,
+            S: Clone,
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+        {
+            fn clone(&self) -> Self {
+                $name { $field: self.$field.clone() }
+            }
+            fn clone_from(&mut self, src: &Self) {
+                self.$field.clone_from(&src.$field);
+            }
+        }
+
+        impl<H, S> fmt::Debug for $name<H, S>
+        where
+            H: Haystack + fmt::Debug,
+            S: fmt::Debug,
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+        {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                f.debug_tuple(stringify!($name)).field(&self.$field).finish()
+            }
+        }
+    };
+}
+
+macro_rules! generate_pattern_iterators {
+    {
+        // Forward iterator
+        forward:
+            $(#[$forward_iterator_attribute:meta])*
+            struct $forward_iterator:ident;
+
+        // Reverse iterator
+        reverse:
+            $(#[$reverse_iterator_attribute:meta])*
+            struct $reverse_iterator:ident;
+
+        // Stability of all generated items
+        stability:
+            $(#[$common_stability_attribute:meta])*
+
+        // Internal almost-iterator that is being delegated to
+        internal:
+            $internal_iterator:ident yielding ($iterty:ty);
+
+        // Kind of delegation - either single ended or double ended
+        delegate $($t:tt)*
+    } => {
+        $(#[$forward_iterator_attribute])*
+        $(#[$common_stability_attribute])*
+        pub struct $forward_iterator<H, S>($internal_iterator<H, S>)
+        where
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+            H: Haystack;
+
+        generate_clone_and_debug!($forward_iterator, 0);
+
+        $(#[$common_stability_attribute])*
+        impl<H, S> Iterator for $forward_iterator<H, S>
+        where
+            H: Haystack,
+            S: Searcher<H::Target>,
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+        {
+            type Item = $iterty;
+
+            #[inline]
+            fn next(&mut self) -> Option<Self::Item> {
+                self.0.next()
+            }
+        }
+
+        $(#[$reverse_iterator_attribute])*
+        $(#[$common_stability_attribute])*
+        pub struct $reverse_iterator<H, S>($internal_iterator<H, S>)
+        where
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+            H: Haystack;
+
+        generate_clone_and_debug!($reverse_iterator, 0);
+
+        $(#[$common_stability_attribute])*
+        impl<H, S> Iterator for $reverse_iterator<H, S>
+        where
+            H: Haystack,
+            S: ReverseSearcher<H::Target>,
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+        {
+            type Item = $iterty;
+
+            #[inline]
+            fn next(&mut self) -> Option<Self::Item> {
+                self.0.next_back()
+            }
+        }
+
+        #[stable(feature = "fused", since = "1.26.0")]
+        impl<H, S> FusedIterator for $forward_iterator<H, S>
+        where
+            H: Haystack,
+            S: Searcher<H::Target>,
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+        {}
+
+        #[stable(feature = "fused", since = "1.26.0")]
+        impl<H, S> FusedIterator for $reverse_iterator<H, S>
+        where
+            H: Haystack,
+            S: ReverseSearcher<H::Target>,
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+        {}
+
+        generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
+                                                $forward_iterator,
+                                                $reverse_iterator);
+    };
+    {
+        double ended; with $(#[$common_stability_attribute:meta])*,
+                           $forward_iterator:ident,
+                           $reverse_iterator:ident
+    } => {
+        $(#[$common_stability_attribute])*
+        impl<H, S> DoubleEndedIterator for $forward_iterator<H, S>
+        where
+            H: Haystack,
+            S: DoubleEndedSearcher<H::Target>,
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+        {
+            #[inline]
+            fn next_back(&mut self) -> Option<Self::Item> {
+                self.0.next_back()
+            }
+        }
+
+        $(#[$common_stability_attribute])*
+        impl<H, S> DoubleEndedIterator for $reverse_iterator<H, S>
+        where
+            H: Haystack,
+            S: DoubleEndedSearcher<H::Target>,
+            H::Target: Hay, // FIXME: RFC 2089 or 2289
+        {
+            #[inline]
+            fn next_back(&mut self) -> Option<Self::Item> {
+                self.0.next()
+            }
+        }
+    };
+    {
+        single ended; with $(#[$common_stability_attribute:meta])*,
+                           $forward_iterator:ident,
+                           $reverse_iterator:ident
+    } => {}
+}
+
+//------------------------------------------------------------------------------
+// Starts with / Ends with
+//------------------------------------------------------------------------------
+
+/// Returns `true` if the given needle matches a prefix of the haystack.
+///
+/// Returns `false` if it does not.
+pub fn starts_with<H, P>(haystack: H, needle: P) -> bool
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    needle.into_consumer().consume((*haystack).into()).is_some()
+}
+
+/// Returns `true` if the given needle matches a suffix of this haystack.
+///
+/// Returns `false` if it does not.
+#[inline]
+pub fn ends_with<H, P>(haystack: H, needle: P) -> bool
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Consumer: ReverseConsumer<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    needle.into_consumer().rconsume((*haystack).into()).is_some()
+}
+
+//------------------------------------------------------------------------------
+// Trim
+//------------------------------------------------------------------------------
+
+/// Returns a haystack slice with all prefixes that match the needle repeatedly removed.
+#[inline]
+pub fn trim_start<H, P>(haystack: H, needle: P) -> H
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    let range = {
+        let hay = &*haystack;
+        let start = needle.into_consumer().trim_start(hay);
+        let end = hay.end_index();
+        start..end
+    };
+    // SAFETY: the start and end indices of `range` are returned from `trim_start` and `end_index`,
+    // and both are valid indices.
+    unsafe { haystack.slice_unchecked(range) }
+}
+
+/// Returns a haystack slice with all suffixes that match the needle repeatedly removed.
+pub fn trim_end<H, P>(haystack: H, needle: P) -> H
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Consumer: ReverseConsumer<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    let range = {
+        let hay = &*haystack;
+        let start = hay.start_index();
+        let end = needle.into_consumer().trim_end(hay);
+        start..end
+    };
+    // SAFETY: the start and end indices of `range` are returned from `start_index` and `trim_end`,
+    // and both are valid indices.
+    unsafe { haystack.slice_unchecked(range) }
+}
+
+/// Returns a haystack slice with all prefixes and suffixes that match the needle
+/// repeatedly removed.
+pub fn trim<H, P>(haystack: H, needle: P) -> H
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Consumer: DoubleEndedConsumer<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    let mut checker = needle.into_consumer();
+    let range = {
+        let hay = &*haystack;
+        let end = checker.trim_end(hay);
+        // SAFETY: the start and end indices are returned from `start_index` and `trim_end`,
+        // and both are valid indices.
+        let hay = unsafe { Hay::slice_unchecked(hay, hay.start_index()..end) };
+        let start = checker.trim_start(hay);
+        start..end
+    };
+    // SAFETY: the start and end indices of `range` are returned from `trim_start` and `trim_end`,
+    // and both are valid indices.
+    unsafe { haystack.slice_unchecked(range) }
+}
+
+//------------------------------------------------------------------------------
+// Matches
+//------------------------------------------------------------------------------
+
+#[derive(Debug, Clone)]
+struct MatchesInternal<H, S>
+where
+    H: Haystack,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    searcher: S,
+    rest: Span<H>,
+}
+
+impl<H, S> MatchesInternal<H, S>
+where
+    H: Haystack,
+    S: Searcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next_spanned(&mut self) -> Option<Span<H>> {
+        let rest = self.rest.take();
+        let range = self.searcher.search(rest.borrow())?;
+        // SAFETY: the start and end indices of `range` are returned from `search`,
+        // and both are valid indices.
+        let [_, middle, right] = unsafe { rest.split_around(range) };
+        self.rest = right;
+        Some(middle)
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<H> {
+        Some(Span::into(self.next_spanned()?))
+    }
+}
+
+impl<H, S> MatchesInternal<H, S>
+where
+    H: Haystack,
+    S: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next_back_spanned(&mut self) -> Option<Span<H>> {
+        let rest = self.rest.take();
+        let range = self.searcher.rsearch(rest.borrow())?;
+        // SAFETY: the start and end indices of `range` are returned from `rsearch`,
+        // and both are valid indices.
+        let [left, middle, _] = unsafe { rest.split_around(range) };
+        self.rest = left;
+        Some(middle)
+    }
+
+    #[inline]
+    fn next_back(&mut self) -> Option<H> {
+        Some(Span::into(self.next_back_spanned()?))
+    }
+}
+
+generate_pattern_iterators! {
+    forward:
+        /// Created with the function [`matches`](self::matches).
+        struct Matches;
+    reverse:
+        /// Created with the function [`rmatches`](self::rmatches).
+        struct RMatches;
+    stability:
+    internal:
+        MatchesInternal yielding (H);
+    delegate double ended;
+}
+
+/// An iterator over the disjoint matches of the needle within the given haystack.
+pub fn matches<H, P>(haystack: H, needle: P) -> Matches<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    Matches(MatchesInternal { searcher: needle.into_searcher(), rest: haystack.into() })
+}
+
+/// An iterator over the disjoint matches of the needle within the haystack,
+/// yielded in reverse order.
+pub fn rmatches<H, P>(haystack: H, needle: P) -> RMatches<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Searcher: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    RMatches(MatchesInternal { searcher: needle.into_searcher(), rest: haystack.into() })
+}
+
+/// Returns `true` if the given needle matches a sub-slice of the haystack.
+///
+/// Returns `false` if it does not.
+pub fn contains<H, P>(haystack: H, needle: P) -> bool
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    needle.into_searcher().search((*haystack).into()).is_some()
+}
+
+//------------------------------------------------------------------------------
+// MatchIndices
+//------------------------------------------------------------------------------
+
+struct MatchIndicesInternal<H, S>
+where
+    H: Haystack,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    inner: MatchesInternal<H, S>,
+}
+
+generate_clone_and_debug!(MatchIndicesInternal, inner);
+
+impl<H, S> MatchIndicesInternal<H, S>
+where
+    H: Haystack,
+    S: Searcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next(&mut self) -> Option<(<H::Target as Hay>::Index, H)> {
+        let span = self.inner.next_spanned()?;
+        let index = span.original_range().start;
+        Some((index, Span::into(span)))
+    }
+}
+
+impl<H, S> MatchIndicesInternal<H, S>
+where
+    H: Haystack,
+    S: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<(<H::Target as Hay>::Index, H)> {
+        let span = self.inner.next_back_spanned()?;
+        let index = span.original_range().start;
+        Some((index, Span::into(span)))
+    }
+}
+
+generate_pattern_iterators! {
+    forward:
+        /// Created with the function [`match_indices`].
+        struct MatchIndices;
+    reverse:
+        /// Created with the function [`rmatch_indices`].
+        struct RMatchIndices;
+    stability:
+    internal:
+        MatchIndicesInternal yielding ((<H::Target as Hay>::Index, H));
+    delegate double ended;
+}
+
+/// An iterator over the disjoint matches of a needle within the haystack
+/// as well as the index that the match starts at.
+///
+/// For matches of `needle` within `haystack` that overlap,
+/// only the indices corresponding to the first match are returned.
+pub fn match_indices<H, P>(haystack: H, needle: P) -> MatchIndices<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    MatchIndices(MatchIndicesInternal { inner: matches(haystack, needle).0 })
+}
+
+/// An iterator over the disjoint matches of a needle within the haystack,
+/// yielded in reverse order along with the index of the match.
+///
+/// For matches of `needle` within `haystack` that overlap,
+/// only the indices corresponding to the last match are returned.
+pub fn rmatch_indices<H, P>(haystack: H, needle: P) -> RMatchIndices<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Searcher: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    RMatchIndices(MatchIndicesInternal { inner: rmatches(haystack, needle).0 })
+}
+
+/// Returns the start index of the first slice of the haystack that matches the needle.
+///
+/// Returns [`None`] if the needle doesn't match.
+#[inline]
+pub fn find<H, P>(haystack: H, needle: P) -> Option<<H::Target as Hay>::Index>
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    needle.into_searcher().search((*haystack).into()).map(|r| r.start)
+}
+
+/// Returns the start index of the last slice of the haystack that matches the needle.
+///
+/// Returns [`None`] if the needle doesn't match.
+pub fn rfind<H, P>(haystack: H, needle: P) -> Option<<H::Target as Hay>::Index>
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Searcher: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    needle.into_searcher().rsearch((*haystack).into()).map(|r| r.start)
+}
+
+//------------------------------------------------------------------------------
+// MatchRanges
+//------------------------------------------------------------------------------
+
+struct MatchRangesInternal<H, S>
+where
+    H: Haystack,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    inner: MatchesInternal<H, S>,
+}
+
+generate_clone_and_debug!(MatchRangesInternal, inner);
+
+impl<H, S> MatchRangesInternal<H, S>
+where
+    H: Haystack,
+    S: Searcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next(&mut self) -> Option<(Range<<H::Target as Hay>::Index>, H)> {
+        let span = self.inner.next_spanned()?;
+        let range = span.original_range();
+        Some((range, Span::into(span)))
+    }
+}
+
+impl<H, S> MatchRangesInternal<H, S>
+where
+    H: Haystack,
+    S: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<(Range<<H::Target as Hay>::Index>, H)> {
+        let span = self.inner.next_back_spanned()?;
+        let range = span.original_range();
+        Some((range, Span::into(span)))
+    }
+}
+
+generate_pattern_iterators! {
+    forward:
+        /// Created with the function [`match_ranges`].
+        struct MatchRanges;
+    reverse:
+        /// Created with the function [`rmatch_ranges`].
+        struct RMatchRanges;
+    stability:
+    internal:
+        MatchRangesInternal yielding ((Range<<H::Target as Hay>::Index>, H));
+    delegate double ended;
+}
+
+/// An iterator over the disjoint matches of a needle within the haystack
+/// as well as the index ranges of each match.
+///
+/// For matches of `needle` within `haystack` that overlap,
+/// only the ranges corresponding to the first match are returned.
+pub fn match_ranges<H, P>(haystack: H, needle: P) -> MatchRanges<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    MatchRanges(MatchRangesInternal { inner: matches(haystack, needle).0 })
+}
+
+/// An iterator over the disjoint matches of a needle within the haystack,
+/// yielded in reverse order along with the index range of the match.
+///
+/// For matches of `needle` within `haystack` that overlap,
+/// only the ranges corresponding to the last match are returned.
+pub fn rmatch_ranges<H, P>(haystack: H, needle: P) -> RMatchRanges<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Searcher: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    RMatchRanges(MatchRangesInternal { inner: rmatches(haystack, needle).0 })
+}
+
+/// Returns the index range of the first slice of the haystack that matches the needle.
+///
+/// Returns [`None`] if the needle doesn't match.
+pub fn find_range<H, P>(haystack: H, needle: P) -> Option<Range<<H::Target as Hay>::Index>>
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    needle.into_searcher().search((*haystack).into())
+}
+
+/// Returns the index range of the last slice of the haystack that matches the needle.
+///
+/// Returns [`None`] if the needle doesn't match.
+pub fn rfind_range<H, P>(haystack: H, needle: P) -> Option<Range<<H::Target as Hay>::Index>>
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Searcher: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    needle.into_searcher().rsearch((*haystack).into())
+}
+
+//------------------------------------------------------------------------------
+// Split
+//------------------------------------------------------------------------------
+
+#[derive(Debug, Clone)]
+struct SplitInternal<H, S>
+where
+    H: Haystack,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    searcher: S,
+    rest: Span<H>,
+    finished: bool,
+    allow_trailing_empty: bool,
+}
+
+impl<H, S> SplitInternal<H, S>
+where
+    H: Haystack,
+    S: Searcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next(&mut self) -> Option<H> {
+        if self.finished {
+            return None;
+        }
+
+        let mut rest = self.rest.take();
+        match self.searcher.search(rest.borrow()) {
+            Some(subrange) => {
+                // SAFETY: the start and end indices of `subrange` are returned from `search`,
+                // and both are valid indices.
+                let [left, _, right] = unsafe { rest.split_around(subrange) };
+                self.rest = right;
+                rest = left;
+            }
+            None => {
+                self.finished = true;
+                if !self.allow_trailing_empty && rest.is_empty() {
+                    return None;
+                }
+            }
+        }
+        Some(Span::into(rest))
+    }
+}
+
+impl<H, S> SplitInternal<H, S>
+where
+    H: Haystack,
+    S: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<H> {
+        if self.finished {
+            return None;
+        }
+
+        let rest = self.rest.take();
+        let after = match self.searcher.rsearch(rest.borrow()) {
+            Some(range) => {
+                // SAFETY: the start and end indices of `range` are returned from `rsearch`,
+                // and both are valid indices.
+                let [left, _, right] = unsafe { rest.split_around(range) };
+                self.rest = left;
+                right
+            }
+            None => {
+                self.finished = true;
+                rest
+            }
+        };
+
+        if !self.allow_trailing_empty {
+            self.allow_trailing_empty = true;
+            if after.is_empty() {
+                return self.next_back();
+            }
+        }
+
+        Some(Span::into(after))
+    }
+}
+
+generate_pattern_iterators! {
+    forward:
+        /// Created with the function [`split`].
+        struct Split;
+    reverse:
+        /// Created with the function [`rsplit`].
+        struct RSplit;
+    stability:
+    internal:
+        SplitInternal yielding (H);
+    delegate double ended;
+}
+
+generate_pattern_iterators! {
+    forward:
+        /// Created with the function [`split_terminator`].
+        struct SplitTerminator;
+    reverse:
+        /// Created with the function [`rsplit_terminator`].
+        struct RSplitTerminator;
+    stability:
+    internal:
+        SplitInternal yielding (H);
+    delegate double ended;
+}
+
+/// An iterator over slices of the haystack, separated by parts matched by the needle.
+pub fn split<H, P>(haystack: H, needle: P) -> Split<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    Split(SplitInternal {
+        searcher: needle.into_searcher(),
+        rest: haystack.into(),
+        finished: false,
+        allow_trailing_empty: true,
+    })
+}
+
+/// An iterator over slices of the haystack, separated by parts matched by the needle
+/// and yielded in reverse order.
+pub fn rsplit<H, P>(haystack: H, needle: P) -> RSplit<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Searcher: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    RSplit(SplitInternal {
+        searcher: needle.into_searcher(),
+        rest: haystack.into(),
+        finished: false,
+        allow_trailing_empty: true,
+    })
+}
+
+/// An iterator over slices of the haystack, separated by parts matched by the needle.
+///
+/// Equivalent to [`split`], except that the trailing slice is skipped if empty.
+///
+/// This method can be used for haystack data that is *terminated*,
+/// rather than *separated* by a needle.
+pub fn split_terminator<H, P>(haystack: H, needle: P) -> SplitTerminator<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    SplitTerminator(SplitInternal {
+        searcher: needle.into_searcher(),
+        rest: haystack.into(),
+        finished: false,
+        allow_trailing_empty: false,
+    })
+}
+
+/// An iterator over slices of the haystack, separated by parts matched by the needle
+/// and yielded in reverse order.
+///
+/// Equivalent to [`rsplit`], except that the trailing slice is skipped if empty.
+///
+/// This method can be used for haystack data that is *terminated*,
+/// rather than *separated* by a needle.
+pub fn rsplit_terminator<H, P>(haystack: H, needle: P) -> RSplitTerminator<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    P::Searcher: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    RSplitTerminator(SplitInternal {
+        searcher: needle.into_searcher(),
+        rest: haystack.into(),
+        finished: false,
+        allow_trailing_empty: false,
+    })
+}
+
+//------------------------------------------------------------------------------
+// SplitN
+//------------------------------------------------------------------------------
+
+#[derive(Clone, Debug)]
+struct SplitNInternal<H, S>
+where
+    H: Haystack,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    searcher: S,
+    rest: Span<H>,
+    n: usize,
+}
+
+impl<H, S> SplitNInternal<H, S>
+where
+    H: Haystack,
+    S: Searcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next(&mut self) -> Option<H> {
+        let mut rest = self.rest.take();
+        match self.n {
+            0 => {
+                return None;
+            }
+            1 => {
+                self.n = 0;
+            }
+            n => match self.searcher.search(rest.borrow()) {
+                Some(range) => {
+                    // SAFETY: the start and end indices of `range` are returned from `search`,
+                    // and both are valid indices.
+                    let [left, _, right] = unsafe { rest.split_around(range) };
+                    self.n = n - 1;
+                    self.rest = right;
+                    rest = left;
+                }
+                None => {
+                    self.n = 0;
+                }
+            },
+        }
+        Some(Span::into(rest))
+    }
+}
+
+impl<H, S> SplitNInternal<H, S>
+where
+    H: Haystack,
+    S: ReverseSearcher<H::Target>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<H> {
+        let mut rest = self.rest.take();
+        match self.n {
+            0 => {
+                return None;
+            }
+            1 => {
+                self.n = 0;
+            }
+            n => match self.searcher.rsearch(rest.borrow()) {
+                Some(range) => {
+                    // SAFETY: the start and end indices of `range` are returned from `rsearch`,
+                    // and both are valid indices.
+                    let [left, _, right] = unsafe { rest.split_around(range) };
+                    self.n = n - 1;
+                    self.rest = left;
+                    rest = right;
+                }
+                None => {
+                    self.n = 0;
+                }
+            },
+        }
+        Some(Span::into(rest))
+    }
+}
+
+generate_pattern_iterators! {
+    forward:
+        /// Created with the function [`splitn`].
+        struct SplitN;
+    reverse:
+        /// Created with the function [`rsplitn`].
+        struct RSplitN;
+    stability:
+    internal:
+        SplitNInternal yielding (H);
+    delegate single ended;
+}
+
+/// An iterator over slices of the given haystack, separated by a needle,
+/// restricted to returning at most `n` items.
+///
+/// If `n` slices are returned,
+/// the last slice (the `n`th slice) will contain the remainder of the haystack.
+pub fn splitn<H, P>(haystack: H, n: usize, needle: P) -> SplitN<H, P::Searcher>
+where
+    H: Haystack,
+    P: Needle<H>,
+    H::Target: Hay, // FIXME: RFC 2089 or 2289
+{
+    SplitN(SplitNInternal { searcher: needle.into_searcher(), rest: haystack.into(), n })
+}
+
+/// An iterator over slices of the given haystack, separated by a needle,
+/// starting from the end of the haystack, restricted to returning at most `n` items.
+///
+/// If `n` slices are returned,
+/// the last slice (the `n`th slice) will contain the remainder of the haystack.
+pub fn rsplitn(haystack: H, n: usize, needle: P) -> RSplitN +where + H: Haystack, + P: Needle, + P::Searcher: ReverseSearcher, + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + RSplitN(SplitNInternal { searcher: needle.into_searcher(), rest: haystack.into(), n }) +} + +//------------------------------------------------------------------------------ +// Replace +//------------------------------------------------------------------------------ + +/// Replaces all matches of a needle with another haystack. +pub fn replace_with(src: H, from: P, mut replacer: F, mut writer: W) +where + H: Haystack, + P: Needle, + F: FnMut(H) -> H, + W: FnMut(H), + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + let mut searcher = from.into_searcher(); + let mut src = Span::from(src); + while let Some(range) = searcher.search(src.borrow()) { + // SAFETY: the start and end indices of `range` are returned from `search`, + // and both are valid indices. + let [left, middle, right] = unsafe { src.split_around(range) }; + writer(Span::into(left)); + writer(replacer(Span::into(middle))); + src = right; + } + writer(Span::into(src)); +} + +/// Replaces first `n` matches of a needle with another haystack. +pub fn replacen_with(src: H, from: P, mut replacer: F, mut n: usize, mut writer: W) +where + H: Haystack, + P: Needle, + F: FnMut(H) -> H, + W: FnMut(H), + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + let mut searcher = from.into_searcher(); + let mut src = Span::from(src); + loop { + if n == 0 { + break; + } + n -= 1; + if let Some(range) = searcher.search(src.borrow()) { + // SAFETY: the start and end indices of `range` are returned from `search`, + // and both are valid indices. 
+ let [left, middle, right] = unsafe { src.split_around(range) }; + writer(Span::into(left)); + writer(replacer(Span::into(middle))); + src = right; + } else { + break; + } + } + writer(Span::into(src)); +} diff --git a/library/core/src/needle/haystack.rs b/library/core/src/needle/haystack.rs new file mode 100644 index 0000000000000..2b26e15347e6b --- /dev/null +++ b/library/core/src/needle/haystack.rs @@ -0,0 +1,657 @@ +use crate::fmt::Debug; +use crate::ops::{Deref, Range}; + +/// Borrowed `Haystack`. +/// +/// Every [`Haystack`] type can be borrowed as references to `Hay` types. This +/// allows multiple similar types to share the same implementation (e.g. the +/// haystacks `&[T]`, `&mut [T]` and `Vec` all have the same corresponding +/// hay type `[T]`). +/// +/// In the other words, a `Haystack` is a generalized reference to `Hay`. +/// `Hay`s are typically implemented on unsized slice types like `str` and `[T]`. +/// +/// # Safety +/// +/// This trait is unsafe as there are some unchecked requirements which the +/// implementor must uphold. Failing to meet these requirements would lead to +/// out-of-bound access. The safety requirements are written in each member of +/// this trait. +pub unsafe trait Hay { + /// The index type of the haystack. Typically a `usize`. + /// + /// Splitting a hay must be sublinear using this index type. For instance, + /// if we implement `Hay` for a linked list, the index should not be an + /// integer offset (`usize`) as this would require O(n) time to chase the + /// pointer and find the split point. Instead, for a linked list we should + /// directly use the node pointer as the index. 
+ /// + /// # Safety + /// + /// Valid indices of a single hay have a total order, even this type does + /// not require an `Ord` bound — for instance, to order two linked list + /// cursors, we need to chase the links and see if they meet; this is slow + /// and not suitable for implementing `Ord`, but conceptually an ordering + /// can be defined on linked list cursors. + type Index: Copy + Debug + Eq; + + /// Creates an empty hay. + /// + /// # Safety + /// + /// An empty hay's start and end indices must be the same, e.g. + /// + /// ```rust + /// #![feature(needle)] + /// use std::needle::Hay; + /// + /// let empty = ::empty(); + /// assert_eq!(empty.start_index(), empty.end_index()); + /// ``` + /// + /// This also suggests that there is exactly one valid index for an empty + /// hay. + /// + /// There is no guarantee that two separate calls to `.empty()` will produce + /// the same hay reference. + fn empty<'a>() -> &'a Self; + + /// Obtains the index to the start of the hay. + /// + /// Usually this method returns `0`. + /// + /// # Safety + /// + /// Implementation must ensure that the start index of hay is the first + /// valid index, i.e. for all valid indices `i` of `self`, we have + /// `self.start_index() <= i`. + fn start_index(&self) -> Self::Index; + + /// Obtains the index to the end of the hay. + /// + /// Usually this method returns the length of the hay. + /// + /// # Safety + /// + /// Implementation must ensure that the end index of hay is the last valid + /// index, i.e. for all valid indices `i` of `self`, we have + /// `i <= self.end_index()`. + fn end_index(&self) -> Self::Index; + + /// Returns the next immediate index in this hay. + /// + /// # Safety + /// + /// The `index` must be a valid index, and also must not equal to + /// `self.end_index()`. + /// + /// Implementation must ensure that if `j = self.next_index(i)`, then `j` + /// is also a valid index satisfying `j > i`. 
+ /// + unsafe fn next_index(&self, index: Self::Index) -> Self::Index; + + /// Returns the previous immediate index in this hay. + /// + /// # Safety + /// + /// The `index` must be a valid index, and also must not equal to + /// `self.start_index()`. + /// + /// Implementation must ensure that if `j = self.prev_index(i)`, then `j` + /// is also a valid index satisfying `j < i`. + unsafe fn prev_index(&self, index: Self::Index) -> Self::Index; + + /// Obtains a child hay by slicing `self`. + /// + /// # Safety + /// + /// The two ends of the range must be valid indices. The start of the range + /// must be before the end of the range (`range.start <= range.end`). + unsafe fn slice_unchecked(&self, range: Range) -> &Self; +} + +/// Linear splittable structure. +/// +/// A `Haystack` is implemented for reference and collection types such as +/// `&str`, `&mut [T]` and `Vec`. Every haystack can be borrowed as an +/// underlying representation called a [`Hay`]. Multiple haystacks may share the +/// same hay type, and thus share the same implementation of string search +/// algorithms. +/// +/// In the other words, a `Haystack` is a generalized reference to `Hay`. +/// +/// # Safety +/// +/// This trait is unsafe as there are some unchecked requirements which the +/// implementor must uphold. Failing to meet these requirements would lead to +/// out-of-bound access. The safety requirements are written in each member of +/// this trait. +pub unsafe trait Haystack: Deref + Sized +where + Self::Target: Hay, +{ + /// Creates an empty haystack. + fn empty() -> Self; + + /// Splits the haystack into three slices around the given range. + /// + /// This method splits `self` into three non-overlapping parts: + /// + /// 1. Before the range (`self[..range.start]`), + /// 2. Inside the range (`self[range]`), and + /// 3. After the range (`self[range.end..]`) + /// + /// The returned array contains these three parts in order. 
+ /// + /// # Safety + /// + /// Caller should ensure that the starts and end indices of `range` are + /// valid indices for the haystack `self` with `range.start <= range.end`. + /// + /// If the haystack is a mutable reference (`&mut A`), implementation must + /// ensure that the 3 returned haystack are truly non-overlapping in memory. + /// This is required to uphold the "Aliasing XOR Mutability" guarantee. If a + /// haystack cannot be physically split into non-overlapping parts (e.g. in + /// `OsStr`), then `&mut A` should not implement `Haystack` either. + /// + /// # Examples + /// + /// ```rust + /// #![feature(needle)] + /// use std::needle::Haystack; + /// + /// let haystack = &mut [0, 1, 2, 3, 4, 5, 6]; + /// let [left, middle, right] = unsafe { haystack.split_around(2..6) }; + /// assert_eq!(left, &mut [0, 1]); + /// assert_eq!(middle, &mut [2, 3, 4, 5]); + /// assert_eq!(right, &mut [6]); + /// ``` + unsafe fn split_around(self, range: Range<::Index>) -> [Self; 3]; + + /// Subslices this haystack. + /// + /// # Safety + /// + /// The starts and end indices of `range` must be valid indices for the + /// haystack `self` with `range.start <= range.end`. + unsafe fn slice_unchecked(self, range: Range<::Index>) -> Self { + // SAFETY: the caller must guarantee that starts and end indices of `range` are valid indices. + let [_, middle, _] = unsafe { self.split_around(range) }; + middle + } + + /// Transforms the range from relative to self's parent to the original + /// haystack it was sliced from. + /// + /// Typically this method can be simply implemented as + /// + /// ```text + /// (original.start + parent.start)..(original.start + parent.end) + /// ``` + /// + /// If this haystack is a [`SharedHaystack`], this method would never be + /// called. + /// + /// # Safety + /// + /// The `parent` range should be a valid range relative to a hay *a*, which + /// was used to slice out *self*: `self == &a[parent]`. 
+ /// + /// Similarly, the `original` range should be a valid range relative to + /// another hay *b* used to slice out *a*: `a == &b[original]`. + /// + /// The distance of `parent` must be consistent with the length of `self`. + /// + /// This method should return a range which satisfies: + /// + /// ```text + /// self == &b[original][parent] == &b[range] + /// ``` + /// + /// Slicing can be destructive and *invalidates* some indices, in particular + /// for owned types with a pointer-like index, e.g. a linked list. In this + /// case, one should derive an entirely new index range from `self`, e.g. + /// returning `self.start_index()..self.end_index()`. + /// + fn restore_range( + &self, + original: Range<::Index>, + parent: Range<::Index>, + ) -> Range<::Index>; +} + +/// A [`Haystack`] which can be shared and cheaply cloned (e.g. `&H`, `Rc`). +/// +/// If a haystack implements this marker trait, during internal operations the +/// original haystack will be retained in full and cloned, rather than being +/// sliced and split. Being a shared haystack allows a searcher to see the +/// entire haystack, including the consumed portion. +#[rustc_unsafe_specialization_marker] +pub unsafe trait SharedHaystack: Haystack + Clone +where + Self::Target: Hay, +{ +} + +/// The borrowing behavior differs between a (unique) haystack and shared +/// haystack. We use *specialization* to distinguish between these behaviors: +/// +/// * When using `split_around()` and `slice_unchecked()` with a unique +/// haystack, the original haystack will be split or sliced accordingly +/// to maintain unique ownership. +/// * When using these functions with a shared haystack, the original haystack +/// will be cloned in full as that could provide more context to +/// searchers. +/// +/// This trait will never be public.
+trait SpanBehavior: Haystack +where + Self::Target: Hay, +{ + fn take(&mut self) -> Self; + + fn from_span(span: Span) -> Self; + + unsafe fn split_around_for_span( + self, + subrange: Range<::Index>, + ) -> [Self; 3]; + + unsafe fn slice_unchecked_for_span(self, subrange: Range<::Index>) + -> Self; + + fn borrow_range( + &self, + range: Range<::Index>, + ) -> Range<::Index>; + + fn do_restore_range( + &self, + range: Range<::Index>, + subrange: Range<::Index>, + ) -> Range<::Index>; +} + +impl SpanBehavior for H +where + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + #[inline] + default fn take(&mut self) -> Self { + crate::mem::replace(self, Self::empty()) + } + + #[inline] + default fn from_span(span: Span) -> Self { + span.haystack + } + + #[inline] + default fn borrow_range( + &self, + _: Range<::Index>, + ) -> Range<::Index> { + self.start_index()..self.end_index() + } + + #[inline] + default fn do_restore_range( + &self, + range: Range<::Index>, + subrange: Range<::Index>, + ) -> Range<::Index> { + self.restore_range(range, subrange) + } + + #[inline] + default unsafe fn split_around_for_span( + self, + subrange: Range<::Index>, + ) -> [Self; 3] { + // SAFETY: the caller must guarantee that starts and end indices of `subrange` are valid indices. + unsafe { self.split_around(subrange) } + } + + #[inline] + default unsafe fn slice_unchecked_for_span( + self, + subrange: Range<::Index>, + ) -> Self { + // SAFETY: the caller must guarantee that starts and end indices of `subrange` are valid indices. + unsafe { self.slice_unchecked(subrange) } + } +} + +impl SpanBehavior for H +where + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + #[inline] + fn take(&mut self) -> Self { + self.clone() + } + + #[inline] + fn from_span(span: Span) -> Self { + // SAFETY: span's range is guaranteed to be valid for the haystack. 
+ unsafe { span.haystack.slice_unchecked(span.range) } + } + + #[inline] + fn borrow_range( + &self, + range: Range<::Index>, + ) -> Range<::Index> { + range + } + + #[inline] + fn do_restore_range( + &self, + _: Range<::Index>, + subrange: Range<::Index>, + ) -> Range<::Index> { + subrange + } + + #[inline] + unsafe fn split_around_for_span(self, _: Range<::Index>) -> [Self; 3] { + [self.clone(), self.clone(), self] + } + + #[inline] + unsafe fn slice_unchecked_for_span(self, _: Range<::Index>) -> Self { + self + } +} + +/// A span is a haystack coupled with the original range where the haystack is found. +/// +/// It can be considered as a tuple `(H, Range)` +/// where the range is guaranteed to be valid for the haystack. +#[derive(Debug, Clone)] +pub struct Span +where + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + haystack: H, + range: Range<<::Target as Hay>::Index>, + //^ The `` is to trick `#[derive]` not to generate + // the where bound for `H::Target`. +} + +/// Creates a span which covers the entire haystack. +impl From for Span +where + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + #[inline] + fn from(haystack: H) -> Self { + let range = haystack.start_index()..haystack.end_index(); + Self { haystack, range } + } +} + +/// Slices the original haystack to the focused range. +impl From> for H +where + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + #[inline] + fn from(span: Span) -> Self { + H::from_span(span) + } +} + +impl Span +where + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + /// Decomposes this span into the original haystack, and the range it focuses on. + #[inline] + pub fn into_parts(self) -> (H, Range<::Index>) { + (self.haystack, self.range) + } + + /// Creates a span from a haystack, and a range it should focus on. + /// + /// # Safety + /// + /// The `range` must be a valid range relative to `haystack`. 
+ #[inline] + pub unsafe fn from_parts(haystack: H, range: Range<::Index>) -> Self { + Self { haystack, range } + } +} + +impl<'h> Span<&'h str> { + /// Reinterprets the string span as a byte-array span. + #[inline] + pub fn as_bytes(self) -> Span<&'h [u8]> { + Span { haystack: self.haystack.as_bytes(), range: self.range } + } +} + +impl Span +where + H::Target: Hay, // FIXME: RFC 2089 or 2289 +{ + /// The range of the span, relative to the ultimate original haystack it was sliced from. + #[inline] + pub fn original_range(&self) -> Range<::Index> { + self.range.clone() + } + + /// Borrows a shared span. + #[inline] + pub fn borrow(&self) -> Span<&H::Target> { + Span { haystack: &*self.haystack, range: self.haystack.borrow_range(self.range.clone()) } + } + + /// Checks whether this span is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.range.start == self.range.end + } + + /// Returns this span by value, and replaces the original span by an empty + /// span. + #[inline] + pub fn take(&mut self) -> Self { + let haystack = self.haystack.take(); + let range = self.range.clone(); + self.range.end = self.range.start; + Span { haystack, range } + } + + /// Splits this span into three spans around the given range. + /// + /// # Safety + /// + /// `subrange` must be a valid range relative to `self.borrow()`. + #[inline] + pub unsafe fn split_around(self, subrange: Range<::Index>) -> [Self; 3] { + let self_range = self.haystack.borrow_range(self.range.clone()); + let [left, middle, right] = + // SAFETY: the caller must guarantee that starts and end indices of `subrange` are valid indices. 
+ unsafe { self.haystack.split_around_for_span(subrange.clone()) }; + + let left_range = + left.do_restore_range(self.range.clone(), self_range.start..subrange.start); + let right_range = right.do_restore_range(self.range.clone(), subrange.end..self_range.end); + let middle_range = middle.do_restore_range(self.range, subrange); + + [ + Self { haystack: left, range: left_range }, + Self { haystack: middle, range: middle_range }, + Self { haystack: right, range: right_range }, + ] + } + + /// Slices this span to the given range. + /// + /// # Safety + /// + /// `subrange` must be a valid range relative to `self.borrow()`. + #[inline] + pub unsafe fn slice_unchecked(self, subrange: Range<::Index>) -> Self { + // SAFETY: the caller must guarantee that starts and end indices of `subrange` are valid indices. + let haystack = unsafe { self.haystack.slice_unchecked_for_span(subrange.clone()) }; + let range = haystack.do_restore_range(self.range, subrange); + Self { haystack, range } + } +} + +unsafe impl<'a, A: Hay + ?Sized + 'a> Haystack for &'a A { + #[inline] + fn empty() -> Self { + A::empty() + } + + #[inline] + unsafe fn split_around(self, range: Range) -> [Self; 3] { + // SAFETY: the caller must guarantee that starts and end indices of `range` are valid indices. + unsafe { + [ + self.slice_unchecked(self.start_index()..range.start), + self.slice_unchecked(range.clone()), + self.slice_unchecked(range.end..self.end_index()), + ] + } + } + + #[inline] + unsafe fn slice_unchecked(self, range: Range) -> Self { + // SAFETY: the caller must guarantee that starts and end indices of `range` are valid indices. 
+ unsafe { A::slice_unchecked(self, range) } + } + + #[inline] + fn restore_range(&self, _: Range, _: Range) -> Range { + unreachable!() + } +} + +unsafe impl<'a, A: Hay + ?Sized + 'a> SharedHaystack for &'a A {} + +unsafe impl Hay for str { + type Index = usize; + + #[inline] + fn empty<'a>() -> &'a Self { + "" + } + + #[inline] + fn start_index(&self) -> usize { + 0 + } + + #[inline] + fn end_index(&self) -> usize { + self.len() + } + + #[inline] + unsafe fn slice_unchecked(&self, range: Range) -> &Self { + // SAFETY: the caller must guarantee that starts and end indices of `range` are valid indices. + unsafe { self.get_unchecked(range) } + } + + #[inline] + unsafe fn next_index(&self, index: Self::Index) -> Self::Index { + // SAFETY: the caller must guarantee that `index` is a valid index. + index + unsafe { self.get_unchecked(index..).chars().next().unwrap().len_utf8() } + } + + #[inline] + unsafe fn prev_index(&self, index: Self::Index) -> Self::Index { + // SAFETY: the caller must guarantee that `index` is a valid index. + index - unsafe { self.get_unchecked(..index).chars().next_back().unwrap().len_utf8() } + } +} + +unsafe impl<'h> Haystack for &'h mut str { + #[inline] + fn empty() -> &'h mut str { + Self::default() + } + + #[inline] + unsafe fn slice_unchecked(self, range: Range) -> Self { + // SAFETY: the caller must guarantee that starts and end indices of `range` are valid indices. 
+ unsafe { self.get_unchecked_mut(range) } + } + + #[inline] + unsafe fn split_around(self, range: Range) -> [Self; 3] { + let (haystack, right) = self.split_at_mut(range.end); + let (left, middle) = haystack.split_at_mut(range.start); + [left, middle, right] + } + + #[inline] + fn restore_range(&self, range: Range, subrange: Range) -> Range { + (subrange.start + range.start)..(subrange.end + range.start) + } +} + +unsafe impl Hay for [T] { + type Index = usize; + + #[inline] + fn empty<'a>() -> &'a Self { + &[] + } + + #[inline] + fn start_index(&self) -> usize { + 0 + } + + #[inline] + fn end_index(&self) -> usize { + self.len() + } + + #[inline] + unsafe fn slice_unchecked(&self, range: Range) -> &Self { + // SAFETY: the caller must guarantee that starts and end indices of `range` are valid indices. + unsafe { self.get_unchecked(range) } + } + + #[inline] + unsafe fn next_index(&self, index: Self::Index) -> Self::Index { + index + 1 + } + + #[inline] + unsafe fn prev_index(&self, index: Self::Index) -> Self::Index { + index - 1 + } +} + +unsafe impl<'h, T: 'h> Haystack for &'h mut [T] { + #[inline] + fn empty() -> Self { + &mut [] + } + + #[inline] + unsafe fn slice_unchecked(self, range: Range) -> Self { + // SAFETY: the caller must guarantee that starts and end indices of `range` are valid indices. + unsafe { self.get_unchecked_mut(range) } + } + + #[inline] + unsafe fn split_around(self, range: Range) -> [Self; 3] { + let (haystack, right) = self.split_at_mut(range.end); + let (left, middle) = haystack.split_at_mut(range.start); + [left, middle, right] + } + + #[inline] + fn restore_range(&self, range: Range, subrange: Range) -> Range { + (subrange.start + range.start)..(subrange.end + range.start) + } +} diff --git a/library/core/src/needle/mod.rs b/library/core/src/needle/mod.rs new file mode 100644 index 0000000000000..beaa4d32fdfb2 --- /dev/null +++ b/library/core/src/needle/mod.rs @@ -0,0 +1,36 @@ +#![unstable(feature = "needle", issue = "56345")] + +//! 
The Needle API, which supports generalized searching on strings, arrays and more. +//! +//! This module provides traits to facilitate searching for a [`Needle`] in a [`Haystack`]. +//! +//! Haystacks +//! ========= +//! +//! A *haystack* refers to any linear structure which can be split or sliced +//! into smaller, non-overlapping parts. Examples are strings and vectors. +//! +//! ```rust +//! let haystack: &str = "hello"; // a string slice (`&str`) is a haystack. +//! let (a, b) = haystack.split_at(4); // it can be split into two strings. +//! let c = &a[1..3]; // it can be sliced. +//! ``` +//! +//! The minimal haystack which cannot be further sliced is called a *codeword*. +//! For instance, the codeword of a string would be a UTF-8 sequence. A haystack +//! can therefore be viewed as a consecutive list of codewords. +//! +//! The boundary between codewords can be addressed using an *index*. The +//! numbers 1, 3 and 4 in the snippet above are sample indices of a string. An +//! index is usually a `usize`. +//! +//! An arbitrary number may point outside of a haystack, or in the interior of a +//! codeword. These indices are invalid. A *valid index* of a certain haystack +//! would only point to the boundaries. + +pub mod ext; +mod haystack; +mod needle; + +pub use self::haystack::*; +pub use self::needle::*; diff --git a/library/core/src/needle/needle.rs b/library/core/src/needle/needle.rs new file mode 100644 index 0000000000000..111f33e44967d --- /dev/null +++ b/library/core/src/needle/needle.rs @@ -0,0 +1,398 @@ +use super::haystack::{Hay, Haystack, Span}; + +use crate::ops::Range; + +/// A searcher, for searching a [`Needle`] from a [`Hay`]. +/// +/// This trait provides methods for searching for non-overlapping matches of a +/// needle starting from the front (left) of a hay. +/// +/// # Safety +/// +/// This trait is marked unsafe because the range returned by its methods are +/// required to lie on valid codeword boundaries in the haystack.
This enables +/// users of this trait to slice the haystack without additional runtime checks. +/// +/// # Examples +/// +/// Implement a searcher and consumer which matches `b"Aaaa"` from a byte string. +/// +/// ```rust +/// #![feature(needle)] +/// use std::needle::*; +/// use std::ops::Range; +/// +/// // The searcher for searching `b"Aaaa"`, using naive search. +/// // We are going to use this as a needle too. +/// struct Aaaa; +/// +/// unsafe impl Searcher<[u8]> for Aaaa { +/// // search for an `b"Aaaa"` in the middle of the string, returns its range. +/// fn search(&mut self, span: Span<&[u8]>) -> Option> { +/// let (hay, range) = span.into_parts(); +/// +/// let start = range.start; +/// for (i, window) in hay[range].windows(4).enumerate() { +/// if *window == b"Aaaa"[..] { +/// // remember to include the range offset +/// return Some((start + i)..(start + i + 4)); +/// } +/// } +/// +/// None +/// } +/// } +/// +/// unsafe impl Consumer<[u8]> for Aaaa { +/// // checks if an `b"Aaaa" is at the beginning of the string, returns the end index. +/// fn consume(&mut self, span: Span<&[u8]>) -> Option { +/// let (hay, range) = span.into_parts(); +/// let end = range.start.checked_add(4)?; +/// if end <= range.end && hay[range.start..end] == b"Aaaa"[..] { +/// Some(end) +/// } else { +/// None +/// } +/// } +/// } +/// +/// impl> Needle for Aaaa { +/// type Searcher = Self; +/// type Consumer = Self; +/// fn into_searcher(self) -> Self { self } +/// fn into_consumer(self) -> Self { self } +/// } +/// +/// // test with some standard algorithms. 
+/// let haystack = &b"Aaaaa!!!Aaa!!!Aaaaaaaaa!!!"[..]; +/// assert_eq!( +/// ext::split(haystack, Aaaa).collect::>(), +/// vec![ +/// &b""[..], +/// &b"a!!!Aaa!!!"[..], +/// &b"aaaaa!!!"[..], +/// ] +/// ); +/// assert_eq!( +/// ext::match_ranges(haystack, Aaaa).collect::>(), +/// vec![ +/// (0..4, &b"Aaaa"[..]), +/// (14..18, &b"Aaaa"[..]), +/// ] +/// ); +/// assert_eq!( +/// ext::trim_start(haystack, Aaaa), +/// &b"a!!!Aaa!!!Aaaaaaaaa!!!"[..] +/// ); +/// ``` +pub unsafe trait Searcher { + /// Searches for the first range which the needle can be found in the span. + /// + /// This method is used to support the following standard algorithms: + /// + /// * [`matches`](super::ext::matches) + /// * [`contains`](super::ext::contains) + /// * [`match_indices`](super::ext::match_indices) + /// * [`find`](super::ext::find) + /// * [`match_ranges`](super::ext::match_ranges) + /// * [`find_range`](super::ext::find_range) + /// * [`split`](super::ext::split) + /// * [`split_terminator`](super::ext::split_terminator) + /// * [`splitn`](super::ext::splitn) + /// * [`replace_with`](super::ext::replace_with) + /// * [`replacen_with`](super::ext::replacen_with) + /// + /// The hay and the restricted range for searching can be recovered by + /// calling `span`[`.into_parts()`](Span::into_parts). The range returned + /// by this method + /// should be relative to the hay and must be contained within the + /// restricted range from the span. + /// + /// If the needle is not found, this method should return `None`. + /// + /// The reason this method takes a `Span<&A>` instead of just `&A` is + /// because some needles need context information provided by + /// the position of the current slice and the content around the slice. + /// Regex components like the start-/end-of-text anchors `^`/`$` + /// and word boundary `\b` are primary examples. 
+ fn search(&mut self, span: Span<&A>) -> Option>; +} + +/// A consumer, for searching a [`Needle`] from a [`Hay`] anchored at the +/// beginning. +/// +/// This trait provides methods for matching a needle anchored at the beginning +/// of a hay. +/// +/// See documentation of [`Searcher`] for an example. +/// +/// # Safety +/// +/// This trait is marked unsafe because the range returned by its methods are +/// required to lie on valid codeword boundaries in the haystack. This enables +/// users of this trait to slice the haystack without additional runtime checks. +pub unsafe trait Consumer { + /// Checks if the needle can be found at the beginning of the span. + /// + /// This method is used to implement the standard algorithm + /// [`starts_with()`](super::ext::starts_with) as well as providing the default + /// implementation for [`.trim_start()`](Consumer::trim_start). + /// + /// The hay and the restricted range for searching can be recovered by + /// calling `span`[`.into_parts()`](Span::into_parts). If a needle can be + /// found starting at `range.start`, this method should return the end index + /// of the needle relative to the hay. + /// + /// If the needle cannot be found at the beginning of the span, this method + /// should return `None`. + fn consume(&mut self, span: Span<&A>) -> Option; + + /// Repeatedly removes prefixes of the hay which matches the needle. + /// + /// This method is used to implement the standard algorithm + /// [`trim_start()`](super::ext::trim_start). + /// + /// Returns the start index of the slice after all prefixes are removed. + /// + /// A fast generic implementation in terms of + /// [`.consume()`](Consumer::consume) is provided by default. Nevertheless, + /// many needles allow a higher-performance specialization.
+ #[inline] + fn trim_start(&mut self, hay: &A) -> A::Index { + let mut offset = hay.start_index(); + let mut span = Span::from(hay); + while let Some(pos) = self.consume(span.clone()) { + offset = pos; + let (hay, range) = span.into_parts(); + if pos == range.start { + break; + } + // SAFETY: span's range is guaranteed to be valid for the haystack. + span = unsafe { Span::from_parts(hay, pos..range.end) }; + } + offset + } +} + +/// A searcher which can be searched from the end. +/// +/// This trait provides methods for searching for non-overlapping matches of a +/// needle starting from the back (right) of a hay. +/// +/// # Safety +/// +/// This trait is marked unsafe because the range returned by its methods are +/// required to lie on valid codeword boundaries in the haystack. This enables +/// users of this trait to slice the haystack without additional runtime checks. +pub unsafe trait ReverseSearcher: Searcher { + /// Searches for the last range which the needle can be found in the span. + /// + /// This method is used to support the following standard algorithms: + /// + /// * [`rmatches`](super::ext::rmatches) + /// * [`rmatch_indices`](super::ext::rmatch_indices) + /// * [`rfind`](super::ext::rfind) + /// * [`rmatch_ranges`](super::ext::rmatch_ranges) + /// * [`rfind_range`](super::ext::rfind_range) + /// * [`rsplit`](super::ext::rsplit) + /// * [`rsplit_terminator`](super::ext::rsplit_terminator) + /// * [`rsplitn`](super::ext::rsplitn) + /// + /// The hay and the restricted range for searching can be recovered by + /// calling `span`[`.into_parts()`](Span::into_parts). The returned range + /// should be relative to the hay and must be contained within the + /// restricted range from the span. + /// + /// If the needle is not found, this method should return `None`. + fn rsearch(&mut self, span: Span<&A>) -> Option>; +} + +/// A consumer which can be searched from the end.
+/// +/// This trait provides methods for matching a needle anchored at the end of a +/// hay. +/// +/// # Safety +/// +/// This trait is marked unsafe because the range returned by its methods are +/// required to lie on valid codeword boundaries in the haystack. This enables +/// users of this trait to slice the haystack without additional runtime checks. +pub unsafe trait ReverseConsumer: Consumer { + /// Checks if the needle can be found at the end of the span. + /// + /// This method is used to implement the standard algorithm + /// [`ends_with()`](super::ext::ends_with) as well as providing the default + /// implementation for [`.trim_end()`](ReverseConsumer::trim_end). + /// + /// The hay and the restricted range for searching can be recovered by + /// calling `span`[`.into_parts()`](Span::into_parts). If a needle can be + /// found ending at `range.end`, this method should return the start index + /// of the needle relative to the hay. + /// + /// If the needle cannot be found at the end of the span, this method + /// should return `None`. + fn rconsume(&mut self, hay: Span<&A>) -> Option; + + /// Repeatedly removes suffixes of the hay which matches the needle. + /// + /// This method is used to implement the standard algorithm + /// [`trim_end()`](super::ext::trim_end). + /// + /// A fast generic implementation in terms of + /// [`.rconsume()`](ReverseConsumer::rconsume) is provided by default. + /// Nevertheless, many needles allow a higher-performance specialization. + #[inline] + fn trim_end(&mut self, hay: &A) -> A::Index { + let mut offset = hay.end_index(); + let mut span = Span::from(hay); + while let Some(pos) = self.rconsume(span.clone()) { + offset = pos; + let (hay, range) = span.into_parts(); + if pos == range.end { + break; + } + // SAFETY: span's range is guaranteed to be valid for the haystack. 
+ span = unsafe { Span::from_parts(hay, range.start..pos) }; + } + offset + } +} + +/// A searcher which can be searched from both ends with consistent results. +/// +/// Implementing this marker trait enables the following standard algorithms to +/// return [`DoubleEndedIterator`](crate::iter::DoubleEndedIterator)s: +/// +/// * [`matches`](super::ext::matches) / +/// [`rmatches`](super::ext::rmatches) +/// * [`match_indices`](super::ext::match_indices) / +/// [`rmatch_indices`](super::ext::rmatch_indices) +/// * [`match_ranges`](super::ext::match_ranges) / +/// [`rmatch_ranges`](super::ext::rmatch_ranges) +/// * [`split`](super::ext::split) / +/// [`rsplit`](super::ext::rsplit) +/// * [`split_terminator`](super::ext::split_terminator) / +/// [`rsplit_terminator`](super::ext::rsplit_terminator) +/// * [`splitn`](super::ext::splitn) / +/// [`rsplitn`](super::ext::rsplitn) +pub unsafe trait DoubleEndedSearcher: ReverseSearcher {} + +/// A consumer which can be searched from both ends with consistent results. +/// +/// It is used to support the following standard algorithm: +/// +/// * [`trim`](super::ext::trim) +/// +/// The `trim` function is implemented by calling +/// [`trim_start`](super::ext::trim_start) and [`trim_end`](super::ext::trim_end) +/// together. This trait encodes the fact that we can call these two functions in any order. +pub unsafe trait DoubleEndedConsumer: ReverseConsumer {} + +/// A needle, a type which can be converted into a searcher. +/// +/// When using search algorithms like [`split()`](super::ext::split), users will +/// search with a `Needle` e.g. a `&str`. A needle is usually stateless, +/// however for efficient searching, we often need some preprocessing and +/// maintain a mutable state. The preprocessed structure is called the +/// [`Searcher`] of this needle. +/// +/// The relationship between `Searcher` and `Needle` is similar to `Iterator` +/// and `IntoIterator`.
+pub trait Needle: Sized +where + H::Target: Hay, +{ + /// The searcher associated with this needle. + type Searcher: Searcher; + + /// The consumer associated with this needle. + type Consumer: Consumer; + + /// Produces a searcher for this needle. + fn into_searcher(self) -> Self::Searcher; + + /// Produces a consumer for this needle. + /// + /// Usually a consumer and a searcher can be the same type. + /// Some needles may require different types + /// when the two need different optimization strategies. String searching + /// is an example of this: we use the Two-Way Algorithm when searching for + /// substrings, which needs to preprocess the needle. However this is + /// irrelevant for consuming, which only needs to check for string equality + /// once. Therefore the Consumer for a string would be a distinct type + /// using naive search. + fn into_consumer(self) -> Self::Consumer; +} + +/// Searcher of an empty needle. +/// +/// This searcher will find all empty subslices between any codewords in a +/// haystack. +#[derive(Clone, Debug, Default)] +pub struct EmptySearcher { + consumed_start: bool, + consumed_end: bool, +} + +unsafe impl Searcher for EmptySearcher { + #[inline] + fn search(&mut self, span: Span<&A>) -> Option> { + let (hay, range) = span.into_parts(); + let start = if !self.consumed_start { + self.consumed_start = true; + range.start + } else if range.start == range.end { + return None; + } else { + // SAFETY: span's range is guaranteed to be valid for the haystack. 
+ unsafe { hay.next_index(range.start) } + }; + Some(start..start) + } +} + +unsafe impl Consumer for EmptySearcher { + #[inline] + fn consume(&mut self, span: Span<&A>) -> Option { + let (_, range) = span.into_parts(); + Some(range.start) + } + + #[inline] + fn trim_start(&mut self, hay: &A) -> A::Index { + hay.start_index() + } +} + +unsafe impl ReverseSearcher for EmptySearcher { + #[inline] + fn rsearch(&mut self, span: Span<&A>) -> Option> { + let (hay, range) = span.into_parts(); + let end = if !self.consumed_end { + self.consumed_end = true; + range.end + } else if range.start == range.end { + return None; + } else { + // SAFETY: span's range is guaranteed to be valid for the haystack. + unsafe { hay.prev_index(range.end) } + }; + Some(end..end) + } +} + +unsafe impl ReverseConsumer for EmptySearcher { + #[inline] + fn rconsume(&mut self, span: Span<&A>) -> Option { + let (_, range) = span.into_parts(); + Some(range.end) + } + + #[inline] + fn trim_end(&mut self, hay: &A) -> A::Index { + hay.end_index() + } +} + +unsafe impl DoubleEndedSearcher for EmptySearcher {} +unsafe impl DoubleEndedConsumer for EmptySearcher {} diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 5333d75ec1bc5..a0e31c68435c2 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -277,6 +277,7 @@ #![feature(maybe_uninit_ref)] #![feature(maybe_uninit_slice)] #![feature(min_specialization)] +#![feature(needle)] #![feature(needs_panic_runtime)] #![feature(negative_impls)] #![feature(never_type)] @@ -423,6 +424,8 @@ pub use core::iter; pub use core::marker; #[stable(feature = "rust1", since = "1.0.0")] pub use core::mem; +#[unstable(feature = "needle", issue = "56345")] +pub use core::needle; #[stable(feature = "rust1", since = "1.0.0")] pub use core::ops; #[stable(feature = "rust1", since = "1.0.0")]