core/str/
pattern.rs

1//! The string Pattern API.
2//!
3//! The Pattern API provides a generic mechanism for using different pattern
4//! types when searching through a string.
5//!
6//! For more details, see the traits [`Pattern`], [`Searcher`],
7//! [`ReverseSearcher`], and [`DoubleEndedSearcher`].
8//!
9//! Although this API is unstable, it is exposed via stable APIs on the
10//! [`str`] type.
11//!
12//! # Examples
13//!
14//! [`Pattern`] is [implemented][pattern-impls] in the stable API for
15//! [`&str`][`str`], [`char`], slices of [`char`], and functions and closures
16//! implementing `FnMut(char) -> bool`.
17//!
18//! ```
19//! let s = "Can you find a needle in a haystack?";
20//!
21//! // &str pattern
22//! assert_eq!(s.find("you"), Some(4));
23//! // char pattern
24//! assert_eq!(s.find('n'), Some(2));
25//! // array of chars pattern
26//! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u']), Some(1));
27//! // slice of chars pattern
28//! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u'][..]), Some(1));
29//! // closure pattern
30//! assert_eq!(s.find(|c: char| c.is_ascii_punctuation()), Some(35));
31//! ```
32//!
33//! [pattern-impls]: Pattern#implementors
34
35#![unstable(
36    feature = "pattern",
37    reason = "API not fully fleshed out and ready to be stabilized",
38    issue = "27721"
39)]
40
41use crate::char::MAX_LEN_UTF8;
42use crate::cmp::Ordering;
43use crate::convert::TryInto as _;
44use crate::slice::memchr;
45use crate::{cmp, fmt};
46
47// Pattern
48
49/// A string pattern.
50///
51/// A `Pattern` expresses that the implementing type
52/// can be used as a string pattern for searching in a [`&str`][str].
53///
54/// For example, both `'a'` and `"aa"` are patterns that
55/// would match at index `1` in the string `"baaaab"`.
56///
57/// The trait itself acts as a builder for an associated
58/// [`Searcher`] type, which does the actual work of finding
59/// occurrences of the pattern in a string.
60///
61/// Depending on the type of the pattern, the behavior of methods like
62/// [`str::find`] and [`str::contains`] can change. The table below describes
63/// some of those behaviors.
64///
65/// | Pattern type             | Match condition                           |
66/// |--------------------------|-------------------------------------------|
67/// | `&str`                   | is substring                              |
68/// | `char`                   | is contained in string                    |
69/// | `&[char]`                | any char in slice is contained in string  |
70/// | `F: FnMut(char) -> bool` | `F` returns `true` for a char in string   |
71/// | `&&str`                  | is substring                              |
72/// | `&String`                | is substring                              |
73///
74/// # Examples
75///
76/// ```
77/// // &str
78/// assert_eq!("abaaa".find("ba"), Some(1));
79/// assert_eq!("abaaa".find("bac"), None);
80///
81/// // char
82/// assert_eq!("abaaa".find('a'), Some(0));
83/// assert_eq!("abaaa".find('b'), Some(1));
84/// assert_eq!("abaaa".find('c'), None);
85///
86/// // &[char; N]
87/// assert_eq!("ab".find(&['b', 'a']), Some(0));
88/// assert_eq!("abaaa".find(&['a', 'z']), Some(0));
89/// assert_eq!("abaaa".find(&['c', 'd']), None);
90///
91/// // &[char]
92/// assert_eq!("ab".find(&['b', 'a'][..]), Some(0));
93/// assert_eq!("abaaa".find(&['a', 'z'][..]), Some(0));
94/// assert_eq!("abaaa".find(&['c', 'd'][..]), None);
95///
96/// // FnMut(char) -> bool
97/// assert_eq!("abcdef_z".find(|ch| ch > 'd' && ch < 'y'), Some(4));
98/// assert_eq!("abcddd_z".find(|ch| ch > 'd' && ch < 'y'), None);
99/// ```
100pub trait Pattern: Sized {
101    /// Associated searcher for this pattern
102    type Searcher<'a>: Searcher<'a>;
103
104    /// Constructs the associated searcher from
105    /// `self` and the `haystack` to search in.
106    fn into_searcher(self, haystack: &str) -> Self::Searcher<'_>;
107
108    /// Checks whether the pattern matches anywhere in the haystack
109    #[inline]
110    fn is_contained_in(self, haystack: &str) -> bool {
111        self.into_searcher(haystack).next_match().is_some()
112    }
113
114    /// Checks whether the pattern matches at the front of the haystack
115    #[inline]
116    fn is_prefix_of(self, haystack: &str) -> bool {
117        matches!(self.into_searcher(haystack).next(), SearchStep::Match(0, _))
118    }
119
120    /// Checks whether the pattern matches at the back of the haystack
121    #[inline]
122    fn is_suffix_of<'a>(self, haystack: &'a str) -> bool
123    where
124        Self::Searcher<'a>: ReverseSearcher<'a>,
125    {
126        matches!(self.into_searcher(haystack).next_back(), SearchStep::Match(_, j) if haystack.len() == j)
127    }
128
129    /// Removes the pattern from the front of haystack, if it matches.
130    #[inline]
131    fn strip_prefix_of(self, haystack: &str) -> Option<&str> {
132        if let SearchStep::Match(start, len) = self.into_searcher(haystack).next() {
133            debug_assert_eq!(
134                start, 0,
135                "The first search step from Searcher \
136                 must include the first character"
137            );
138            // SAFETY: `Searcher` is known to return valid indices.
139            unsafe { Some(haystack.get_unchecked(len..)) }
140        } else {
141            None
142        }
143    }
144
145    /// Removes the pattern from the back of haystack, if it matches.
146    #[inline]
147    fn strip_suffix_of<'a>(self, haystack: &'a str) -> Option<&'a str>
148    where
149        Self::Searcher<'a>: ReverseSearcher<'a>,
150    {
151        if let SearchStep::Match(start, end) = self.into_searcher(haystack).next_back() {
152            debug_assert_eq!(
153                end,
154                haystack.len(),
155                "The first search step from ReverseSearcher \
156                 must include the last character"
157            );
158            // SAFETY: `Searcher` is known to return valid indices.
159            unsafe { Some(haystack.get_unchecked(..start)) }
160        } else {
161            None
162        }
163    }
164
165    /// Returns the pattern as utf-8 bytes if possible.
166    fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
167        None
168    }
169}
/// Result of calling [`Pattern::as_utf8_pattern()`].
///
/// Can be used for inspecting the contents of a [`Pattern`] in cases
/// where the underlying representation can be represented as UTF-8.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum Utf8Pattern<'a> {
    /// Type returned by String and str types; carries the borrowed bytes
    /// of the pattern.
    StringPattern(&'a [u8]),
    /// Type returned by char types; carries the `char` itself.
    CharPattern(char),
}
180
181// Searcher
182
/// Result of calling [`Searcher::next()`] or [`ReverseSearcher::next_back()`].
///
/// In the variants below, `a` and `b` refer to the first and second
/// field respectively; both are byte indices into the haystack.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum SearchStep {
    /// Expresses that a match of the pattern has been found at
    /// `haystack[a..b]`.
    Match(usize, usize),
    /// Expresses that `haystack[a..b]` has been rejected as a possible match
    /// of the pattern.
    ///
    /// Note that there might be more than one `Reject` between two `Match`es;
    /// there is no requirement for them to be combined into one.
    Reject(usize, usize),
    /// Expresses that every byte of the haystack has been visited, ending
    /// the iteration.
    Done,
}
199
200/// A searcher for a string pattern.
201///
202/// This trait provides methods for searching for non-overlapping
203/// matches of a pattern starting from the front (left) of a string.
204///
205/// It will be implemented by associated `Searcher`
206/// types of the [`Pattern`] trait.
207///
208/// The trait is marked unsafe because the indices returned by the
209/// [`next()`][Searcher::next] methods are required to lie on valid utf8
210/// boundaries in the haystack. This enables consumers of this trait to
211/// slice the haystack without additional runtime checks.
212pub unsafe trait Searcher<'a> {
213    /// Getter for the underlying string to be searched in
214    ///
215    /// Will always return the same [`&str`][str].
216    fn haystack(&self) -> &'a str;
217
218    /// Performs the next search step starting from the front.
219    ///
220    /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` matches
221    ///   the pattern.
222    /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` can
223    ///   not match the pattern, even partially.
224    /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack has
225    ///   been visited.
226    ///
227    /// The stream of [`Match`][SearchStep::Match] and
228    /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done]
229    /// will contain index ranges that are adjacent, non-overlapping,
230    /// covering the whole haystack, and laying on utf8 boundaries.
231    ///
232    /// A [`Match`][SearchStep::Match] result needs to contain the whole matched
233    /// pattern, however [`Reject`][SearchStep::Reject] results may be split up
234    /// into arbitrary many adjacent fragments. Both ranges may have zero length.
235    ///
236    /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
237    /// might produce the stream
238    /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
239    fn next(&mut self) -> SearchStep;
240
241    /// Finds the next [`Match`][SearchStep::Match] result. See [`next()`][Searcher::next].
242    ///
243    /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges
244    /// of this and [`next_reject`][Searcher::next_reject] will overlap. This will return
245    /// `(start_match, end_match)`, where start_match is the index of where
246    /// the match begins, and end_match is the index after the end of the match.
247    #[inline]
248    fn next_match(&mut self) -> Option<(usize, usize)> {
249        loop {
250            match self.next() {
251                SearchStep::Match(a, b) => return Some((a, b)),
252                SearchStep::Done => return None,
253                _ => continue,
254            }
255        }
256    }
257
258    /// Finds the next [`Reject`][SearchStep::Reject] result. See [`next()`][Searcher::next]
259    /// and [`next_match()`][Searcher::next_match].
260    ///
261    /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges
262    /// of this and [`next_match`][Searcher::next_match] will overlap.
263    #[inline]
264    fn next_reject(&mut self) -> Option<(usize, usize)> {
265        loop {
266            match self.next() {
267                SearchStep::Reject(a, b) => return Some((a, b)),
268                SearchStep::Done => return None,
269                _ => continue,
270            }
271        }
272    }
273}
274
275/// A reverse searcher for a string pattern.
276///
277/// This trait provides methods for searching for non-overlapping
278/// matches of a pattern starting from the back (right) of a string.
279///
280/// It will be implemented by associated [`Searcher`]
281/// types of the [`Pattern`] trait if the pattern supports searching
282/// for it from the back.
283///
284/// The index ranges returned by this trait are not required
285/// to exactly match those of the forward search in reverse.
286///
287/// For the reason why this trait is marked unsafe, see the
288/// parent trait [`Searcher`].
289pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
290    /// Performs the next search step starting from the back.
291    ///
292    /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]`
293    ///   matches the pattern.
294    /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]`
295    ///   can not match the pattern, even partially.
296    /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack
297    ///   has been visited
298    ///
299    /// The stream of [`Match`][SearchStep::Match] and
300    /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done]
301    /// will contain index ranges that are adjacent, non-overlapping,
302    /// covering the whole haystack, and laying on utf8 boundaries.
303    ///
304    /// A [`Match`][SearchStep::Match] result needs to contain the whole matched
305    /// pattern, however [`Reject`][SearchStep::Reject] results may be split up
306    /// into arbitrary many adjacent fragments. Both ranges may have zero length.
307    ///
308    /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
309    /// might produce the stream
310    /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`.
311    fn next_back(&mut self) -> SearchStep;
312
313    /// Finds the next [`Match`][SearchStep::Match] result.
314    /// See [`next_back()`][ReverseSearcher::next_back].
315    #[inline]
316    fn next_match_back(&mut self) -> Option<(usize, usize)> {
317        loop {
318            match self.next_back() {
319                SearchStep::Match(a, b) => return Some((a, b)),
320                SearchStep::Done => return None,
321                _ => continue,
322            }
323        }
324    }
325
326    /// Finds the next [`Reject`][SearchStep::Reject] result.
327    /// See [`next_back()`][ReverseSearcher::next_back].
328    #[inline]
329    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
330        loop {
331            match self.next_back() {
332                SearchStep::Reject(a, b) => return Some((a, b)),
333                SearchStep::Done => return None,
334                _ => continue,
335            }
336        }
337    }
338}
339
/// A marker trait to express that a [`ReverseSearcher`]
/// can be used for a [`DoubleEndedIterator`] implementation.
///
/// The trait has no methods of its own. For it to be implemented
/// correctly, the impls of [`Searcher`] and [`ReverseSearcher`] need
/// to follow these conditions:
///
/// - All results of `next()` need to be identical
///   to the results of `next_back()` in reverse order.
/// - `next()` and `next_back()` need to behave as
///   the two ends of a range of values, that is they
///   can not "walk past each other".
///
/// # Examples
///
/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
/// [`char`] only requires looking at one at a time, which behaves the same
/// from both ends.
///
/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
/// the pattern `"aa"` in the haystack `"aaa"` matches as either
/// `"[aa]a"` or `"a[aa]"`, depending on which side it is searched from.
pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
362
363/////////////////////////////////////////////////////////////////////////////
364// Impl for char
365/////////////////////////////////////////////////////////////////////////////
366
/// Associated type for `<char as Pattern>::Searcher<'a>`.
///
/// Tracks a window `finger..finger_back` of the haystack that has not yet
/// been searched; forward searching advances `finger`, reverse searching
/// pulls `finger_back` down.
#[derive(Clone, Debug)]
pub struct CharSearcher<'a> {
    /// The string being searched.
    haystack: &'a str,
    // safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
    // This invariant can be broken *within* next_match and next_match_back, however
    // they must exit with fingers on valid code point boundaries.
    /// `finger` is the current byte index of the forward search.
    /// Imagine that it exists before the byte at its index, i.e.
    /// `haystack[finger]` is the first byte of the slice we must inspect during
    /// forward searching
    finger: usize,
    /// `finger_back` is the current byte index of the reverse search.
    /// Imagine that it exists after the byte at its index, i.e.
    /// `haystack[finger_back - 1]` is the last byte of the slice we must inspect during
    /// forward searching (and thus the first byte to be inspected when calling next_back()).
    finger_back: usize,
    /// The character being searched for
    needle: char,

    // safety invariant: `utf8_size` must be less than 5
    // NOTE(review): `next_match`/`next_match_back` also compute `utf8_size - 1`,
    // so they additionally rely on `utf8_size >= 1`; the value written by
    // `<char as Pattern>::into_searcher` comes from `encode_utf8(..).len()`.
    /// The number of bytes `needle` takes up when encoded in utf8.
    utf8_size: u8,
    /// A utf8 encoded copy of the `needle`; only the first `utf8_size`
    /// bytes are compared against the haystack.
    utf8_encoded: [u8; 4],
}
393
394impl CharSearcher<'_> {
395    fn utf8_size(&self) -> usize {
396        self.utf8_size.into()
397    }
398}
399
// Forward search for a single `char`.
//
// `next` walks the haystack one `char` at a time; `next_match` is a faster
// override that uses `memchr` on the last byte of the needle's UTF-8
// encoding and then verifies the full encoding.
unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
    #[inline]
    fn haystack(&self) -> &'a str {
        self.haystack
    }
    // Decodes exactly one `char` from the unsearched window
    // `finger..finger_back`, advances `finger` past it, and reports it as
    // a `Match` or a `Reject`. Returns `Done` when the window is empty.
    #[inline]
    fn next(&mut self) -> SearchStep {
        let old_finger = self.finger;
        // SAFETY: 1-4 guarantee safety of `get_unchecked`
        // 1. `self.finger` and `self.finger_back` are kept on unicode boundaries
        //    (this is invariant)
        // 2. `self.finger >= 0` since it starts at 0 and only increases
        // 3. `self.finger < self.finger_back` because otherwise the char `iter`
        //    would return `SearchStep::Done`
        // 4. `self.finger` comes before the end of the haystack because `self.finger_back`
        //    starts at the end and only decreases
        let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
        let mut iter = slice.chars();
        // Remaining byte count of the underlying byte iterator before
        // decoding; the difference after `next()` is the char's byte width.
        let old_len = iter.iter.len();
        if let Some(ch) = iter.next() {
            // add byte offset of current character
            // without re-encoding as utf-8
            self.finger += old_len - iter.iter.len();
            if ch == self.needle {
                SearchStep::Match(old_finger, self.finger)
            } else {
                SearchStep::Reject(old_finger, self.finger)
            }
        } else {
            SearchStep::Done
        }
    }
    // Returns the byte range of the next occurrence of the needle inside
    // `finger..finger_back`, or `None` (after moving `finger` up to
    // `finger_back`) when there is none.
    #[inline]
    fn next_match(&mut self) -> Option<(usize, usize)> {
        loop {
            // get the haystack after the last character found
            let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?;
            // the last byte of the utf8 encoded needle
            // SAFETY: we have an invariant that `utf8_size < 5`
            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
            if let Some(index) = memchr::memchr(last_byte, bytes) {
                // The new finger is the index of the byte we found,
                // plus one, since we memchr'd for the last byte of the character.
                //
                // Note that this doesn't always give us a finger on a UTF8 boundary.
                // If we *didn't* find our character
                // we may have indexed to the non-last byte of a 3-byte or 4-byte character.
                // We can't just skip to the next valid starting byte because a character like
                // ꁁ (U+A041 YI SYLLABLE PA), utf-8 `EA 81 81` will have us always find
                // the second byte when searching for the third.
                //
                // However, this is totally okay. While we have the invariant that
                // self.finger is on a UTF8 boundary, this invariant is not relied upon
                // within this method (it is relied upon in CharSearcher::next()).
                //
                // We only exit this method when we reach the end of the string, or if we
                // find something. When we find something the `finger` will be set
                // to a UTF8 boundary.
                self.finger += index + 1;
                // Only look backwards for the full encoding if there are
                // enough bytes before `finger` to hold it.
                if self.finger >= self.utf8_size() {
                    let found_char = self.finger - self.utf8_size();
                    if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
                        // Confirm that the bytes ending at the memchr hit
                        // are the complete encoding of the needle.
                        if slice == &self.utf8_encoded[0..self.utf8_size()] {
                            return Some((found_char, self.finger));
                        }
                    }
                }
            } else {
                // found nothing, exit
                self.finger = self.finger_back;
                return None;
            }
        }
    }

    // let next_reject use the default implementation from the Searcher trait
}
477
// Reverse search for a single `char`.
//
// `next_back` walks the haystack one `char` at a time from the back;
// `next_match_back` is a faster override that uses `memrchr` on the last
// byte of the needle's UTF-8 encoding and then verifies the full encoding.
unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
    // Decodes exactly one `char` from the back of the unsearched window
    // `finger..finger_back`, moves `finger_back` down to its start, and
    // reports it as a `Match` or a `Reject`. Returns `Done` when the
    // window is empty.
    #[inline]
    fn next_back(&mut self) -> SearchStep {
        let old_finger = self.finger_back;
        // SAFETY: see the comment for next() above
        let slice = unsafe { self.haystack.get_unchecked(self.finger..old_finger) };
        let mut iter = slice.chars();
        // Remaining byte count before decoding; the difference after
        // `next_back()` is the byte width of the char that was popped.
        let old_len = iter.iter.len();
        if let Some(ch) = iter.next_back() {
            // subtract byte offset of current character
            // without re-encoding as utf-8
            self.finger_back -= old_len - iter.iter.len();
            if ch == self.needle {
                SearchStep::Match(self.finger_back, old_finger)
            } else {
                SearchStep::Reject(self.finger_back, old_finger)
            }
        } else {
            SearchStep::Done
        }
    }
    // Returns the byte range of the last occurrence of the needle inside
    // `finger..finger_back`, or `None` (after moving `finger_back` down to
    // `finger`) when there is none.
    #[inline]
    fn next_match_back(&mut self) -> Option<(usize, usize)> {
        let haystack = self.haystack.as_bytes();
        loop {
            // get the haystack up to but not including the last character searched
            let bytes = haystack.get(self.finger..self.finger_back)?;
            // the last byte of the utf8 encoded needle
            // SAFETY: we have an invariant that `utf8_size < 5`
            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
            if let Some(index) = memchr::memrchr(last_byte, bytes) {
                // we searched a slice that was offset by self.finger,
                // add self.finger to recoup the original index
                let index = self.finger + index;
                // memrchr will return the index of the byte we wish to
                // find. In case of an ASCII character, this is indeed
                // where we wish our new finger to be ("after" the found
                // char in the paradigm of reverse iteration). For
                // multibyte chars we need to skip down by the number of more
                // bytes they have than ASCII
                let shift = self.utf8_size() - 1;
                if index >= shift {
                    let found_char = index - shift;
                    if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size())) {
                        // Confirm that the bytes ending at the memrchr hit
                        // are the complete encoding of the needle.
                        if slice == &self.utf8_encoded[0..self.utf8_size()] {
                            // move finger to before the character found (i.e., at its start index)
                            self.finger_back = found_char;
                            return Some((self.finger_back, self.finger_back + self.utf8_size()));
                        }
                    }
                }
                // We can't use finger_back = index - size + 1 here. If we found the last char
                // of a different-sized character (or the middle byte of a different character)
                // we need to bump the finger_back down to `index`. This similarly makes
                // `finger_back` have the potential to no longer be on a boundary,
                // but this is OK since we only exit this function on a boundary
                // or when the haystack has been searched completely.
                //
                // Unlike next_match this does not
                // have the problem of repeated bytes in utf-8 because
                // we're searching for the last byte, and we can only have
                // found the last byte when searching in reverse.
                self.finger_back = index;
            } else {
                self.finger_back = self.finger;
                // found nothing, exit
                return None;
            }
        }
    }

    // let next_reject_back use the default implementation from the ReverseSearcher trait
}
551
// Marker impl: `next()` and `next_back()` of `CharSearcher` each consume one
// `char` from the shared `finger..finger_back` window, so the two directions
// cannot walk past each other.
impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
553
554/// Searches for chars that are equal to a given [`char`].
555///
556/// # Examples
557///
558/// ```
559/// assert_eq!("Hello world".find('o'), Some(4));
560/// ```
561impl Pattern for char {
562    type Searcher<'a> = CharSearcher<'a>;
563
564    #[inline]
565    fn into_searcher<'a>(self, haystack: &'a str) -> Self::Searcher<'a> {
566        let mut utf8_encoded = [0; MAX_LEN_UTF8];
567        let utf8_size = self
568            .encode_utf8(&mut utf8_encoded)
569            .len()
570            .try_into()
571            .expect("char len should be less than 255");
572
573        CharSearcher {
574            haystack,
575            finger: 0,
576            finger_back: haystack.len(),
577            needle: self,
578            utf8_size,
579            utf8_encoded,
580        }
581    }
582
583    #[inline]
584    fn is_contained_in(self, haystack: &str) -> bool {
585        if (self as u32) < 128 {
586            haystack.as_bytes().contains(&(self as u8))
587        } else {
588            let mut buffer = [0u8; 4];
589            self.encode_utf8(&mut buffer).is_contained_in(haystack)
590        }
591    }
592
593    #[inline]
594    fn is_prefix_of(self, haystack: &str) -> bool {
595        self.encode_utf8(&mut [0u8; 4]).is_prefix_of(haystack)
596    }
597
598    #[inline]
599    fn strip_prefix_of(self, haystack: &str) -> Option<&str> {
600        self.encode_utf8(&mut [0u8; 4]).strip_prefix_of(haystack)
601    }
602
603    #[inline]
604    fn is_suffix_of<'a>(self, haystack: &'a str) -> bool
605    where
606        Self::Searcher<'a>: ReverseSearcher<'a>,
607    {
608        self.encode_utf8(&mut [0u8; 4]).is_suffix_of(haystack)
609    }
610
611    #[inline]
612    fn strip_suffix_of<'a>(self, haystack: &'a str) -> Option<&'a str>
613    where
614        Self::Searcher<'a>: ReverseSearcher<'a>,
615    {
616        self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack)
617    }
618
619    #[inline]
620    fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
621        Some(Utf8Pattern::CharPattern(*self))
622    }
623}
624
625/////////////////////////////////////////////////////////////////////////////
626// Impl for a MultiCharEq wrapper
627/////////////////////////////////////////////////////////////////////////////
628
/// Internal abstraction over "things that can decide whether a single
/// `char` matches": closures and char arrays/slices implement it below.
#[doc(hidden)]
trait MultiCharEq {
    /// Returns `true` if `c` is accepted. Takes `&mut self` so that
    /// `FnMut` closures can be called.
    fn matches(&mut self, c: char) -> bool;
}
633
634impl<F> MultiCharEq for F
635where
636    F: FnMut(char) -> bool,
637{
638    #[inline]
639    fn matches(&mut self, c: char) -> bool {
640        (*self)(c)
641    }
642}
643
644impl<const N: usize> MultiCharEq for [char; N] {
645    #[inline]
646    fn matches(&mut self, c: char) -> bool {
647        self.contains(&c)
648    }
649}
650
651impl<const N: usize> MultiCharEq for &[char; N] {
652    #[inline]
653    fn matches(&mut self, c: char) -> bool {
654        self.contains(&c)
655    }
656}
657
658impl MultiCharEq for &[char] {
659    #[inline]
660    fn matches(&mut self, c: char) -> bool {
661        self.contains(&c)
662    }
663}
664
/// Newtype wrapper that turns any [`MultiCharEq`] value into a [`Pattern`]
/// (see the `Pattern` impl for this type).
struct MultiCharEqPattern<C: MultiCharEq>(C);
666
/// Searcher built by [`MultiCharEqPattern`]: steps through the haystack one
/// `char` at a time and asks `char_eq` whether each one matches.
#[derive(Clone, Debug)]
struct MultiCharEqSearcher<'a, C: MultiCharEq> {
    /// The matcher consulted for every char.
    char_eq: C,
    /// The string being searched.
    haystack: &'a str,
    /// Iterator over the not-yet-visited `(byte index, char)` pairs of
    /// `haystack`; consumed from the front by `next` and from the back
    /// by `next_back`.
    char_indices: super::CharIndices<'a>,
}
673
674impl<C: MultiCharEq> Pattern for MultiCharEqPattern<C> {
675    type Searcher<'a> = MultiCharEqSearcher<'a, C>;
676
677    #[inline]
678    fn into_searcher(self, haystack: &str) -> MultiCharEqSearcher<'_, C> {
679        MultiCharEqSearcher { haystack, char_eq: self.0, char_indices: haystack.char_indices() }
680    }
681}
682
683unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
684    #[inline]
685    fn haystack(&self) -> &'a str {
686        self.haystack
687    }
688
689    #[inline]
690    fn next(&mut self) -> SearchStep {
691        let s = &mut self.char_indices;
692        // Compare lengths of the internal byte slice iterator
693        // to find length of current char
694        let pre_len = s.iter.iter.len();
695        if let Some((i, c)) = s.next() {
696            let len = s.iter.iter.len();
697            let char_len = pre_len - len;
698            if self.char_eq.matches(c) {
699                return SearchStep::Match(i, i + char_len);
700            } else {
701                return SearchStep::Reject(i, i + char_len);
702            }
703        }
704        SearchStep::Done
705    }
706}
707
708unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a, C> {
709    #[inline]
710    fn next_back(&mut self) -> SearchStep {
711        let s = &mut self.char_indices;
712        // Compare lengths of the internal byte slice iterator
713        // to find length of current char
714        let pre_len = s.iter.iter.len();
715        if let Some((i, c)) = s.next_back() {
716            let len = s.iter.iter.len();
717            let char_len = pre_len - len;
718            if self.char_eq.matches(c) {
719                return SearchStep::Match(i, i + char_len);
720            } else {
721                return SearchStep::Reject(i, i + char_len);
722            }
723        }
724        SearchStep::Done
725    }
726}
727
// Marker impl: `next()` and `next_back()` pull single chars from the two ends
// of the same `char_indices` iterator.
impl<'a, C: MultiCharEq> DoubleEndedSearcher<'a> for MultiCharEqSearcher<'a, C> {}
729
730/////////////////////////////////////////////////////////////////////////////
731
// Generates the body of a `Pattern` impl that forwards everything to an
// inner pattern.
//
// Arguments:
// - `$a`:    name of the haystack lifetime used in the generated items
// - `$t`:    the public searcher type (may mention `$a`)
// - `$pmap`: callable mapping `self` to the inner pattern
// - `$smap`: callable wrapping the inner pattern's searcher into `$t`
macro_rules! pattern_methods {
    ($a:lifetime, $t:ty, $pmap:expr, $smap:expr) => {
        type Searcher<$a> = $t;

        // Build the inner searcher, then wrap it in the public type.
        #[inline]
        fn into_searcher<$a>(self, haystack: &$a str) -> $t {
            ($smap)(($pmap)(self).into_searcher(haystack))
        }

        // The remaining methods forward to the inner pattern so that any
        // overrides it provides are used instead of the trait defaults.
        #[inline]
        fn is_contained_in<$a>(self, haystack: &$a str) -> bool {
            ($pmap)(self).is_contained_in(haystack)
        }

        #[inline]
        fn is_prefix_of<$a>(self, haystack: &$a str) -> bool {
            ($pmap)(self).is_prefix_of(haystack)
        }

        #[inline]
        fn strip_prefix_of<$a>(self, haystack: &$a str) -> Option<&$a str> {
            ($pmap)(self).strip_prefix_of(haystack)
        }

        #[inline]
        fn is_suffix_of<$a>(self, haystack: &$a str) -> bool
        where
            $t: ReverseSearcher<$a>,
        {
            ($pmap)(self).is_suffix_of(haystack)
        }

        #[inline]
        fn strip_suffix_of<$a>(self, haystack: &$a str) -> Option<&$a str>
        where
            $t: ReverseSearcher<$a>,
        {
            ($pmap)(self).strip_suffix_of(haystack)
        }
    };
}
773
// Generates the methods of a `Searcher` (`forward`) or `ReverseSearcher`
// (`reverse`) impl for a tuple-struct wrapper, forwarding each call to the
// wrapped searcher in field `0`. The enclosing impl must name the haystack
// lifetime `'a`.
macro_rules! searcher_methods {
    (forward) => {
        #[inline]
        fn haystack(&self) -> &'a str {
            self.0.haystack()
        }
        #[inline]
        fn next(&mut self) -> SearchStep {
            self.0.next()
        }
        #[inline]
        fn next_match(&mut self) -> Option<(usize, usize)> {
            self.0.next_match()
        }
        #[inline]
        fn next_reject(&mut self) -> Option<(usize, usize)> {
            self.0.next_reject()
        }
    };
    (reverse) => {
        #[inline]
        fn next_back(&mut self) -> SearchStep {
            self.0.next_back()
        }
        #[inline]
        fn next_match_back(&mut self) -> Option<(usize, usize)> {
            self.0.next_match_back()
        }
        #[inline]
        fn next_reject_back(&mut self) -> Option<(usize, usize)> {
            self.0.next_reject_back()
        }
    };
}
808
/// Associated type for `<[char; N] as Pattern>::Searcher<'a>`.
///
/// Thin public wrapper around the internal searcher used for
/// `MultiCharEqPattern<[char; N]>`.
#[derive(Clone, Debug)]
pub struct CharArraySearcher<'a, const N: usize>(
    <MultiCharEqPattern<[char; N]> as Pattern>::Searcher<'a>,
);
814
/// Associated type for `<&[char; N] as Pattern>::Searcher<'a>`.
// Newtype around the searcher of `MultiCharEqPattern<&'b [char; N]>`; `'a` is the
// haystack lifetime and `'b` the lifetime of the borrowed char array.
#[derive(Clone, Debug)]
pub struct CharArrayRefSearcher<'a, 'b, const N: usize>(
    <MultiCharEqPattern<&'b [char; N]> as Pattern>::Searcher<'a>,
);
820
/// Searches for chars that are equal to any of the [`char`]s in the array.
///
/// # Examples
///
/// ```
/// assert_eq!("Hello world".find(['o', 'l']), Some(2));
/// assert_eq!("Hello world".find(['h', 'w']), Some(6));
/// ```
impl<const N: usize> Pattern for [char; N] {
    // Wraps `self` in `MultiCharEqPattern` and forwards every `Pattern` method to
    // that wrapper; `CharArraySearcher<'a, N>` is the searcher type exposed here.
    pattern_methods!('a, CharArraySearcher<'a, N>, MultiCharEqPattern, CharArraySearcher);
}
832
// Every method forwards to the wrapped searcher, so this `unsafe impl` relies on
// the wrapped searcher upholding the `Searcher` contract.
unsafe impl<'a, const N: usize> Searcher<'a> for CharArraySearcher<'a, N> {
    searcher_methods!(forward);
}
836
// Backward search; every method forwards to the wrapped searcher, which is
// relied upon to uphold the `ReverseSearcher` contract.
unsafe impl<'a, const N: usize> ReverseSearcher<'a> for CharArraySearcher<'a, N> {
    searcher_methods!(reverse);
}
840
// Marker impl with an empty body; see `DoubleEndedSearcher` for the contract it asserts.
impl<'a, const N: usize> DoubleEndedSearcher<'a> for CharArraySearcher<'a, N> {}
842
/// Searches for chars that are equal to any of the [`char`]s in the array.
///
/// # Examples
///
/// ```
/// assert_eq!("Hello world".find(&['o', 'l']), Some(2));
/// assert_eq!("Hello world".find(&['h', 'w']), Some(6));
/// ```
impl<'b, const N: usize> Pattern for &'b [char; N] {
    // Wraps the borrowed array in `MultiCharEqPattern` and forwards every `Pattern`
    // method to that wrapper; `CharArrayRefSearcher<'a, 'b, N>` is the searcher type.
    pattern_methods!('a, CharArrayRefSearcher<'a, 'b, N>, MultiCharEqPattern, CharArrayRefSearcher);
}
854
// Every method forwards to the wrapped searcher, so this `unsafe impl` relies on
// the wrapped searcher upholding the `Searcher` contract.
unsafe impl<'a, 'b, const N: usize> Searcher<'a> for CharArrayRefSearcher<'a, 'b, N> {
    searcher_methods!(forward);
}
858
// Backward search; every method forwards to the wrapped searcher, which is
// relied upon to uphold the `ReverseSearcher` contract.
unsafe impl<'a, 'b, const N: usize> ReverseSearcher<'a> for CharArrayRefSearcher<'a, 'b, N> {
    searcher_methods!(reverse);
}
862
// Marker impl with an empty body; see `DoubleEndedSearcher` for the contract it asserts.
impl<'a, 'b, const N: usize> DoubleEndedSearcher<'a> for CharArrayRefSearcher<'a, 'b, N> {}
864
865/////////////////////////////////////////////////////////////////////////////
866// Impl for &[char]
867/////////////////////////////////////////////////////////////////////////////
868
// TODO: Change or remove this impl due to ambiguity in its meaning.
870
/// Associated type for `<&[char] as Pattern>::Searcher<'a>`.
// Newtype around the searcher of `MultiCharEqPattern<&'b [char]>`; `'a` is the
// haystack lifetime and `'b` the lifetime of the borrowed char slice.
#[derive(Clone, Debug)]
pub struct CharSliceSearcher<'a, 'b>(<MultiCharEqPattern<&'b [char]> as Pattern>::Searcher<'a>);
874
// Every method forwards to the wrapped searcher, so this `unsafe impl` relies on
// the wrapped searcher upholding the `Searcher` contract.
unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> {
    searcher_methods!(forward);
}
878
// Backward search; every method forwards to the wrapped searcher, which is
// relied upon to uphold the `ReverseSearcher` contract.
unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> {
    searcher_methods!(reverse);
}
882
// Marker impl with an empty body; see `DoubleEndedSearcher` for the contract it asserts.
impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {}
884
/// Searches for chars that are equal to any of the [`char`]s in the slice.
///
/// # Examples
///
/// ```
/// assert_eq!("Hello world".find(&['o', 'l'][..]), Some(2));
/// assert_eq!("Hello world".find(&['h', 'w'][..]), Some(6));
/// ```
impl<'b> Pattern for &'b [char] {
    // Wraps the slice in `MultiCharEqPattern` and forwards every `Pattern` method to
    // that wrapper; `CharSliceSearcher<'a, 'b>` is the searcher type exposed here.
    pattern_methods!('a, CharSliceSearcher<'a, 'b>, MultiCharEqPattern, CharSliceSearcher);
}
896
897/////////////////////////////////////////////////////////////////////////////
898// Impl for F: FnMut(char) -> bool
899/////////////////////////////////////////////////////////////////////////////
900
/// Associated type for `<F as Pattern>::Searcher<'a>`.
// Newtype around the searcher of `MultiCharEqPattern<F>`. Only `Clone` is derived;
// `Debug` is implemented by hand for this type and does not print the predicate
// (presumably because `F` is not required to be `Debug`).
#[derive(Clone)]
pub struct CharPredicateSearcher<'a, F>(<MultiCharEqPattern<F> as Pattern>::Searcher<'a>)
where
    F: FnMut(char) -> bool;
906
907impl<F> fmt::Debug for CharPredicateSearcher<'_, F>
908where
909    F: FnMut(char) -> bool,
910{
911    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
912        f.debug_struct("CharPredicateSearcher")
913            .field("haystack", &self.0.haystack)
914            .field("char_indices", &self.0.char_indices)
915            .finish()
916    }
917}
// Every method forwards to the wrapped searcher, so this `unsafe impl` relies on
// the wrapped searcher upholding the `Searcher` contract.
unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F>
where
    F: FnMut(char) -> bool,
{
    searcher_methods!(forward);
}
924
// Backward search; every method forwards to the wrapped searcher, which is
// relied upon to uphold the `ReverseSearcher` contract.
unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F>
where
    F: FnMut(char) -> bool,
{
    searcher_methods!(reverse);
}
931
// Marker impl with an empty body; see `DoubleEndedSearcher` for the contract it asserts.
impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool {}
933
/// Searches for [`char`]s that match the given predicate.
///
/// # Examples
///
/// ```
/// assert_eq!("Hello world".find(char::is_uppercase), Some(0));
/// assert_eq!("Hello world".find(|c| "aeiou".contains(c)), Some(1));
/// ```
impl<F> Pattern for F
where
    F: FnMut(char) -> bool,
{
    // Wraps the predicate in `MultiCharEqPattern` and forwards every `Pattern` method
    // to that wrapper; `CharPredicateSearcher<'a, F>` is the searcher type exposed here.
    pattern_methods!('a, CharPredicateSearcher<'a, F>, MultiCharEqPattern, CharPredicateSearcher);
}
948
949/////////////////////////////////////////////////////////////////////////////
950// Impl for &&str
951/////////////////////////////////////////////////////////////////////////////
952
/// Delegates to the `&str` impl.
impl<'b, 'c> Pattern for &'c &'b str {
    // `|&s| s` strips one reference level (`&&str` -> `&str`) before each method is
    // forwarded; the second closure, `|s| s`, is the identity, so the searcher type
    // stays `StrSearcher<'a, 'b>`.
    pattern_methods!('a, StrSearcher<'a, 'b>, |&s| s, |s| s);
}
957
958/////////////////////////////////////////////////////////////////////////////
959// Impl for &str
960/////////////////////////////////////////////////////////////////////////////
961
962/// Non-allocating substring search.
963///
964/// Will handle the pattern `""` as returning empty matches at each character
965/// boundary.
966///
967/// # Examples
968///
969/// ```
970/// assert_eq!("Hello world".find("world"), Some(6));
971/// ```
972impl<'b> Pattern for &'b str {
973    type Searcher<'a> = StrSearcher<'a, 'b>;
974
975    #[inline]
976    fn into_searcher(self, haystack: &str) -> StrSearcher<'_, 'b> {
977        StrSearcher::new(haystack, self)
978    }
979
980    /// Checks whether the pattern matches at the front of the haystack.
981    #[inline]
982    fn is_prefix_of(self, haystack: &str) -> bool {
983        haystack.as_bytes().starts_with(self.as_bytes())
984    }
985
986    /// Checks whether the pattern matches anywhere in the haystack
987    #[inline]
988    fn is_contained_in(self, haystack: &str) -> bool {
989        if self.len() == 0 {
990            return true;
991        }
992
993        match self.len().cmp(&haystack.len()) {
994            Ordering::Less => {
995                if self.len() == 1 {
996                    return haystack.as_bytes().contains(&self.as_bytes()[0]);
997                }
998
999                #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
1000                if self.len() <= 32 {
1001                    if let Some(result) = simd_contains(self, haystack) {
1002                        return result;
1003                    }
1004                }
1005
1006                self.into_searcher(haystack).next_match().is_some()
1007            }
1008            _ => self == haystack,
1009        }
1010    }
1011
1012    /// Removes the pattern from the front of haystack, if it matches.
1013    #[inline]
1014    fn strip_prefix_of(self, haystack: &str) -> Option<&str> {
1015        if self.is_prefix_of(haystack) {
1016            // SAFETY: prefix was just verified to exist.
1017            unsafe { Some(haystack.get_unchecked(self.as_bytes().len()..)) }
1018        } else {
1019            None
1020        }
1021    }
1022
1023    /// Checks whether the pattern matches at the back of the haystack.
1024    #[inline]
1025    fn is_suffix_of<'a>(self, haystack: &'a str) -> bool
1026    where
1027        Self::Searcher<'a>: ReverseSearcher<'a>,
1028    {
1029        haystack.as_bytes().ends_with(self.as_bytes())
1030    }
1031
1032    /// Removes the pattern from the back of haystack, if it matches.
1033    #[inline]
1034    fn strip_suffix_of<'a>(self, haystack: &'a str) -> Option<&'a str>
1035    where
1036        Self::Searcher<'a>: ReverseSearcher<'a>,
1037    {
1038        if self.is_suffix_of(haystack) {
1039            let i = haystack.len() - self.as_bytes().len();
1040            // SAFETY: suffix was just verified to exist.
1041            unsafe { Some(haystack.get_unchecked(..i)) }
1042        } else {
1043            None
1044        }
1045    }
1046
1047    #[inline]
1048    fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
1049        Some(Utf8Pattern::StringPattern(self.as_bytes()))
1050    }
1051}
1052
1053/////////////////////////////////////////////////////////////////////////////
1054// Two Way substring searcher
1055/////////////////////////////////////////////////////////////////////////////
1056
#[derive(Clone, Debug)]
/// Associated type for `<&str as Pattern>::Searcher<'a>`.
pub struct StrSearcher<'a, 'b> {
    // The string being searched.
    haystack: &'a str,
    // The substring being searched for.
    needle: &'b str,

    // Search state: `Empty` when `needle` is empty, `TwoWay` otherwise
    // (chosen once in `StrSearcher::new`).
    searcher: StrSearcherImpl,
}
1065
// The search strategy in use, selected by `StrSearcher::new`.
#[derive(Clone, Debug)]
enum StrSearcherImpl {
    // State used when the needle is empty.
    Empty(EmptyNeedle),
    // Two-way search state used for every non-empty needle.
    TwoWay(TwoWaySearcher),
}
1071
// Search state for an empty needle: each direction alternates between reporting
// an empty match at its cursor and rejecting the char next to it.
#[derive(Clone, Debug)]
struct EmptyNeedle {
    // Byte offset of the forward cursor; starts at 0.
    position: usize,
    // Byte offset of the backward cursor; starts at `haystack.len()`.
    end: usize,
    // Whether the next forward step reports a match; flipped on every forward step.
    is_match_fw: bool,
    // Whether the next backward step reports a match; flipped on every backward step.
    is_match_bw: bool,
    // Needed in case of an empty haystack, see #85462
    is_finished: bool,
}
1081
1082impl<'a, 'b> StrSearcher<'a, 'b> {
1083    fn new(haystack: &'a str, needle: &'b str) -> StrSearcher<'a, 'b> {
1084        if needle.is_empty() {
1085            StrSearcher {
1086                haystack,
1087                needle,
1088                searcher: StrSearcherImpl::Empty(EmptyNeedle {
1089                    position: 0,
1090                    end: haystack.len(),
1091                    is_match_fw: true,
1092                    is_match_bw: true,
1093                    is_finished: false,
1094                }),
1095            }
1096        } else {
1097            StrSearcher {
1098                haystack,
1099                needle,
1100                searcher: StrSearcherImpl::TwoWay(TwoWaySearcher::new(
1101                    needle.as_bytes(),
1102                    haystack.len(),
1103                )),
1104            }
1105        }
1106    }
1107}
1108
// Forward search for `StrSearcher`. Each method dispatches on whether the needle
// was empty (`Empty`) or not (`TwoWay`).
unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
    #[inline]
    fn haystack(&self) -> &'a str {
        self.haystack
    }

    #[inline]
    fn next(&mut self) -> SearchStep {
        match self.searcher {
            StrSearcherImpl::Empty(ref mut searcher) => {
                if searcher.is_finished {
                    return SearchStep::Done;
                }
                // empty needle rejects every char and matches every empty string between them
                let is_match = searcher.is_match_fw;
                searcher.is_match_fw = !searcher.is_match_fw;
                let pos = searcher.position;
                match self.haystack[pos..].chars().next() {
                    // Match turn: report the empty match at `pos`; the cursor stays put.
                    _ if is_match => SearchStep::Match(pos, pos),
                    // Reject turn with no char left: the search is over.
                    None => {
                        searcher.is_finished = true;
                        SearchStep::Done
                    }
                    // Reject turn: step over one whole char and reject its byte range.
                    Some(ch) => {
                        searcher.position += ch.len_utf8();
                        SearchStep::Reject(pos, searcher.position)
                    }
                }
            }
            StrSearcherImpl::TwoWay(ref mut searcher) => {
                // TwoWaySearcher produces valid *Match* indices that split at char boundaries
                // as long as it does correct matching and that haystack and needle are
                // valid UTF-8
                // *Rejects* from the algorithm can fall on any indices, but we will walk them
                // manually to the next character boundary, so that they are utf-8 safe.
                if searcher.position == self.haystack.len() {
                    return SearchStep::Done;
                }
                // `memory == usize::MAX` is the marker `TwoWaySearcher::new` sets for
                // the long-period case.
                let is_long = searcher.memory == usize::MAX;
                match searcher.next::<RejectAndMatch>(
                    self.haystack.as_bytes(),
                    self.needle.as_bytes(),
                    is_long,
                ) {
                    SearchStep::Reject(a, mut b) => {
                        // skip to next char boundary
                        while !self.haystack.is_char_boundary(b) {
                            b += 1;
                        }
                        // Never move the cursor backwards: keep whichever is further along.
                        searcher.position = cmp::max(b, searcher.position);
                        SearchStep::Reject(a, b)
                    }
                    otherwise => otherwise,
                }
            }
        }
    }

    #[inline]
    fn next_match(&mut self) -> Option<(usize, usize)> {
        match self.searcher {
            // Empty needle: drive `next()` until it yields a match or finishes.
            StrSearcherImpl::Empty(..) => loop {
                match self.next() {
                    SearchStep::Match(a, b) => return Some((a, b)),
                    SearchStep::Done => return None,
                    SearchStep::Reject(..) => {}
                }
            },
            StrSearcherImpl::TwoWay(ref mut searcher) => {
                let is_long = searcher.memory == usize::MAX;
                // write out `true` and `false` cases to encourage the compiler
                // to specialize the two cases separately.
                if is_long {
                    searcher.next::<MatchOnly>(
                        self.haystack.as_bytes(),
                        self.needle.as_bytes(),
                        true,
                    )
                } else {
                    searcher.next::<MatchOnly>(
                        self.haystack.as_bytes(),
                        self.needle.as_bytes(),
                        false,
                    )
                }
            }
        }
    }
}
1198
// Backward search for `StrSearcher`, mirroring the forward impl but using the
// `end` cursors and the `*_bw` / `*_back` state.
unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
    #[inline]
    fn next_back(&mut self) -> SearchStep {
        match self.searcher {
            StrSearcherImpl::Empty(ref mut searcher) => {
                if searcher.is_finished {
                    return SearchStep::Done;
                }
                // As in `next()`: alternate between an empty match at the cursor and
                // rejecting the char just before it.
                let is_match = searcher.is_match_bw;
                searcher.is_match_bw = !searcher.is_match_bw;
                let end = searcher.end;
                match self.haystack[..end].chars().next_back() {
                    // Match turn: report the empty match at `end`; the cursor stays put.
                    _ if is_match => SearchStep::Match(end, end),
                    // Reject turn with no char left: the search is over.
                    None => {
                        searcher.is_finished = true;
                        SearchStep::Done
                    }
                    // Reject turn: step back over one whole char and reject its byte range.
                    Some(ch) => {
                        searcher.end -= ch.len_utf8();
                        SearchStep::Reject(searcher.end, end)
                    }
                }
            }
            StrSearcherImpl::TwoWay(ref mut searcher) => {
                if searcher.end == 0 {
                    return SearchStep::Done;
                }
                // `memory == usize::MAX` is the marker `TwoWaySearcher::new` sets for
                // the long-period case.
                let is_long = searcher.memory == usize::MAX;
                match searcher.next_back::<RejectAndMatch>(
                    self.haystack.as_bytes(),
                    self.needle.as_bytes(),
                    is_long,
                ) {
                    SearchStep::Reject(mut a, b) => {
                        // step back to the previous char boundary
                        while !self.haystack.is_char_boundary(a) {
                            a -= 1;
                        }
                        // Never move the cursor forwards: keep whichever is further back.
                        searcher.end = cmp::min(a, searcher.end);
                        SearchStep::Reject(a, b)
                    }
                    otherwise => otherwise,
                }
            }
        }
    }

    #[inline]
    fn next_match_back(&mut self) -> Option<(usize, usize)> {
        match self.searcher {
            // Empty needle: drive `next_back()` until it yields a match or finishes.
            StrSearcherImpl::Empty(..) => loop {
                match self.next_back() {
                    SearchStep::Match(a, b) => return Some((a, b)),
                    SearchStep::Done => return None,
                    SearchStep::Reject(..) => {}
                }
            },
            StrSearcherImpl::TwoWay(ref mut searcher) => {
                let is_long = searcher.memory == usize::MAX;
                // write out `true` and `false`, like `next_match`
                if is_long {
                    searcher.next_back::<MatchOnly>(
                        self.haystack.as_bytes(),
                        self.needle.as_bytes(),
                        true,
                    )
                } else {
                    searcher.next_back::<MatchOnly>(
                        self.haystack.as_bytes(),
                        self.needle.as_bytes(),
                        false,
                    )
                }
            }
        }
    }
}
1276
/// The internal state of the two-way substring search algorithm.
#[derive(Clone, Debug)]
struct TwoWaySearcher {
    // constants
    /// critical factorization index
    crit_pos: usize,
    /// critical factorization index for reversed needle
    crit_pos_back: usize,
    /// period of the needle: exact in the short-period case, otherwise the
    /// approximation `max(crit_pos, needle.len() - crit_pos) + 1`
    period: usize,
    /// `byteset` is an extension (not part of the two way algorithm);
    /// it's a 64-bit "fingerprint" where each set bit `j` corresponds
    /// to a (byte & 63) == j present in the needle.
    byteset: u64,

    // variables
    /// cursor used by `next()`; starts at 0
    position: usize,
    /// cursor used by `next_back()`; starts at the haystack length
    end: usize,
    /// index into needle before which we have already matched
    /// (`usize::MAX` marks the long-period case, where no memory is kept)
    memory: usize,
    /// index into needle after which we have already matched
    /// (`usize::MAX` in the long-period case)
    memory_back: usize,
}
1299
1300/*
1301    This is the Two-Way search algorithm, which was introduced in the paper:
1302    Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675.
1303
1304    Here's some background information.
1305
1306    A *word* is a string of symbols. The *length* of a word should be a familiar
1307    notion, and here we denote it for any word x by |x|.
1308    (We also allow for the possibility of the *empty word*, a word of length zero).
1309
1310    If x is any non-empty word, then an integer p with 0 < p <= |x| is said to be a
1311    *period* for x iff for all i with 0 <= i <= |x| - p - 1, we have x[i] == x[i+p].
1312    For example, both 1 and 2 are periods for the string "aa". As another example,
1313    the only period of the string "abcd" is 4.
1314
1315    We denote by period(x) the *smallest* period of x (provided that x is non-empty).
1316    This is always well-defined since every non-empty word x has at least one period,
1317    |x|. We sometimes call this *the period* of x.
1318
1319    If u, v and x are words such that x = uv, where uv is the concatenation of u and
1320    v, then we say that (u, v) is a *factorization* of x.
1321
1322    Let (u, v) be a factorization for a word x. Then if w is a non-empty word such
1323    that both of the following hold
1324
1325      - either w is a suffix of u or u is a suffix of w
1326      - either w is a prefix of v or v is a prefix of w
1327
1328    then w is said to be a *repetition* for the factorization (u, v).
1329
1330    Just to unpack this, there are four possibilities here. Let w = "abc". Then we
1331    might have:
1332
1333      - w is a suffix of u and w is a prefix of v. ex: ("lolabc", "abcde")
1334      - w is a suffix of u and v is a prefix of w. ex: ("lolabc", "ab")
1335      - u is a suffix of w and w is a prefix of v. ex: ("bc", "abchi")
1336      - u is a suffix of w and v is a prefix of w. ex: ("bc", "a")
1337
1338    Note that the word vu is a repetition for any factorization (u,v) of x = uv,
1339    so every factorization has at least one repetition.
1340
1341    If x is a string and (u, v) is a factorization for x, then a *local period* for
1342    (u, v) is an integer r such that there is some word w such that |w| = r and w is
1343    a repetition for (u, v).
1344
    We denote by local_period(u, v) the smallest local period of (u, v). We sometimes
    call this *the local period* of (u, v). Provided that x = uv is non-empty, this
    is well-defined (because every factorization has at least one repetition, as
    noted above).
1349
    It can be proven that the following is an equivalent definition of a local period
    for a factorization (u, v): any positive integer r such that x[i] == x[i+r] for
    all i such that |u| - r <= i <= |u| - 1 and such that both x[i] and x[i+r] are
    defined. (i.e., i >= 0 and i + r < |x|).
1354
1355    Using the above reformulation, it is easy to prove that
1356
1357        1 <= local_period(u, v) <= period(uv)
1358
1359    A factorization (u, v) of x such that local_period(u,v) = period(x) is called a
1360    *critical factorization*.
1361
1362    The algorithm hinges on the following theorem, which is stated without proof:
1363
1364    **Critical Factorization Theorem** Any word x has at least one critical
1365    factorization (u, v) such that |u| < period(x).
1366
1367    The purpose of maximal_suffix is to find such a critical factorization.
1368
1369    If the period is short, compute another factorization x = u' v' to use
1370    for reverse search, chosen instead so that |v'| < period(x).
1371
1372*/
1373impl TwoWaySearcher {
1374    fn new(needle: &[u8], end: usize) -> TwoWaySearcher {
1375        let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
1376        let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
1377
1378        let (crit_pos, period) = if crit_pos_false > crit_pos_true {
1379            (crit_pos_false, period_false)
1380        } else {
1381            (crit_pos_true, period_true)
1382        };
1383
1384        // A particularly readable explanation of what's going on here can be found
1385        // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
1386        // see the code for "Algorithm CP" on p. 323.
1387        //
1388        // What's going on is we have some critical factorization (u, v) of the
1389        // needle, and we want to determine whether u is a suffix of
1390        // &v[..period]. If it is, we use "Algorithm CP1". Otherwise we use
1391        // "Algorithm CP2", which is optimized for when the period of the needle
1392        // is large.
1393        if needle[..crit_pos] == needle[period..period + crit_pos] {
1394            // short period case -- the period is exact
1395            // compute a separate critical factorization for the reversed needle
1396            // x = u' v' where |v'| < period(x).
1397            //
1398            // This is sped up by the period being known already.
1399            // Note that a case like x = "acba" may be factored exactly forwards
1400            // (crit_pos = 1, period = 3) while being factored with approximate
1401            // period in reverse (crit_pos = 2, period = 2). We use the given
1402            // reverse factorization but keep the exact period.
1403            let crit_pos_back = needle.len()
1404                - cmp::max(
1405                    TwoWaySearcher::reverse_maximal_suffix(needle, period, false),
1406                    TwoWaySearcher::reverse_maximal_suffix(needle, period, true),
1407                );
1408
1409            TwoWaySearcher {
1410                crit_pos,
1411                crit_pos_back,
1412                period,
1413                byteset: Self::byteset_create(&needle[..period]),
1414
1415                position: 0,
1416                end,
1417                memory: 0,
1418                memory_back: needle.len(),
1419            }
1420        } else {
1421            // long period case -- we have an approximation to the actual period,
1422            // and don't use memorization.
1423            //
1424            // Approximate the period by lower bound max(|u|, |v|) + 1.
1425            // The critical factorization is efficient to use for both forward and
1426            // reverse search.
1427
1428            TwoWaySearcher {
1429                crit_pos,
1430                crit_pos_back: crit_pos,
1431                period: cmp::max(crit_pos, needle.len() - crit_pos) + 1,
1432                byteset: Self::byteset_create(needle),
1433
1434                position: 0,
1435                end,
1436                memory: usize::MAX, // Dummy value to signify that the period is long
1437                memory_back: usize::MAX,
1438            }
1439        }
1440    }
1441
1442    #[inline]
1443    fn byteset_create(bytes: &[u8]) -> u64 {
1444        bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a)
1445    }
1446
1447    #[inline]
1448    fn byteset_contains(&self, byte: u8) -> bool {
1449        (self.byteset >> ((byte & 0x3f) as usize)) & 1 != 0
1450    }
1451
    // One of the main ideas of Two-Way is that we factorize the needle into
    // two halves, (u, v), and begin trying to find v in the haystack by scanning
    // left to right. If v matches, we try to match u by scanning right to left.
    // How far we can jump when we encounter a mismatch is all based on the fact
    // that (u, v) is a critical factorization for the needle.
    //
    // `long_period` selects the variant without memorization (`self.memory` is
    // then neither read nor written). The strategy `S` decides what is returned
    // for matches and rejects, and whether skipped ranges are reported eagerly.
    // `needle` must be non-empty (`needle.len() - 1` is computed below);
    // `StrSearcher::new` only builds a `TwoWaySearcher` for non-empty needles.
    #[inline]
    fn next<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output
    where
        S: TwoWayStrategy,
    {
        // `next()` uses `self.position` as its cursor
        let old_pos = self.position;
        let needle_last = needle.len() - 1;
        'search: loop {
            // Check that we have room to search in
            // position + needle_last can not overflow if we assume slices
            // are bounded by isize's range.
            let tail_byte = match haystack.get(self.position + needle_last) {
                Some(&b) => b,
                None => {
                    // The needle no longer fits: consume the rest of the haystack.
                    self.position = haystack.len();
                    return S::rejecting(old_pos, self.position);
                }
            };

            // If the strategy reports rejects eagerly and an earlier iteration
            // already advanced the cursor, hand back the skipped range now.
            if S::use_early_reject() && old_pos != self.position {
                return S::rejecting(old_pos, self.position);
            }

            // Quickly skip by large portions unrelated to our substring
            if !self.byteset_contains(tail_byte) {
                self.position += needle.len();
                if !long_period {
                    self.memory = 0;
                }
                continue 'search;
            }

            // See if the right part of the needle matches
            let start =
                if long_period { self.crit_pos } else { cmp::max(self.crit_pos, self.memory) };
            for i in start..needle.len() {
                if needle[i] != haystack[self.position + i] {
                    // Mismatch in the right part: shift by how far past the critical
                    // position the mismatch occurred, and forget any memory.
                    self.position += i - self.crit_pos + 1;
                    if !long_period {
                        self.memory = 0;
                    }
                    continue 'search;
                }
            }

            // See if the left part of the needle matches
            let start = if long_period { 0 } else { self.memory };
            for i in (start..self.crit_pos).rev() {
                if needle[i] != haystack[self.position + i] {
                    // Mismatch in the left part: shift by the period. In the
                    // short-period case, record that the first
                    // `needle.len() - period` bytes are already matched there.
                    self.position += self.period;
                    if !long_period {
                        self.memory = needle.len() - self.period;
                    }
                    continue 'search;
                }
            }

            // We have found a match!
            let match_pos = self.position;

            // Note: add self.period instead of needle.len() to have overlapping matches
            self.position += needle.len();
            if !long_period {
                self.memory = 0; // set to needle.len() - self.period for overlapping matches
            }

            return S::matching(match_pos, match_pos + needle.len());
        }
    }
1527
    // Follows the ideas in `next()`.
    //
    // The definitions are symmetrical, with period(x) = period(reverse(x))
    // and local_period(u, v) = local_period(reverse(v), reverse(u)), so if (u, v)
    // is a critical factorization, so is (reverse(v), reverse(u)).
    //
    // For the reverse case we have computed a critical factorization x = u' v'
    // (field `crit_pos_back`). We need |u| < period(x) for the forward case and
    // thus |v'| < period(x) for the reverse.
    //
    // To search in reverse through the haystack, we search forward through
    // a reversed haystack with a reversed needle, matching first u' and then v'.
    //
    // `long_period` selects the variant without memorization (`self.memory_back`
    // is then neither read nor written); `S` plays the same role as in `next()`.
    #[inline]
    fn next_back<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output
    where
        S: TwoWayStrategy,
    {
        // `next_back()` uses `self.end` as its cursor -- so that `next()` and `next_back()`
        // are independent.
        let old_end = self.end;
        'search: loop {
            // Check that we have room to search in
            // end - needle.len() will wrap around when there is no more room,
            // but due to slice length limits it can never wrap all the way back
            // into the length of haystack.
            let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) {
                Some(&b) => b,
                None => {
                    // The needle no longer fits: consume the rest of the haystack.
                    self.end = 0;
                    return S::rejecting(0, old_end);
                }
            };

            // If the strategy reports rejects eagerly and an earlier iteration
            // already moved the cursor, hand back the skipped range now.
            if S::use_early_reject() && old_end != self.end {
                return S::rejecting(self.end, old_end);
            }

            // Quickly skip by large portions unrelated to our substring
            if !self.byteset_contains(front_byte) {
                self.end -= needle.len();
                if !long_period {
                    self.memory_back = needle.len();
                }
                continue 'search;
            }

            // See if the left part of the needle matches
            let crit = if long_period {
                self.crit_pos_back
            } else {
                cmp::min(self.crit_pos_back, self.memory_back)
            };
            for i in (0..crit).rev() {
                if needle[i] != haystack[self.end - needle.len() + i] {
                    // Mismatch in the left part: shift back by the distance from the
                    // mismatch to the reverse critical position, and reset the memory.
                    self.end -= self.crit_pos_back - i;
                    if !long_period {
                        self.memory_back = needle.len();
                    }
                    continue 'search;
                }
            }

            // See if the right part of the needle matches
            let needle_end = if long_period { needle.len() } else { self.memory_back };
            for i in self.crit_pos_back..needle_end {
                if needle[i] != haystack[self.end - needle.len() + i] {
                    // Mismatch in the right part: shift back by the period. In the
                    // short-period case, record that the bytes from index `period`
                    // onwards are already matched there.
                    self.end -= self.period;
                    if !long_period {
                        self.memory_back = self.period;
                    }
                    continue 'search;
                }
            }

            // We have found a match!
            let match_pos = self.end - needle.len();
            // Note: sub self.period instead of needle.len() to have overlapping matches
            self.end -= needle.len();
            if !long_period {
                self.memory_back = needle.len();
            }

            return S::matching(match_pos, match_pos + needle.len());
        }
    }
1613
1614    // Compute the maximal suffix of `arr`.
1615    //
1616    // The maximal suffix is a possible critical factorization (u, v) of `arr`.
1617    //
1618    // Returns (`i`, `p`) where `i` is the starting index of v and `p` is the
1619    // period of v.
1620    //
1621    // `order_greater` determines if lexical order is `<` or `>`. Both
1622    // orders must be computed -- the ordering with the largest `i` gives
1623    // a critical factorization.
1624    //
1625    // For long period cases, the resulting period is not exact (it is too short).
1626    #[inline]
1627    fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
1628        let mut left = 0; // Corresponds to i in the paper
1629        let mut right = 1; // Corresponds to j in the paper
1630        let mut offset = 0; // Corresponds to k in the paper, but starting at 0
1631        // to match 0-based indexing.
1632        let mut period = 1; // Corresponds to p in the paper
1633
1634        while let Some(&a) = arr.get(right + offset) {
1635            // `left` will be inbounds when `right` is.
1636            let b = arr[left + offset];
1637            if (a < b && !order_greater) || (a > b && order_greater) {
1638                // Suffix is smaller, period is entire prefix so far.
1639                right += offset + 1;
1640                offset = 0;
1641                period = right - left;
1642            } else if a == b {
1643                // Advance through repetition of the current period.
1644                if offset + 1 == period {
1645                    right += offset + 1;
1646                    offset = 0;
1647                } else {
1648                    offset += 1;
1649                }
1650            } else {
1651                // Suffix is larger, start over from current location.
1652                left = right;
1653                right += 1;
1654                offset = 0;
1655                period = 1;
1656            }
1657        }
1658        (left, period)
1659    }
1660
1661    // Compute the maximal suffix of the reverse of `arr`.
1662    //
1663    // The maximal suffix is a possible critical factorization (u', v') of `arr`.
1664    //
1665    // Returns `i` where `i` is the starting index of v', from the back;
1666    // returns immediately when a period of `known_period` is reached.
1667    //
1668    // `order_greater` determines if lexical order is `<` or `>`. Both
1669    // orders must be computed -- the ordering with the largest `i` gives
1670    // a critical factorization.
1671    //
1672    // For long period cases, the resulting period is not exact (it is too short).
1673    fn reverse_maximal_suffix(arr: &[u8], known_period: usize, order_greater: bool) -> usize {
1674        let mut left = 0; // Corresponds to i in the paper
1675        let mut right = 1; // Corresponds to j in the paper
1676        let mut offset = 0; // Corresponds to k in the paper, but starting at 0
1677        // to match 0-based indexing.
1678        let mut period = 1; // Corresponds to p in the paper
1679        let n = arr.len();
1680
1681        while right + offset < n {
1682            let a = arr[n - (1 + right + offset)];
1683            let b = arr[n - (1 + left + offset)];
1684            if (a < b && !order_greater) || (a > b && order_greater) {
1685                // Suffix is smaller, period is entire prefix so far.
1686                right += offset + 1;
1687                offset = 0;
1688                period = right - left;
1689            } else if a == b {
1690                // Advance through repetition of the current period.
1691                if offset + 1 == period {
1692                    right += offset + 1;
1693                    offset = 0;
1694                } else {
1695                    offset += 1;
1696                }
1697            } else {
1698                // Suffix is larger, start over from current location.
1699                left = right;
1700                right += 1;
1701                offset = 0;
1702                period = 1;
1703            }
1704            if period == known_period {
1705                break;
1706            }
1707        }
1708        debug_assert!(period <= known_period);
1709        left
1710    }
1711}
1712
// TwoWayStrategy allows the algorithm to either skip non-matches as quickly
// as possible, or to work in a mode where it emits Rejects relatively quickly.
//
// All items are associated (no `self`): the strategy is selected purely as a
// type parameter of the search routine.
trait TwoWayStrategy {
    /// The value the search routine returns for each step it reports.
    type Output;
    /// Whether the search routine should return a reject as soon as its
    /// position has moved, instead of continuing to look for the next match.
    fn use_early_reject() -> bool;
    /// Builds the output for a rejected byte range; `a` is the start index
    /// and `b` the end index of the range.
    fn rejecting(a: usize, b: usize) -> Self::Output;
    /// Builds the output for a match; `a` is the start index of the match
    /// and `b` the index just past its end.
    fn matching(a: usize, b: usize) -> Self::Output;
}
1721
1722/// Skip to match intervals as quickly as possible
1723enum MatchOnly {}
1724
1725impl TwoWayStrategy for MatchOnly {
1726    type Output = Option<(usize, usize)>;
1727
1728    #[inline]
1729    fn use_early_reject() -> bool {
1730        false
1731    }
1732    #[inline]
1733    fn rejecting(_a: usize, _b: usize) -> Self::Output {
1734        None
1735    }
1736    #[inline]
1737    fn matching(a: usize, b: usize) -> Self::Output {
1738        Some((a, b))
1739    }
1740}
1741
1742/// Emit Rejects regularly
1743enum RejectAndMatch {}
1744
1745impl TwoWayStrategy for RejectAndMatch {
1746    type Output = SearchStep;
1747
1748    #[inline]
1749    fn use_early_reject() -> bool {
1750        true
1751    }
1752    #[inline]
1753    fn rejecting(a: usize, b: usize) -> Self::Output {
1754        SearchStep::Reject(a, b)
1755    }
1756    #[inline]
1757    fn matching(a: usize, b: usize) -> Self::Output {
1758        SearchStep::Match(a, b)
1759    }
1760}
1761
/// SIMD search for short needles based on
/// Wojciech Muła's "SIMD-friendly algorithms for substring searching"[0]
///
/// It skips ahead by the vector width on each iteration (rather than the needle length as two-way
/// does) by probing two bytes of the needle (the first byte and a byte near the end) for the
/// whole vector width and only doing full needle comparisons when the vectorized probe
/// indicated potential matches.
///
/// Since the x86_64 baseline only offers SSE2 we only use u8x16 here.
/// If we ever ship std built for x86-64-v3 or adapt this for other platforms then wider vectors
/// should be evaluated.
///
/// For haystacks smaller than vector-size + needle length it falls back to
/// a naive O(n*m) search so this implementation should not be called on larger needles.
///
/// The needle must be longer than one byte (checked with a `debug_assert!`).
///
/// Returns `Some(found)` when the search was carried out, and `None` when the needle is longer
/// than two bytes and none of its last (up to) four bytes differs from its first byte; the
/// caller is then expected to fall back to another search method.
///
/// [0]: http://0x80.pl/articles/simd-strfind.html#sse-avx2
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[inline]
fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
    let needle = needle.as_bytes();
    let haystack = haystack.as_bytes();

    debug_assert!(needle.len() > 1);

    use crate::ops::BitAnd;
    use crate::simd::cmp::SimdPartialEq;
    use crate::simd::{mask8x16 as Mask, u8x16 as Block};

    let first_probe = needle[0];
    let last_byte_offset = needle.len() - 1;

    // the offset used for the 2nd vector
    let second_probe_offset = if needle.len() == 2 {
        // never bail out on len=2 needles because the probes will fully cover them and have
        // no degenerate cases.
        1
    } else {
        // try a few bytes in case first and last byte of the needle are the same:
        // scan the last (up to) four bytes from the back for one that differs from the first
        let Some(second_probe_offset) =
            (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe)
        else {
            // fall back to other search methods if we can't find any different bytes
            // since we could otherwise hit some degenerate cases
            return None;
        };
        second_probe_offset
    };

    // do a naive search if the haystack is too small to fit even a single
    // block load at the largest probe offset
    if haystack.len() < Block::LEN + last_byte_offset {
        return Some(haystack.windows(needle.len()).any(|c| c == needle));
    }

    let first_probe: Block = Block::splat(first_probe);
    let second_probe: Block = Block::splat(needle[second_probe_offset]);
    // the first byte was already compared by the vectorized probe. to verify a match only the
    // remainder has to be compared.
    let trimmed_needle = &needle[1..];

    // Verifies the candidates flagged in `mask` for the block starting at `idx`.
    // `skip` lets the caller short-circuit once a match has already been found.
    // this #[cold] is load-bearing, benchmark before removing it...
    let check_mask = #[cold]
    |idx, mask: u16, skip: bool| -> bool {
        if skip {
            return false;
        }

        // and so is this. optimizations are weird.
        let mut mask = mask;

        // visit the set bits from lowest to highest; each bit position is the
        // offset of a candidate start relative to `idx`
        while mask != 0 {
            let trailing = mask.trailing_zeros();
            let offset = idx + trailing as usize + 1;
            // SAFETY: a non-zero u16 mask has between 0 and 15 trailing zeroes, we skip one
            // additional byte that was already compared and then take trimmed_needle.len()
            // bytes. The slice therefore ends at most at idx + Block::LEN + last_byte_offset,
            // which is within the bounds defined by the outer loops.
            unsafe {
                let sub = haystack.get_unchecked(offset..).get_unchecked(..trimmed_needle.len());
                if small_slice_eq(sub, trimmed_needle) {
                    return true;
                }
            }
            // clear the bit that was just examined
            mask &= !(1 << trailing);
        }
        false
    };

    // Probes one block starting at `idx` and returns a bitmask of the positions where both
    // probe bytes match.
    let test_chunk = |idx| -> u16 {
        // SAFETY: this requires at least Block::LEN bytes being readable at idx
        // that is ensured by the loop ranges (see comments below)
        let a: Block = unsafe { haystack.as_ptr().add(idx).cast::<Block>().read_unaligned() };
        // SAFETY: this requires Block::LEN + second_probe_offset bytes being readable at idx
        let b: Block = unsafe {
            haystack.as_ptr().add(idx).add(second_probe_offset).cast::<Block>().read_unaligned()
        };
        let eq_first: Mask = a.simd_eq(first_probe);
        let eq_last: Mask = b.simd_eq(second_probe);
        let both = eq_first.bitand(eq_last);
        let mask = both.to_bitmask() as u16;

        mask
    };

    let mut i = 0;
    let mut result = false;
    // The loop condition must ensure that there's enough headroom to read Block::LEN bytes,
    // and not only at the current index but also at the index shifted by second_probe_offset
    // (which is never larger than last_byte_offset).
    const UNROLL: usize = 4;
    while i + last_byte_offset + UNROLL * Block::LEN < haystack.len() && !result {
        // probe all UNROLL blocks first, then verify the candidates
        let mut masks = [0u16; UNROLL];
        for j in 0..UNROLL {
            masks[j] = test_chunk(i + j * Block::LEN);
        }
        for j in 0..UNROLL {
            let mask = masks[j];
            if mask != 0 {
                result |= check_mask(i + j * Block::LEN, mask, result);
            }
        }
        i += UNROLL * Block::LEN;
    }
    // one block at a time for whatever the unrolled loop left over
    while i + last_byte_offset + Block::LEN < haystack.len() && !result {
        let mask = test_chunk(i);
        if mask != 0 {
            result |= check_mask(i, mask, result);
        }
        i += Block::LEN;
    }

    // Process the tail that didn't fit into Block::LEN-sized steps.
    // This simply repeats the same procedure but as right-aligned chunk instead
    // of a left-aligned one. The last byte must be exactly flush with the string end so
    // we don't miss a single byte or read out of bounds.
    // The subtraction cannot underflow because shorter haystacks took the naive path above.
    let i = haystack.len() - last_byte_offset - Block::LEN;
    let mask = test_chunk(i);
    if mask != 0 {
        result |= check_mask(i, mask, result);
    }

    Some(result)
}
1900
/// Equality check tuned for short byte slices.
///
/// Calling libc's memcmp would be faster on long slices thanks to its SIMD
/// optimizations, but for tiny inputs the function call overhead dominates,
/// so the comparison is done inline here instead.
///
/// # Safety
///
/// Both slices must have the same length.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] // only called on x86
#[inline]
unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool {
    debug_assert_eq!(x.len(), y.len());
    // This function is adapted from
    // https://github.com/BurntSushi/memchr/blob/8037d11b4357b0f07be2bb66dc2659d9cf28ad32/src/memmem/util.rs#L32

    let len = x.len();

    // Too short for a 4-byte load: compare byte by byte.
    //
    // Potential alternative: We could do a copy_nonoverlapping combined with a mask instead
    // of a loop. Benchmark it.
    if len < 4 {
        return x.iter().zip(y).all(|(lhs, rhs)| lhs == rhs);
    }

    // With at least 4 bytes available, compare 4 bytes at a time using
    // unaligned loads.
    //
    // Why 4-byte rather than 8-byte loads? This routine is expected to be
    // called with tiny needles, so wider loads would push a larger share of
    // calls onto the slower byte-wise path above. This is a hypothesis that
    // is only loosely supported by benchmarks; there is likely room for
    // improvement. The goal is to optimize for latency, not throughput.

    // Offset of the final 4-byte window, flush with the end of the slices.
    let last = len - 4;

    // SAFETY: The caller guarantees that `x` and `y` have the same length,
    // and the early return above guarantees that length is at least 4.
    // Every offset used in the loop is strictly below `last`, and the final
    // load is at `last` itself, so each 4-byte read stays inside both
    // slices. (For lengths that are not a multiple of four the final window
    // overlaps the last one read in the loop.)
    //
    // Alignment is not a concern because only unaligned loads are used.
    unsafe {
        let (xp, yp) = (x.as_ptr(), y.as_ptr());
        let mut at = 0;
        while at < last {
            let lhs = xp.add(at).cast::<u32>().read_unaligned();
            let rhs = yp.add(at).cast::<u32>().read_unaligned();
            if lhs != rhs {
                return false;
            }
            at += 4;
        }
        xp.add(last).cast::<u32>().read_unaligned() == yp.add(last).cast::<u32>().read_unaligned()
    }
}
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy