From 4484085b18df4b10243b503a21602bb71836e8b3 Mon Sep 17 00:00:00 2001 From: Laiho Date: Mon, 9 Sep 2024 00:58:52 +0300 Subject: [PATCH] Add fast path for ascii to ascii in str::replace --- library/alloc/src/str.rs | 25 ++++++++++++++++++++++++- library/alloc/src/string.rs | 7 ++++++- library/core/src/str/pattern.rs | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs index 32212b61c6e87..709d11c52a06b 100644 --- a/library/alloc/src/str.rs +++ b/library/alloc/src/str.rs @@ -19,7 +19,7 @@ pub use core::str::SplitInclusive; pub use core::str::SplitWhitespace; #[stable(feature = "rust1", since = "1.0.0")] pub use core::str::pattern; -use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; +use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher, Utf8Pattern}; #[stable(feature = "rust1", since = "1.0.0")] pub use core::str::{Bytes, CharIndices, Chars, from_utf8, from_utf8_mut}; #[stable(feature = "str_escape", since = "1.34.0")] @@ -268,6 +268,18 @@ impl str { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn replace(&self, from: P, to: &str) -> String { + // Fast path for ASCII to ASCII case. + + if let Some(from_byte) = match from.as_utf8_pattern() { + Some(Utf8Pattern::StringPattern([from_byte])) => Some(*from_byte), + Some(Utf8Pattern::CharPattern(c)) => c.as_ascii().map(|ascii_char| ascii_char.to_u8()), + _ => None, + } { + if let [to_byte] = to.as_bytes() { + return unsafe { replace_ascii(self.as_bytes(), from_byte, *to_byte) }; + } + } + let mut result = String::new(); let mut last_end = 0; for (start, part) in self.match_indices(from) { @@ -661,3 +673,14 @@ fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec { out } +#[inline] +#[cfg(not(test))] +#[cfg(not(no_global_oom_handling))] +#[allow(dead_code)] +/// Faster implementation of string replacement for ASCII to ASCII cases. +/// Should produce fast vectorized code. +unsafe fn replace_ascii(utf8_bytes: &[u8], from: u8, to: u8) -> String { + let result: Vec = utf8_bytes.iter().map(|b| if *b == from { to } else { *b }).collect(); + // SAFETY: We replaced ascii with ascii on valid utf8 strings. + unsafe { String::from_utf8_unchecked(result) } +} diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index ee878e879e98a..0d1600a28aa74 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -53,7 +53,7 @@ use core::ops::AddAssign; #[cfg(not(no_global_oom_handling))] use core::ops::Bound::{Excluded, Included, Unbounded}; use core::ops::{self, Range, RangeBounds}; -use core::str::pattern::Pattern; +use core::str::pattern::{Pattern, Utf8Pattern}; use core::{fmt, hash, ptr, slice}; #[cfg(not(no_global_oom_handling))] @@ -2424,6 +2424,11 @@ impl<'b> Pattern for &'b String { { self[..].strip_suffix_of(haystack) } + + #[inline] + fn as_utf8_pattern(&self) -> Option> { + Some(Utf8Pattern::StringPattern(self.as_bytes())) + } } macro_rules! impl_eq { diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index 9f1294d760647..eb60effe8138f 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -160,6 +160,19 @@ pub trait Pattern: Sized { None } } + + /// Returns the pattern as utf-8 bytes if possible. + fn as_utf8_pattern(&self) -> Option>; +} +/// Result of calling [`Pattern::as_utf8_pattern()`]. +/// Can be used for inspecting the contents of a [`Pattern`] in cases +/// where the underlying representation can be represented as UTF-8. +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum Utf8Pattern<'a> { + /// Type returned by String and str types. + StringPattern(&'a [u8]), + /// Type returned by char types. + CharPattern(char), } // Searcher @@ -599,6 +612,11 @@ impl Pattern for char { { self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack) } + + #[inline] + fn as_utf8_pattern(&self) -> Option> { + Some(Utf8Pattern::CharPattern(*self)) + } } ///////////////////////////////////////////////////////////////////////////// @@ -657,6 +675,11 @@ impl Pattern for MultiCharEqPattern { fn into_searcher(self, haystack: &str) -> MultiCharEqSearcher<'_, C> { MultiCharEqSearcher { haystack, char_eq: self.0, char_indices: haystack.char_indices() } } + + #[inline] + fn as_utf8_pattern(&self) -> Option> { + None + } } unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> { @@ -747,6 +770,11 @@ macro_rules! pattern_methods { { ($pmap)(self).strip_suffix_of(haystack) } + + #[inline] + fn as_utf8_pattern(&self) -> Option> { + None + } }; } @@ -1022,6 +1050,11 @@ impl<'b> Pattern for &'b str { None } } + + #[inline] + fn as_utf8_pattern(&self) -> Option> { + Some(Utf8Pattern::StringPattern(self.as_bytes())) + } } ///////////////////////////////////////////////////////////////////////////// pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy