core/char/
mod.rs

//! Utilities for the `char` primitive type.
//!
//! *[See also the `char` primitive type](primitive@char).*
//!
//! The `char` type represents a single character. More specifically, since
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
//! point]'.
//!
//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
//!
//! This module is the home of the iterator implementations for the iterators
//! implemented on `char`, as well as some useful constants and conversion
//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "rust1", since = "1.0.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
28#[rustfmt::skip]
29#[stable(feature = "try_from", since = "1.34.0")]
30pub use self::convert::CharTryFromError;
31#[stable(feature = "char_from_str", since = "1.20.0")]
32pub use self::convert::ParseCharError;
33#[stable(feature = "decode_utf16", since = "1.9.0")]
34pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
35
36// perma-unstable re-exports
37#[rustfmt::skip]
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf16_raw; // perma-unstable
40#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-unstable
42
43#[rustfmt::skip]
44use crate::ascii;
45pub(crate) use self::methods::EscapeDebugExtArgs;
46use crate::error::Error;
47use crate::escape;
48use crate::fmt::{self, Write};
49use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
50use crate::num::NonZero;
51
// UTF-8 ranges and tags for encoding characters.
//
// A `TAG_*` value is the fixed high-bit pattern of one kind of UTF-8 byte.
// A `MAX_*_B` value is the first code point that no longer fits in an
// encoding of that many bytes (i.e. an exclusive upper bound).

/// High-bit pattern `10xx_xxxx` of a continuation byte.
const TAG_CONT: u8 = 0b1000_0000;
/// High-bit pattern `110x_xxxx` of the lead byte of a two-byte sequence.
const TAG_TWO_B: u8 = 0b1100_0000;
/// High-bit pattern `1110_xxxx` of the lead byte of a three-byte sequence.
const TAG_THREE_B: u8 = 0b1110_0000;
/// High-bit pattern `1111_0xxx` of the lead byte of a four-byte sequence.
const TAG_FOUR_B: u8 = 0b1111_0000;
/// First code point that needs more than one byte.
const MAX_ONE_B: u32 = 0x80;
/// First code point that needs more than two bytes.
const MAX_TWO_B: u32 = 0x800;
/// First code point that needs more than three bytes.
const MAX_THREE_B: u32 = 0x10000;
60
/*
    Lu  Uppercase_Letter        an uppercase letter
    Ll  Lowercase_Letter        a lowercase letter
    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
    Lm  Modifier_Letter         a modifier letter
    Lo  Other_Letter            other letters, including syllables and ideographs
    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
    Me  Enclosing_Mark          an enclosing combining mark
    Nd  Decimal_Number          a decimal digit
    Nl  Letter_Number           a letterlike numeric character
    No  Other_Number            a numeric character of other type
    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
    Pi  Initial_Punctuation     an initial quotation mark
    Pf  Final_Punctuation       a final quotation mark
    Po  Other_Punctuation       a punctuation mark of other type
    Sm  Math_Symbol             a symbol of primarily mathematical use
    Sc  Currency_Symbol         a currency sign
    Sk  Modifier_Symbol         a non-letterlike modifier symbol
    So  Other_Symbol            a symbol of other type
    Zs  Space_Separator         a space character (of various non-zero widths)
    Zl  Line_Separator          U+2028 LINE SEPARATOR only
    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
    Cc  Control                 a C0 or C1 control code
    Cf  Format                  a format control character
    Cs  Surrogate               a surrogate code point
    Co  Private_Use             a private-use character
    Cn  Unassigned              a reserved unassigned code point or a noncharacter
*/
93
94/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
95#[stable(feature = "rust1", since = "1.0.0")]
96pub const MAX: char = char::MAX;
97
98/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
99/// UTF-8 encoding.
100#[unstable(feature = "char_max_len", issue = "121714")]
101pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;
102
103/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
104/// to UTF-16 encoding.
105#[unstable(feature = "char_max_len", issue = "121714")]
106pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;
107
108/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
109/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
110#[stable(feature = "decode_utf16", since = "1.9.0")]
111pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
112
113/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
114/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
115#[stable(feature = "unicode_version", since = "1.45.0")]
116pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
117
118/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
119/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
120#[stable(feature = "decode_utf16", since = "1.9.0")]
121#[inline]
122pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
123    self::decode::decode_utf16(iter)
124}
125
126/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
127#[stable(feature = "rust1", since = "1.0.0")]
128#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
129#[must_use]
130#[inline]
131pub const fn from_u32(i: u32) -> Option<char> {
132    self::convert::from_u32(i)
133}
134
135/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
136/// instead.
137#[stable(feature = "char_from_unchecked", since = "1.5.0")]
138#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
139#[must_use]
140#[inline]
141pub const unsafe fn from_u32_unchecked(i: u32) -> char {
142    // SAFETY: the safety contract must be upheld by the caller.
143    unsafe { self::convert::from_u32_unchecked(i) }
144}
145
146/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
147#[stable(feature = "rust1", since = "1.0.0")]
148#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
149#[must_use]
150#[inline]
151pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
152    self::convert::from_digit(num, radix)
153}
154
155/// Returns an iterator that yields the hexadecimal Unicode escape of a
156/// character, as `char`s.
157///
158/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
159/// its documentation for more.
160///
161/// [`escape_unicode`]: char::escape_unicode
162#[derive(Clone, Debug)]
163#[stable(feature = "rust1", since = "1.0.0")]
164pub struct EscapeUnicode(escape::EscapeIterInner<10>);
165
166impl EscapeUnicode {
167    #[inline]
168    const fn new(c: char) -> Self {
169        Self(escape::EscapeIterInner::unicode(c))
170    }
171}
172
173#[stable(feature = "rust1", since = "1.0.0")]
174impl Iterator for EscapeUnicode {
175    type Item = char;
176
177    #[inline]
178    fn next(&mut self) -> Option<char> {
179        self.0.next().map(char::from)
180    }
181
182    #[inline]
183    fn size_hint(&self) -> (usize, Option<usize>) {
184        let n = self.0.len();
185        (n, Some(n))
186    }
187
188    #[inline]
189    fn count(self) -> usize {
190        self.0.len()
191    }
192
193    #[inline]
194    fn last(mut self) -> Option<char> {
195        self.0.next_back().map(char::from)
196    }
197
198    #[inline]
199    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
200        self.0.advance_by(n)
201    }
202}
203
204#[stable(feature = "exact_size_escape", since = "1.11.0")]
205impl ExactSizeIterator for EscapeUnicode {
206    #[inline]
207    fn len(&self) -> usize {
208        self.0.len()
209    }
210}
211
212#[stable(feature = "fused", since = "1.26.0")]
213impl FusedIterator for EscapeUnicode {}
214
215#[stable(feature = "char_struct_display", since = "1.16.0")]
216impl fmt::Display for EscapeUnicode {
217    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218        f.write_str(self.0.as_str())
219    }
220}
221
222/// An iterator that yields the literal escape code of a `char`.
223///
224/// This `struct` is created by the [`escape_default`] method on [`char`]. See
225/// its documentation for more.
226///
227/// [`escape_default`]: char::escape_default
228#[derive(Clone, Debug)]
229#[stable(feature = "rust1", since = "1.0.0")]
230pub struct EscapeDefault(escape::EscapeIterInner<10>);
231
232impl EscapeDefault {
233    #[inline]
234    const fn printable(c: ascii::Char) -> Self {
235        Self(escape::EscapeIterInner::ascii(c.to_u8()))
236    }
237
238    #[inline]
239    const fn backslash(c: ascii::Char) -> Self {
240        Self(escape::EscapeIterInner::backslash(c))
241    }
242
243    #[inline]
244    const fn unicode(c: char) -> Self {
245        Self(escape::EscapeIterInner::unicode(c))
246    }
247}
248
249#[stable(feature = "rust1", since = "1.0.0")]
250impl Iterator for EscapeDefault {
251    type Item = char;
252
253    #[inline]
254    fn next(&mut self) -> Option<char> {
255        self.0.next().map(char::from)
256    }
257
258    #[inline]
259    fn size_hint(&self) -> (usize, Option<usize>) {
260        let n = self.0.len();
261        (n, Some(n))
262    }
263
264    #[inline]
265    fn count(self) -> usize {
266        self.0.len()
267    }
268
269    #[inline]
270    fn last(mut self) -> Option<char> {
271        self.0.next_back().map(char::from)
272    }
273
274    #[inline]
275    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
276        self.0.advance_by(n)
277    }
278}
279
280#[stable(feature = "exact_size_escape", since = "1.11.0")]
281impl ExactSizeIterator for EscapeDefault {
282    #[inline]
283    fn len(&self) -> usize {
284        self.0.len()
285    }
286}
287
288#[stable(feature = "fused", since = "1.26.0")]
289impl FusedIterator for EscapeDefault {}
290
291#[stable(feature = "char_struct_display", since = "1.16.0")]
292impl fmt::Display for EscapeDefault {
293    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
294        f.write_str(self.0.as_str())
295    }
296}
297
298/// An iterator that yields the literal escape code of a `char`.
299///
300/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
301/// documentation for more.
302///
303/// [`escape_debug`]: char::escape_debug
304#[stable(feature = "char_escape_debug", since = "1.20.0")]
305#[derive(Clone, Debug)]
306pub struct EscapeDebug(EscapeDebugInner);
307
308#[derive(Clone, Debug)]
309// Note: It’s possible to manually encode the EscapeDebugInner inside of
310// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
311// a char) which would likely result in a more optimised code.  For now we use
312// the option easier to implement.
313enum EscapeDebugInner {
314    Bytes(escape::EscapeIterInner<10>),
315    Char(char),
316}
317
318impl EscapeDebug {
319    #[inline]
320    const fn printable(chr: char) -> Self {
321        Self(EscapeDebugInner::Char(chr))
322    }
323
324    #[inline]
325    const fn backslash(c: ascii::Char) -> Self {
326        Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::backslash(c)))
327    }
328
329    #[inline]
330    const fn unicode(c: char) -> Self {
331        Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::unicode(c)))
332    }
333
334    #[inline]
335    fn clear(&mut self) {
336        self.0 = EscapeDebugInner::Bytes(escape::EscapeIterInner::empty());
337    }
338}
339
340#[stable(feature = "char_escape_debug", since = "1.20.0")]
341impl Iterator for EscapeDebug {
342    type Item = char;
343
344    #[inline]
345    fn next(&mut self) -> Option<char> {
346        match self.0 {
347            EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from),
348            EscapeDebugInner::Char(chr) => {
349                self.clear();
350                Some(chr)
351            }
352        }
353    }
354
355    #[inline]
356    fn size_hint(&self) -> (usize, Option<usize>) {
357        let n = self.len();
358        (n, Some(n))
359    }
360
361    #[inline]
362    fn count(self) -> usize {
363        self.len()
364    }
365}
366
367#[stable(feature = "char_escape_debug", since = "1.20.0")]
368impl ExactSizeIterator for EscapeDebug {
369    fn len(&self) -> usize {
370        match &self.0 {
371            EscapeDebugInner::Bytes(bytes) => bytes.len(),
372            EscapeDebugInner::Char(_) => 1,
373        }
374    }
375}
376
377#[stable(feature = "fused", since = "1.26.0")]
378impl FusedIterator for EscapeDebug {}
379
380#[stable(feature = "char_escape_debug", since = "1.20.0")]
381impl fmt::Display for EscapeDebug {
382    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
383        match &self.0 {
384            EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()),
385            EscapeDebugInner::Char(chr) => f.write_char(*chr),
386        }
387    }
388}
389
// Generates a public tuple struct named `$Wrapper` around `CaseMappingIter`,
// carrying the attributes (normally doc comments) written before the name,
// and forwards the iterator traits and `Display` to the wrapped value.
macro_rules! casemappingiter_impls {
    ($(#[$meta:meta])* $Wrapper:ident) => {
        $(#[$meta])*
        #[stable(feature = "rust1", since = "1.0.0")]
        #[derive(Debug, Clone)]
        pub struct $Wrapper(CaseMappingIter);

        #[stable(feature = "rust1", since = "1.0.0")]
        impl Iterator for $Wrapper {
            type Item = char;

            fn next(&mut self) -> Option<char> {
                self.0.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                self.0.size_hint()
            }

            fn fold<B, F>(self, acc: B, f: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                self.0.fold(acc, f)
            }

            fn count(self) -> usize {
                self.0.count()
            }

            fn last(self) -> Option<Self::Item> {
                self.0.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: the caller's obligations are passed on unchanged.
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
        impl DoubleEndedIterator for $Wrapper {
            fn next_back(&mut self) -> Option<char> {
                self.0.next_back()
            }

            fn rfold<B, F>(self, acc: B, f: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                self.0.rfold(acc, f)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_back_by(n)
            }
        }

        #[stable(feature = "fused", since = "1.26.0")]
        impl FusedIterator for $Wrapper {}

        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
        impl ExactSizeIterator for $Wrapper {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn is_empty(&self) -> bool {
                self.0.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $Wrapper {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $Wrapper {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $Wrapper {}

        #[stable(feature = "char_struct_display", since = "1.16.0")]
        impl fmt::Display for $Wrapper {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(inner) = self;
                fmt::Display::fmt(inner, f)
            }
        }
    }
}
489
490casemappingiter_impls! {
491    /// Returns an iterator that yields the lowercase equivalent of a `char`.
492    ///
493    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
494    /// its documentation for more.
495    ///
496    /// [`to_lowercase`]: char::to_lowercase
497    ToLowercase
498}
499
500casemappingiter_impls! {
501    /// Returns an iterator that yields the uppercase equivalent of a `char`.
502    ///
503    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
504    /// its documentation for more.
505    ///
506    /// [`to_uppercase`]: char::to_uppercase
507    ToUppercase
508}
509
/// Iterator state wrapped by the case-mapping iterator types: a by-value
/// iterator over up to three `char`s.
#[derive(Debug, Clone)]
struct CaseMappingIter(core::array::IntoIter<char, 3>);

impl CaseMappingIter {
    /// Builds the iterator from a fixed three-slot mapping, treating trailing
    /// `'\0'` entries in slots 2 and 1 as padding that is never yielded.
    ///
    /// Slot 1 is only trimmed when slot 2 was `'\0'` too, and slot 0 is always
    /// kept, so at least one `char` is yielded.
    #[inline]
    fn new(chars: [char; 3]) -> CaseMappingIter {
        // Deliberately don't inspect `chars[0]`, as '\0' lowercases to itself.
        let padding = match chars {
            [_, '\0', '\0'] => 2,
            [_, _, '\0'] => 1,
            _ => 0,
        };

        // Fully qualified so this is the by-value array iterator on every
        // edition; the method-call form resolves to the slice iterator on
        // editions before 2021.
        let mut iter = IntoIterator::into_iter(chars);
        for _ in 0..padding {
            iter.next_back();
        }
        CaseMappingIter(iter)
    }
}
529
530impl Iterator for CaseMappingIter {
531    type Item = char;
532
533    fn next(&mut self) -> Option<char> {
534        self.0.next()
535    }
536
537    fn size_hint(&self) -> (usize, Option<usize>) {
538        self.0.size_hint()
539    }
540
541    fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
542    where
543        Fold: FnMut(Acc, Self::Item) -> Acc,
544    {
545        self.0.fold(init, fold)
546    }
547
548    fn count(self) -> usize {
549        self.0.count()
550    }
551
552    fn last(self) -> Option<Self::Item> {
553        self.0.last()
554    }
555
556    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
557        self.0.advance_by(n)
558    }
559
560    unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
561        // SAFETY: just forwarding requirements to caller
562        unsafe { self.0.__iterator_get_unchecked(idx) }
563    }
564}
565
566impl DoubleEndedIterator for CaseMappingIter {
567    fn next_back(&mut self) -> Option<char> {
568        self.0.next_back()
569    }
570
571    fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
572    where
573        Fold: FnMut(Acc, Self::Item) -> Acc,
574    {
575        self.0.rfold(init, rfold)
576    }
577
578    fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
579        self.0.advance_back_by(n)
580    }
581}
582
583impl ExactSizeIterator for CaseMappingIter {
584    fn len(&self) -> usize {
585        self.0.len()
586    }
587
588    fn is_empty(&self) -> bool {
589        self.0.is_empty()
590    }
591}
592
593impl FusedIterator for CaseMappingIter {}
594
595// SAFETY: forwards to inner `array::IntoIter`
596unsafe impl TrustedLen for CaseMappingIter {}
597
598// SAFETY: forwards to inner `array::IntoIter`
599unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
600    const MAY_HAVE_SIDE_EFFECT: bool = false;
601}
602
603// SAFETY: `CaseMappingIter` has no subtypes/supertypes
604unsafe impl TrustedRandomAccess for CaseMappingIter {}
605
606impl fmt::Display for CaseMappingIter {
607    #[inline]
608    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
609        for c in self.0.clone() {
610            f.write_char(c)?;
611        }
612        Ok(())
613    }
614}
615
616/// The error type returned when a checked char conversion fails.
617#[stable(feature = "u8_from_char", since = "1.59.0")]
618#[derive(Debug, Copy, Clone, PartialEq, Eq)]
619pub struct TryFromCharError(pub(crate) ());
620
621#[stable(feature = "u8_from_char", since = "1.59.0")]
622impl fmt::Display for TryFromCharError {
623    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
624        "unicode code point out of range".fmt(fmt)
625    }
626}
627
628#[stable(feature = "u8_from_char", since = "1.59.0")]
629impl Error for TryFromCharError {}
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy