core/char/
mod.rs

1//! Utilities for the `char` primitive type.
2//!
3//! *[See also the `char` primitive type](primitive@char).*
4//!
5//! The `char` type represents a single character. More specifically, since
6//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8//! point]'.
9//!
10//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
12//!
//! This module exists for technical reasons; the primary documentation for
//! `char` lives directly on [the `char` primitive type][char] itself.
15//!
16//! This module is the home of the iterator implementations for the iterators
17//! implemented on `char`, as well as some useful constants and conversion
18//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "rust1", since = "1.0.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
28#[rustfmt::skip]
29#[stable(feature = "try_from", since = "1.34.0")]
30pub use self::convert::CharTryFromError;
31#[stable(feature = "char_from_str", since = "1.20.0")]
32pub use self::convert::ParseCharError;
33#[stable(feature = "decode_utf16", since = "1.9.0")]
34pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
35
36// perma-unstable re-exports
37#[rustfmt::skip]
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf16_raw; // perma-unstable
40#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-unstable
42
43#[rustfmt::skip]
44use crate::ascii;
45pub(crate) use self::methods::EscapeDebugExtArgs;
46use crate::error::Error;
47use crate::escape::{AlwaysEscaped, EscapeIterInner, MaybeEscaped};
48use crate::fmt::{self, Write};
49use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
50use crate::num::NonZero;
51
// Tag bits and code-point limits used when encoding characters as UTF-8.
//
// Tags, with their bit patterns spelled out:
const TAG_CONT: u8 = 0x80; // 0b1000_0000
const TAG_TWO_B: u8 = 0xC0; // 0b1100_0000
const TAG_THREE_B: u8 = 0xE0; // 0b1110_0000
const TAG_FOUR_B: u8 = 0xF0; // 0b1111_0000
// Range limits, each a power of two (presumably exclusive upper bounds for the
// one-, two- and three-byte forms; the code consuming them is outside this view).
const MAX_ONE_B: u32 = 1 << 7; // 0x80
const MAX_TWO_B: u32 = 1 << 11; // 0x800
const MAX_THREE_B: u32 = 1 << 16; // 0x10000
60
61/*
62    Lu  Uppercase_Letter        an uppercase letter
63    Ll  Lowercase_Letter        a lowercase letter
64    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
65    Lm  Modifier_Letter         a modifier letter
66    Lo  Other_Letter            other letters, including syllables and ideographs
67    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
68    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
69    Me  Enclosing_Mark          an enclosing combining mark
70    Nd  Decimal_Number          a decimal digit
71    Nl  Letter_Number           a letterlike numeric character
72    No  Other_Number            a numeric character of other type
73    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
74    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
75    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
76    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
77    Pi  Initial_Punctuation     an initial quotation mark
78    Pf  Final_Punctuation       a final quotation mark
79    Po  Other_Punctuation       a punctuation mark of other type
80    Sm  Math_Symbol             a symbol of primarily mathematical use
81    Sc  Currency_Symbol         a currency sign
82    Sk  Modifier_Symbol         a non-letterlike modifier symbol
83    So  Other_Symbol            a symbol of other type
84    Zs  Space_Separator         a space character (of various non-zero widths)
85    Zl  Line_Separator          U+2028 LINE SEPARATOR only
86    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
87    Cc  Control                 a C0 or C1 control code
88    Cf  Format                  a format control character
89    Cs  Surrogate               a surrogate code point
90    Co  Private_Use             a private-use character
91    Cn  Unassigned              a reserved unassigned code point or a noncharacter
92*/
93
/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
///
/// This module-level constant is defined in terms of [`char::MAX`], so the two
/// always hold the same value.
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;
97
/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding.
///
/// This module-level constant is defined in terms of [`char::MAX_LEN_UTF8`], so
/// the two always hold the same value.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;
102
/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding.
///
/// This module-level constant is defined in terms of [`char::MAX_LEN_UTF16`], so
/// the two always hold the same value.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;
107
/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
///
/// This module-level constant is defined in terms of
/// [`char::REPLACEMENT_CHARACTER`], so the two always hold the same value.
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
112
/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
///
/// This module-level constant is defined in terms of [`char::UNICODE_VERSION`],
/// so the two always hold the same three-component tuple.
#[stable(feature = "unicode_version", since = "1.45.0")]
pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
117
118/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
119/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
120#[stable(feature = "decode_utf16", since = "1.9.0")]
121#[inline]
122pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
123    self::decode::decode_utf16(iter)
124}
125
126/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
127#[stable(feature = "rust1", since = "1.0.0")]
128#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
129#[must_use]
130#[inline]
131pub const fn from_u32(i: u32) -> Option<char> {
132    self::convert::from_u32(i)
133}
134
135/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
136/// instead.
137#[stable(feature = "char_from_unchecked", since = "1.5.0")]
138#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
139#[must_use]
140#[inline]
141pub const unsafe fn from_u32_unchecked(i: u32) -> char {
142    // SAFETY: the safety contract must be upheld by the caller.
143    unsafe { self::convert::from_u32_unchecked(i) }
144}
145
146/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
147#[stable(feature = "rust1", since = "1.0.0")]
148#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
149#[must_use]
150#[inline]
151pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
152    self::convert::from_digit(num, radix)
153}
154
155/// Returns an iterator that yields the hexadecimal Unicode escape of a
156/// character, as `char`s.
157///
158/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
159/// its documentation for more.
160///
161/// [`escape_unicode`]: char::escape_unicode
162#[derive(Clone, Debug)]
163#[stable(feature = "rust1", since = "1.0.0")]
164pub struct EscapeUnicode(EscapeIterInner<10, AlwaysEscaped>);
165
166impl EscapeUnicode {
167    #[inline]
168    const fn new(c: char) -> Self {
169        Self(EscapeIterInner::unicode(c))
170    }
171}
172
173#[stable(feature = "rust1", since = "1.0.0")]
174impl Iterator for EscapeUnicode {
175    type Item = char;
176
177    #[inline]
178    fn next(&mut self) -> Option<char> {
179        self.0.next().map(char::from)
180    }
181
182    #[inline]
183    fn size_hint(&self) -> (usize, Option<usize>) {
184        let n = self.0.len();
185        (n, Some(n))
186    }
187
188    #[inline]
189    fn count(self) -> usize {
190        self.0.len()
191    }
192
193    #[inline]
194    fn last(mut self) -> Option<char> {
195        self.0.next_back().map(char::from)
196    }
197
198    #[inline]
199    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
200        self.0.advance_by(n)
201    }
202}
203
204#[stable(feature = "exact_size_escape", since = "1.11.0")]
205impl ExactSizeIterator for EscapeUnicode {
206    #[inline]
207    fn len(&self) -> usize {
208        self.0.len()
209    }
210}
211
212#[stable(feature = "fused", since = "1.26.0")]
213impl FusedIterator for EscapeUnicode {}
214
215#[stable(feature = "char_struct_display", since = "1.16.0")]
216impl fmt::Display for EscapeUnicode {
217    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218        fmt::Display::fmt(&self.0, f)
219    }
220}
221
222/// An iterator that yields the literal escape code of a `char`.
223///
224/// This `struct` is created by the [`escape_default`] method on [`char`]. See
225/// its documentation for more.
226///
227/// [`escape_default`]: char::escape_default
228#[derive(Clone, Debug)]
229#[stable(feature = "rust1", since = "1.0.0")]
230pub struct EscapeDefault(EscapeIterInner<10, AlwaysEscaped>);
231
232impl EscapeDefault {
233    #[inline]
234    const fn printable(c: ascii::Char) -> Self {
235        Self(EscapeIterInner::ascii(c.to_u8()))
236    }
237
238    #[inline]
239    const fn backslash(c: ascii::Char) -> Self {
240        Self(EscapeIterInner::backslash(c))
241    }
242
243    #[inline]
244    const fn unicode(c: char) -> Self {
245        Self(EscapeIterInner::unicode(c))
246    }
247}
248
249#[stable(feature = "rust1", since = "1.0.0")]
250impl Iterator for EscapeDefault {
251    type Item = char;
252
253    #[inline]
254    fn next(&mut self) -> Option<char> {
255        self.0.next().map(char::from)
256    }
257
258    #[inline]
259    fn size_hint(&self) -> (usize, Option<usize>) {
260        let n = self.0.len();
261        (n, Some(n))
262    }
263
264    #[inline]
265    fn count(self) -> usize {
266        self.0.len()
267    }
268
269    #[inline]
270    fn last(mut self) -> Option<char> {
271        self.0.next_back().map(char::from)
272    }
273
274    #[inline]
275    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
276        self.0.advance_by(n)
277    }
278}
279
// The exact length is taken from the wrapped iterator.
#[stable(feature = "exact_size_escape", since = "1.11.0")]
impl ExactSizeIterator for EscapeDefault {
    #[inline]
    fn len(&self) -> usize {
        self.0.len()
    }
}

// Marker impl only; it adds no methods.
#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for EscapeDefault {}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for EscapeDefault {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Formatting is delegated entirely to the wrapped iterator's `Display` impl.
        fmt::Display::fmt(&self.0, f)
    }
}
298
299/// An iterator that yields the literal escape code of a `char`.
300///
301/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
302/// documentation for more.
303///
304/// [`escape_debug`]: char::escape_debug
305#[stable(feature = "char_escape_debug", since = "1.20.0")]
306#[derive(Clone, Debug)]
307pub struct EscapeDebug(EscapeIterInner<10, MaybeEscaped>);
308
309impl EscapeDebug {
310    #[inline]
311    const fn printable(chr: char) -> Self {
312        Self(EscapeIterInner::printable(chr))
313    }
314
315    #[inline]
316    const fn backslash(c: ascii::Char) -> Self {
317        Self(EscapeIterInner::backslash(c))
318    }
319
320    #[inline]
321    const fn unicode(c: char) -> Self {
322        Self(EscapeIterInner::unicode(c))
323    }
324}
325
326#[stable(feature = "char_escape_debug", since = "1.20.0")]
327impl Iterator for EscapeDebug {
328    type Item = char;
329
330    #[inline]
331    fn next(&mut self) -> Option<char> {
332        self.0.next()
333    }
334
335    #[inline]
336    fn size_hint(&self) -> (usize, Option<usize>) {
337        let n = self.len();
338        (n, Some(n))
339    }
340
341    #[inline]
342    fn count(self) -> usize {
343        self.len()
344    }
345}
346
347#[stable(feature = "char_escape_debug", since = "1.20.0")]
348impl ExactSizeIterator for EscapeDebug {
349    fn len(&self) -> usize {
350        self.0.len()
351    }
352}
353
354#[stable(feature = "fused", since = "1.26.0")]
355impl FusedIterator for EscapeDebug {}
356
357#[stable(feature = "char_escape_debug", since = "1.20.0")]
358impl fmt::Display for EscapeDebug {
359    #[inline]
360    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
361        fmt::Display::fmt(&self.0, f)
362    }
363}
364
// Generates a public newtype around `CaseMappingIter` together with all of its
// trait impls. The macro takes optional attributes (in practice the doc
// comments) followed by the name of the type to define. Every generated method
// forwards to the wrapped `CaseMappingIter`.
macro_rules! casemappingiter_impls {
    ($(#[$attr:meta])* $ITER_NAME:ident) => {
        $(#[$attr])*
        #[stable(feature = "rust1", since = "1.0.0")]
        #[derive(Debug, Clone)]
        pub struct $ITER_NAME(CaseMappingIter);

        #[stable(feature = "rust1", since = "1.0.0")]
        impl Iterator for $ITER_NAME {
            type Item = char;
            fn next(&mut self) -> Option<char> {
                self.0.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                self.0.size_hint()
            }

            fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
            where
                Fold: FnMut(Acc, Self::Item) -> Acc,
            {
                self.0.fold(init, fold)
            }

            fn count(self) -> usize {
                self.0.count()
            }

            fn last(self) -> Option<Self::Item> {
                self.0.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: just forwarding requirements to caller
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
        impl DoubleEndedIterator for $ITER_NAME {
            fn next_back(&mut self) -> Option<char> {
                self.0.next_back()
            }

            fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
            where
                Fold: FnMut(Acc, Self::Item) -> Acc,
            {
                self.0.rfold(init, rfold)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_back_by(n)
            }
        }

        #[stable(feature = "fused", since = "1.26.0")]
        impl FusedIterator for $ITER_NAME {}

        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
        impl ExactSizeIterator for $ITER_NAME {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn is_empty(&self) -> bool {
                self.0.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $ITER_NAME {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $ITER_NAME {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $ITER_NAME {}

        // Displaying the wrapper writes whatever the inner iterator's `Display`
        // impl writes.
        #[stable(feature = "char_struct_display", since = "1.16.0")]
        impl fmt::Display for $ITER_NAME {
            #[inline]
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }
    }
}
465
casemappingiter_impls! {
    /// An iterator that yields the lowercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_lowercase`]: char::to_lowercase
    ToLowercase
}
475
casemappingiter_impls! {
    /// An iterator that yields the uppercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_uppercase`]: char::to_uppercase
    ToUppercase
}
485
// Private iterator over at most three `char`s, backed by a by-value array iterator.
#[derive(Clone, Debug)]
struct CaseMappingIter(core::array::IntoIter<char, 3>);

impl CaseMappingIter {
    // Builds the iterator from a three-slot array, dropping trailing `'\0'`
    // slots so that only the meaningful prefix is yielded.
    #[inline]
    fn new(chars: [char; 3]) -> CaseMappingIter {
        // Slot 0 is deliberately never inspected: '\0' lowercases to itself,
        // so a NUL there is a real result and not padding. Slot 1 only counts
        // as padding when slot 2 is padding as well.
        let padding = match chars {
            [_, '\0', '\0'] => 2,
            [_, _, '\0'] => 1,
            _ => 0,
        };

        let mut iter = IntoIterator::into_iter(chars);
        for _ in 0..padding {
            iter.next_back();
        }
        CaseMappingIter(iter)
    }
}
505
// Every method forwards to the wrapped `array::IntoIter`.
impl Iterator for CaseMappingIter {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        self.0.next()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }

    fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
    where
        Fold: FnMut(Acc, Self::Item) -> Acc,
    {
        self.0.fold(init, fold)
    }

    fn count(self) -> usize {
        self.0.count()
    }

    fn last(self) -> Option<Self::Item> {
        self.0.last()
    }

    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
        self.0.advance_by(n)
    }

    unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
        // SAFETY: just forwarding requirements to caller
        unsafe { self.0.__iterator_get_unchecked(idx) }
    }
}
541
// Back-to-front iteration, likewise forwarded to the wrapped `array::IntoIter`.
impl DoubleEndedIterator for CaseMappingIter {
    fn next_back(&mut self) -> Option<char> {
        self.0.next_back()
    }

    fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
    where
        Fold: FnMut(Acc, Self::Item) -> Acc,
    {
        self.0.rfold(init, rfold)
    }

    fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
        self.0.advance_back_by(n)
    }
}
558
// Length queries are answered by the wrapped `array::IntoIter`.
impl ExactSizeIterator for CaseMappingIter {
    fn len(&self) -> usize {
        self.0.len()
    }

    fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
}

// Marker impl only; it adds no methods.
impl FusedIterator for CaseMappingIter {}

// SAFETY: forwards to inner `array::IntoIter`
unsafe impl TrustedLen for CaseMappingIter {}

// SAFETY: forwards to inner `array::IntoIter`
unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
    const MAY_HAVE_SIDE_EFFECT: bool = false;
}

// SAFETY: `CaseMappingIter` has no subtypes/supertypes
unsafe impl TrustedRandomAccess for CaseMappingIter {}
581
582impl fmt::Display for CaseMappingIter {
583    #[inline]
584    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
585        for c in self.0.clone() {
586            f.write_char(c)?;
587        }
588        Ok(())
589    }
590}
591
592/// The error type returned when a checked char conversion fails.
593#[stable(feature = "u8_from_char", since = "1.59.0")]
594#[derive(Debug, Copy, Clone, PartialEq, Eq)]
595pub struct TryFromCharError(pub(crate) ());
596
597#[stable(feature = "u8_from_char", since = "1.59.0")]
598impl fmt::Display for TryFromCharError {
599    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
600        "unicode code point out of range".fmt(fmt)
601    }
602}
603
604#[stable(feature = "u8_from_char", since = "1.59.0")]
605impl Error for TryFromCharError {}
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy