diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst index 438007d0028d86..bbb87ed14ef8aa 100644 --- a/Doc/library/textwrap.rst +++ b/Doc/library/textwrap.rst @@ -117,6 +117,28 @@ functions should be good enough; otherwise, you should use an instance of .. versionadded:: 3.3 +.. function:: cjk_wide(char) + + Return ``True`` if the East Asian Width of *char* is Fullwidth (``F``) or Wide (``W``), ``False`` otherwise. + Fullwidth and Wide CJK characters occupy two columns in a fixed-width display. + + .. versionadded:: 3.7 + + +.. function:: cjk_len(text) + + Return the display width of *text*, counting each Fullwidth or Wide character as two columns and any other character as one (``len(text)`` if *text* is not a string). + + .. versionadded:: 3.7 + + +.. function:: cjk_slices(text, index) + + Return a ``(head, tail)`` tuple splitting *text* so that *head* is the longest prefix at most *index* columns wide; a double-width character is never split (plain ``text[:index], text[index:]`` if *text* is not a string). + + .. versionadded:: 3.7 + + :func:`wrap`, :func:`fill` and :func:`shorten` work by creating a :class:`TextWrapper` instance and calling a single method on it. That instance is not reused, so for applications that process many text @@ -276,6 +298,13 @@ hyphenated words; only then will long words be broken if necessary, unless .. versionadded:: 3.4 + .. attribute:: cjk + + (default: ``False``) If true, Fullwidth and Wide CJK characters are counted as two columns each when measuring and breaking text. + + .. 
versionadded:: 3.7 + + :class:`TextWrapper` also provides some public methods, analogous to the module-level convenience functions: diff --git a/Lib/idlelib/idle_test/test_calltips.py b/Lib/idlelib/idle_test/test_calltips.py index 0b11602ca9e414..1d06e0d0b5b578 100644 --- a/Lib/idlelib/idle_test/test_calltips.py +++ b/Lib/idlelib/idle_test/test_calltips.py @@ -72,7 +72,7 @@ def test_signature_wrap(self): (width=70, initial_indent='', subsequent_indent='', expand_tabs=True, replace_whitespace=True, fix_sentence_endings=False, break_long_words=True, drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None, - placeholder=' [...]')''') + placeholder=' [...]', cjk=False)''') def test_docline_truncation(self): def f(): pass diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index 5a33c151642c62..4a53d3fa7695f8 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -566,6 +566,10 @@ def setUp(self): self.text = '''\ Did you say "supercalifragilisticexpialidocious?" How *do* you spell that odd word, anyways? +''' + self.text_cjk = '''\ +Did you say "いろはにほへとちりぬるをいろはにほ?" +How りぬ るをいろはにほり ぬるは, anyways? ''' def test_break_long(self): @@ -579,6 +583,14 @@ def test_break_long(self): self.check_wrap(self.text, 50, ['Did you say "supercalifragilisticexpialidocious?"', 'How *do* you spell that odd word, anyways?']) + self.check_wrap(self.text_cjk, 30, + ['Did you say "いろはにほへとち', + 'りぬるをいろはにほ?" How りぬ', + 'るをいろはにほり ぬるは,', + 'anyways?'], cjk=True) + self.check_wrap(self.text_cjk, 50, + ['Did you say "いろはにほへとちりぬるをいろはにほ?"', + 'How りぬ るをいろはにほり ぬるは, anyways?'], cjk=True) # SF bug 797650. Prevent an infinite loop by making sure that at # least one character gets split off on every pass. diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 0c18dc582e17ae..1c6146abdee03d 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -3,11 +3,13 @@ # Copyright (C) 1999-2001 Gregory P. Ward. # Copyright (C) 2002, 2003 Python Software Foundation. 
+# Copyright (C) 2015-2017 Florent Gallaire # Written by Greg Ward import re -__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten'] +__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten', + 'cjk_wide', 'cjk_len', 'cjk_slices'] # Hardcode the recognized whitespace characters to the US-ASCII # whitespace characters. The main reason for doing this is that @@ -61,6 +63,8 @@ class TextWrapper: Truncate wrapped lines. placeholder (default: ' [...]') Append to the last line of truncated text. + cjk (default: false) + Handle double-width CJK chars. """ unicode_whitespace_trans = {} @@ -125,7 +129,8 @@ def __init__(self, tabsize=8, *, max_lines=None, - placeholder=' [...]'): + placeholder=' [...]', + cjk=False): self.width = width self.initial_indent = initial_indent self.subsequent_indent = subsequent_indent @@ -138,7 +143,9 @@ def __init__(self, self.tabsize = tabsize self.max_lines = max_lines self.placeholder = placeholder + self.cjk = cjk + self._width = cjk_len if self.cjk else len # -- Private methods ----------------------------------------------- # (possibly useful for subclasses to override) @@ -215,8 +222,13 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # If we're allowed to break long words, then do so: put as much # of the next chunk onto the current line as will fit. if self.break_long_words: - cur_line.append(reversed_chunks[-1][:space_left]) - reversed_chunks[-1] = reversed_chunks[-1][space_left:] + if self.cjk: + chunk_start, chunk_end = cjk_slices(reversed_chunks[-1], space_left) + cur_line.append(chunk_start) + reversed_chunks[-1] = chunk_end + else: + cur_line.append(reversed_chunks[-1][:space_left]) + reversed_chunks[-1] = reversed_chunks[-1][space_left:] # Otherwise, we have to preserve the long word intact. 
Only add # it to the current line if there's nothing already there -- @@ -246,6 +258,9 @@ def _wrap_chunks(self, chunks): lines = [] if self.width <= 0: raise ValueError("invalid width %r (must be > 0)" % self.width) + elif self.width == 1 and (sum(self._width(chunk) for chunk in chunks) > + sum(len(chunk) for chunk in chunks)): + raise ValueError("invalid width 1 (must be > 1 when CJK chars)") if self.max_lines is not None: if self.max_lines > 1: indent = self.subsequent_indent @@ -280,7 +295,7 @@ def _wrap_chunks(self, chunks): del chunks[-1] while chunks: - l = len(chunks[-1]) + l = self._width(chunks[-1]) # Can at least squeeze this chunk onto the current line. if cur_len + l <= width: @@ -293,7 +308,7 @@ def _wrap_chunks(self, chunks): # The current line is full, and the next chunk is too big to # fit on *any* line (not just this one). - if chunks and len(chunks[-1]) > width: + if chunks and self._width(chunks[-1]) > width: self._handle_long_word(chunks, cur_line, cur_len, width) cur_len = sum(map(len, cur_line)) @@ -365,7 +380,7 @@ def fill(self, text): # -- Convenience interface --------------------------------------------- -def wrap(text, width=70, **kwargs): +def wrap(text, width=70, cjk=False, **kwargs): """Wrap a single paragraph of text, returning a list of wrapped lines. Reformat the single paragraph in 'text' so it fits in lines of no @@ -375,10 +390,10 @@ def wrap(text, width=70, **kwargs): space. See TextWrapper class for available keyword args to customize wrapping behaviour. """ - w = TextWrapper(width=width, **kwargs) + w = TextWrapper(width=width, cjk=cjk, **kwargs) return w.wrap(text) -def fill(text, width=70, **kwargs): +def fill(text, width=70, cjk=False, **kwargs): """Fill a single paragraph of text, returning a new string. Reformat the single paragraph in 'text' to fit in lines of no more @@ -387,10 +402,10 @@ def fill(text, width=70, **kwargs): whitespace characters converted to space. 
See TextWrapper class for available keyword args to customize wrapping behaviour. """ - w = TextWrapper(width=width, **kwargs) + w = TextWrapper(width=width, cjk=cjk, **kwargs) return w.fill(text) -def shorten(text, width, **kwargs): +def shorten(text, width, cjk=False, **kwargs): """Collapse and truncate the given text to fit in the given width. The text first has its whitespace collapsed. If it then fits in @@ -402,10 +417,43 @@ def shorten(text, width, **kwargs): >>> textwrap.shorten("Hello world!", width=11) 'Hello [...]' """ - w = TextWrapper(width=width, max_lines=1, **kwargs) + w = TextWrapper(width=width, cjk=cjk, max_lines=1, **kwargs) return w.fill(' '.join(text.strip().split())) +# -- CJK support ------------------------------------------------------ + +def cjk_wide(char): + """Return True if char is Fullwidth or Wide, False otherwise. + Fullwidth and Wide CJK chars are double-width. + """ + import unicodedata + return unicodedata.east_asian_width(char) in ('F', 'W') + + +def cjk_len(text): + """Return the real width of text (its len if not a string). + """ + if not isinstance(text, str): + return len(text) + return sum(2 if cjk_wide(char) else 1 for char in text) + + +def cjk_slices(text, index): + """Return the two slices of text cut to index. + """ + if not isinstance(text, str): + return text[:index], text[index:] + if cjk_len(text) <= index: + return text, '' + width = 0 + for i, char in enumerate(text): + width = width + cjk_wide(char) + 1 + if width > index: + break + return text[:i], text[i:] + + # -- Loosely related functionality ------------------------------------- _whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) diff --git a/Misc/ACKS b/Misc/ACKS index 319128c9e9a4d4..127b0811307c2a 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -495,6 +495,7 @@ Lele Gaifax Santiago Gala Yitzchak Gale Matthew Gallagher +Florent Gallaire Quentin Gallet-Gilles Riccardo Attilio Galli Raymund Galvin pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy