-
-
Notifications
You must be signed in to change notification settings - Fork 32.4k
bpo-24665: double-width CJK chars support for textwrap #89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
6ea78e3
aa94f26
0264d9d
d630821
bfdfb22
8337ce5
cb9812b
54de7aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,7 @@ | |
import unicodedata | ||
|
||
__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten', | ||
'cjkwide', 'cjklen', 'cjkslices'] | ||
'cjk_wide', 'cjk_len', 'cjk_slices'] | ||
|
||
# Hardcode the recognized whitespace characters to the US-ASCII | ||
# whitespace characters. The main reason for doing this is that | ||
|
@@ -146,7 +146,7 @@ def __init__(self, | |
self.placeholder = placeholder | ||
self.cjk = cjk | ||
|
||
self._width = cjklen if self.cjk else len | ||
self._width = cjk_len if self.cjk else len | ||
|
||
# -- Private methods ----------------------------------------------- | ||
# (possibly useful for subclasses to override) | ||
|
@@ -224,7 +224,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): | |
# of the next chunk onto the current line as will fit. | ||
if self.break_long_words: | ||
if self.cjk: | ||
chunk_start, chunk_end = cjkslices(reversed_chunks[-1], space_left) | ||
chunk_start, chunk_end = cjk_slices(reversed_chunks[-1], space_left) | ||
cur_line.append(chunk_start) | ||
reversed_chunks[-1] = chunk_end | ||
else: | ||
|
@@ -424,31 +424,31 @@ def shorten(text, width, cjk=False, **kwargs): | |
|
||
# -- CJK support ------------------------------------------------------ | ||
|
||
def cjkwide(char): | ||
def cjk_wide(char): | ||
"""Return True if char is Fullwidth or Wide, False otherwise. | ||
Fullwidth and Wide CJK chars are double-width. | ||
""" | ||
return unicodedata.east_asian_width(char) in ('F', 'W') | ||
|
||
|
||
def cjklen(text): | ||
def cjk_len(text): | ||
"""Return the real width of text (its len if not a string). | ||
""" | ||
if not isinstance(text, str): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Strange case handling; maybe this function should accept only a string as its text argument... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Again: it's meant as a handy replacement for the built-in `len` |
||
return len(text) | ||
return sum(2 if cjkwide(char) else 1 for char in text) | ||
return sum(2 if cjk_wide(char) else 1 for char in text) | ||
|
||
|
||
def cjkslices(text, index): | ||
def cjk_slices(text, index): | ||
"""Return the two slices of text cut to the index. | ||
""" | ||
if not isinstance(text, str): | ||
return text[:index], text[index:] | ||
if cjklen(text) <= index: | ||
if cjk_len(text) <= index: | ||
return text, '' | ||
i = 1 | ||
# <= and i-1 to catch the last double length char of odd line | ||
while cjklen(text[:i]) <= index: | ||
while cjk_len(text[:i]) <= index: | ||
i = i + 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't like this O(n^2) algorithm.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Very relevant point. |
||
return text[:i-1], text[i-1:] | ||
|
||
|
This comment was marked as resolved.
Sorry, something went wrong.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please, @duboviy or @Haypo, could you explain to me why/how a tuple could be slower than a frozenset?