Content-Length: 49600 | pFad | http://github.com/RustPython/RustPython/pull/5953.diff

thub.com diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 6f402513fd..3f82b515bb 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -8,18 +8,12 @@ from collections import UserList import random + class Sequence: def __init__(self, seq='wxyz'): self.seq = seq def __len__(self): return len(self.seq) def __getitem__(self, i): return self.seq[i] -class BadSeq1(Sequence): - def __init__(self): self.seq = [7, 'hello', 123] - def __str__(self): return '{0} {1} {2}'.format(*self.seq) - -class BadSeq2(Sequence): - def __init__(self): self.seq = ['a', 'b', 'c'] - def __len__(self): return 8 class BaseTest: # These tests are for buffers of values (bytes) and not @@ -27,7 +21,7 @@ class BaseTest: # and various string implementations # The type to be tested - # Change in subclasses to change the behaviour of fixtesttype() + # Change in subclasses to change the behaviour of fixtype() type2test = None # Whether the "contained items" of the container are integers in @@ -36,7 +30,7 @@ class BaseTest: contains_bytes = False # All tests pass their arguments to the testing methods - # as str objects. fixtesttype() can be used to propagate + # as str objects. fixtype() can be used to propagate # these arguments to the appropriate type def fixtype(self, obj): if isinstance(obj, str): @@ -160,6 +154,14 @@ def test_count(self): self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i)) self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i)) + # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument count + @unittest.expectedFailure + def test_count_keyword(self): + self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0)) + self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1)) + self.assertEqual('aa'.replace('a', 'b', 2), 'aa'.replace('a', 'b', count=2)) + self.assertEqual('aa'.replace('a', 'b', 3), 'aa'.replace('a', 'b', count=3)) + def test_find(self): self.checkequal(0, 'abcdefghiabc', 'find', 'abc') self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1) @@ -327,11 +329,12 @@ def reference_find(p, s): for i in range(len(s)): if s.startswith(p, i): return i + if p == '' and s == '': + return 0 return -1 - rr = random.randrange - choices = random.choices - for _ in range(1000): + def check_pattern(rr): + choices = random.choices p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20) p = p0[:len(p0) - rr(10)] # pop off some characters left = ''.join(choices('abcdef', k=rr(2000))) @@ -341,6 +344,49 @@ def reference_find(p, s): self.checkequal(reference_find(p, text), text, 'find', p) + rr = random.randrange + for _ in range(1000): + check_pattern(rr) + + # Test that empty string always work: + check_pattern(lambda *args: 0) + + def test_find_many_lengths(self): + haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)] + haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats] + + needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)] + needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats] + + for n, haystack1 in haystacks: + haystack2 = haystack1[:-1] + for m, needle in needles: + answer1 = 5 * (n - m) if m <= n else -1 + self.assertEqual(haystack1.find(needle), answer1, msg=(n,m)) + self.assertEqual(haystack2.find(needle), -1, msg=(n,m)) + + def test_adaptive_find(self): + # This would be very slow for the naive algorithm, + # but str.find() should be O(n + m). + for N in 1000, 10_000, 100_000, 1_000_000: + A, B = 'a' * N, 'b' * N + haystack = A + A + B + A + A + needle = A + B + B + A + self.checkequal(-1, haystack, 'find', needle) + self.checkequal(0, haystack, 'count', needle) + self.checkequal(len(haystack), haystack + needle, 'find', needle) + self.checkequal(1, haystack + needle, 'count', needle) + + def test_find_with_memory(self): + # Test the "Skip with memory" path in the two-way algorithm. + for N in 1000, 3000, 10_000, 30_000: + needle = 'ab' * N + haystack = ('ab'*(N-1) + 'b') * 2 + self.checkequal(-1, haystack, 'find', needle) + self.checkequal(0, haystack, 'count', needle) + self.checkequal(len(haystack), haystack + needle, 'find', needle) + self.checkequal(1, haystack + needle, 'count', needle) + def test_find_shift_table_overflow(self): """When the table of 8-bit shifts overflows.""" N = 2**8 + 100 @@ -724,6 +770,18 @@ def test_replace(self): self.checkraises(TypeError, 'hello', 'replace', 42, 'h') self.checkraises(TypeError, 'hello', 'replace', 'h', 42) + def test_replace_uses_two_way_maxcount(self): + # Test that maxcount works in _two_way_count in fastsearch.h + A, B = "A"*1000, "B"*1000 + AABAA = A + A + B + A + A + ABBA = A + B + B + A + self.checkequal(AABAA + ABBA, + AABAA + ABBA, 'replace', ABBA, "ccc", 0) + self.checkequal(AABAA + "ccc", + AABAA + ABBA, 'replace', ABBA, "ccc", 1) + self.checkequal(AABAA + "ccc", + AABAA + ABBA, 'replace', ABBA, "ccc", 2) + @unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms") @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, 'only applies to 32-bit platforms') @@ -734,8 +792,6 @@ def test_replace_overflow(self): self.checkraises(OverflowError, A2_16, "replace", "A", A2_16) self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16) - - # Python 3.9 def test_removeprefix(self): self.checkequal('am', 'spam', 'removeprefix', 'sp') self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam') @@ -754,7 +810,6 @@ def test_removeprefix(self): self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42) self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l")) - # Python 3.9 def test_removesuffix(self): self.checkequal('sp', 'spam', 'removesuffix', 'am') self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam') @@ -1053,7 +1108,7 @@ def test_splitlines(self): self.checkraises(TypeError, 'abc', 'splitlines', 42, 42) -class CommonTest(BaseTest): +class StringLikeTest(BaseTest): # This testcase contains tests that can be used in all # stringlike classes. Currently this is str and UserString. @@ -1084,11 +1139,6 @@ def test_capitalize_nonascii(self): self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7', '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize') - -class MixinStrUnicodeUserStringTest: - # additional tests that only work for - # stringlike objects, i.e. str, UserString - def test_startswith(self): self.checkequal(True, 'hello', 'startswith', 'he') self.checkequal(True, 'hello', 'startswith', 'hello') @@ -1273,8 +1323,11 @@ def test_join(self): self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', ('a' * i,) * i) - #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1()) - self.checkequal('a b c', ' ', 'join', BadSeq2()) + class LiesAboutLengthSeq(Sequence): + def __init__(self): self.seq = ['a', 'b', 'c'] + def __len__(self): return 8 + + self.checkequal('a b c', ' ', 'join', LiesAboutLengthSeq()) self.checkraises(TypeError, ' ', 'join') self.checkraises(TypeError, ' ', 'join', None) @@ -1459,19 +1512,19 @@ def test_find_etc_raise_correct_error_messages(self): # issue 11828 s = 'hello' x = 'x' - self.assertRaisesRegex(TypeError, r'^find\(', s.find, + self.assertRaisesRegex(TypeError, r'^find\b', s.find, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind, + self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^index\(', s.index, + self.assertRaisesRegex(TypeError, r'^index\b', s.index, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex, + self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^count\(', s.count, + self.assertRaisesRegex(TypeError, r'^count\b', s.count, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith, + self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith, + self.assertRaisesRegex(TypeError, r'^endswith\b', s.endswith, x, None, None, None) # issue #15534 diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 3c634b6cac..e84df546a8 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -10,6 +10,7 @@ import sys import copy import functools +import operator import pickle import tempfile import textwrap @@ -46,6 +47,10 @@ def __index__(self): class BaseBytesTest: + def assertTypedEqual(self, actual, expected): + self.assertIs(type(actual), type(expected)) + self.assertEqual(actual, expected) + def test_basics(self): b = self.type2test() self.assertEqual(type(b), self.type2test) @@ -737,6 +742,37 @@ def check(fmt, vals, result): check(b'%i%b %*.*b', (10, b'3', 5, 3, b'abc',), b'103 abc') check(b'%c', b'a', b'a') + class PseudoFloat: + def __init__(self, value): + self.value = float(value) + def __int__(self): + return int(self.value) + + pi = PseudoFloat(3.1415) + + exceptions_params = [ + ('%x format: an integer is required, not float', b'%x', 3.14), + ('%X format: an integer is required, not float', b'%X', 2.11), + ('%o format: an integer is required, not float', b'%o', 1.79), + ('%x format: an integer is required, not PseudoFloat', b'%x', pi), + ('%x format: an integer is required, not complex', b'%x', 3j), + ('%X format: an integer is required, not complex', b'%X', 2j), + ('%o format: an integer is required, not complex', b'%o', 1j), + ('%u format: a real number is required, not complex', b'%u', 3j), + # See https://github.com/python/cpython/issues/130928 as for why + # the exception message contains '%d' instead of '%i'. + ('%d format: a real number is required, not complex', b'%i', 2j), + ('%d format: a real number is required, not complex', b'%d', 2j), + ( + r'%c requires an integer in range\(256\) or a single byte', + b'%c', pi + ), + ] + + for msg, format_bytes, value in exceptions_params: + with self.assertRaisesRegex(TypeError, msg): + operator.mod(format_bytes, value) + def test_imod(self): b = self.type2test(b'hello, %b!') orig = b @@ -995,13 +1031,13 @@ def test_translate(self): self.assertEqual(c, b'hllo') def test_sq_item(self): - _testcapi = import_helper.import_module('_testcapi') + _testlimitedcapi = import_helper.import_module('_testlimitedcapi') obj = self.type2test((42,)) with self.assertRaises(IndexError): - _testcapi.sequence_getitem(obj, -2) + _testlimitedcapi.sequence_getitem(obj, -2) with self.assertRaises(IndexError): - _testcapi.sequence_getitem(obj, 1) - self.assertEqual(_testcapi.sequence_getitem(obj, 0), 42) + _testlimitedcapi.sequence_getitem(obj, 1) + self.assertEqual(_testlimitedcapi.sequence_getitem(obj, 0), 42) class BytesTest(BaseBytesTest, unittest.TestCase): @@ -1031,36 +1067,63 @@ def test_buffer_is_readonly(self): self.assertRaises(TypeError, f.readinto, b"") def test_custom(self): - class A: - def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A()), b'abc') - class A: pass - self.assertRaises(TypeError, bytes, A()) - class A: - def __bytes__(self): - return None - self.assertRaises(TypeError, bytes, A()) - class A: + self.assertEqual(bytes(BytesSubclass(b'abc')), b'abc') + self.assertEqual(BytesSubclass(OtherBytesSubclass(b'abc')), + BytesSubclass(b'abc')) + self.assertEqual(bytes(WithBytes(b'abc')), b'abc') + self.assertEqual(BytesSubclass(WithBytes(b'abc')), BytesSubclass(b'abc')) + + class NoBytes: pass + self.assertRaises(TypeError, bytes, NoBytes()) + self.assertRaises(TypeError, bytes, WithBytes('abc')) + self.assertRaises(TypeError, bytes, WithBytes(None)) + class IndexWithBytes: def __bytes__(self): return b'a' def __index__(self): return 42 - self.assertEqual(bytes(A()), b'a') + self.assertEqual(bytes(IndexWithBytes()), b'a') # Issue #25766 - class A(str): + class StrWithBytes(str): + def __new__(cls, value): + self = str.__new__(cls, '\u20ac') + self.value = value + return self def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A('\u20ac')), b'abc') - self.assertEqual(bytes(A('\u20ac'), 'iso8859-15'), b'\xa4') + return self.value + self.assertEqual(bytes(StrWithBytes(b'abc')), b'abc') + self.assertEqual(bytes(StrWithBytes(b'abc'), 'iso8859-15'), b'\xa4') + self.assertEqual(bytes(StrWithBytes(BytesSubclass(b'abc'))), b'abc') + self.assertEqual(BytesSubclass(StrWithBytes(b'abc')), BytesSubclass(b'abc')) + self.assertEqual(BytesSubclass(StrWithBytes(b'abc'), 'iso8859-15'), + BytesSubclass(b'\xa4')) + self.assertEqual(BytesSubclass(StrWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertEqual(BytesSubclass(StrWithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) # Issue #24731 - class A: + self.assertTypedEqual(bytes(WithBytes(BytesSubclass(b'abc'))), BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(WithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(WithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + + class BytesWithBytes(bytes): + def __new__(cls, value): + self = bytes.__new__(cls, b'\xa4') + self.value = value + return self def __bytes__(self): - return OtherBytesSubclass(b'abc') - self.assertEqual(bytes(A()), b'abc') - self.assertIs(type(bytes(A())), OtherBytesSubclass) - self.assertEqual(BytesSubclass(A()), b'abc') - self.assertIs(type(BytesSubclass(A())), BytesSubclass) + return self.value + self.assertTypedEqual(bytes(BytesWithBytes(b'abc')), b'abc') + self.assertTypedEqual(BytesSubclass(BytesWithBytes(b'abc')), + BytesSubclass(b'abc')) + self.assertTypedEqual(bytes(BytesWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(BytesWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(BytesWithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) # Test PyBytes_FromFormat() def test_from_format(self): @@ -1233,6 +1296,8 @@ class SubBytes(bytes): class ByteArrayTest(BaseBytesTest, unittest.TestCase): type2test = bytearray + _testlimitedcapi = import_helper.import_module('_testlimitedcapi') + def test_getitem_error(self): b = bytearray(b'python') msg = "bytearray indices must be integers or slices" @@ -1325,47 +1390,73 @@ def by(s): self.assertEqual(re.findall(br"\w+", b), [by("Hello"), by("world")]) def test_setitem(self): - b = bytearray([1, 2, 3]) - b[1] = 100 - self.assertEqual(b, bytearray([1, 100, 3])) - b[-1] = 200 - self.assertEqual(b, bytearray([1, 100, 200])) - b[0] = Indexable(10) - self.assertEqual(b, bytearray([10, 100, 200])) - try: - b[3] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[-10] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[0] = 256 - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = Indexable(-1) - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = None - self.fail("Didn't raise TypeError") - except TypeError: - pass + def setitem_as_mapping(b, i, val): + b[i] = val + + def setitem_as_sequence(b, i, val): + self._testlimitedcapi.sequence_setitem(b, i, val) + + def do_tests(setitem): + b = bytearray([1, 2, 3]) + setitem(b, 1, 100) + self.assertEqual(b, bytearray([1, 100, 3])) + setitem(b, -1, 200) + self.assertEqual(b, bytearray([1, 100, 200])) + setitem(b, 0, Indexable(10)) + self.assertEqual(b, bytearray([10, 100, 200])) + try: + setitem(b, 3, 0) + self.fail("Didn't raise IndexError") + except IndexError: + pass + try: + setitem(b, -10, 0) + self.fail("Didn't raise IndexError") + except IndexError: + pass + try: + setitem(b, 0, 256) + self.fail("Didn't raise ValueError") + except ValueError: + pass + try: + setitem(b, 0, Indexable(-1)) + self.fail("Didn't raise ValueError") + except ValueError: + pass + try: + setitem(b, 0, object()) + self.fail("Didn't raise TypeError") + except TypeError: + pass + + with self.subTest("tp_as_mapping"): + do_tests(setitem_as_mapping) + + with self.subTest("tp_as_sequence"): + do_tests(setitem_as_sequence) def test_delitem(self): - b = bytearray(range(10)) - del b[0] - self.assertEqual(b, bytearray(range(1, 10))) - del b[-1] - self.assertEqual(b, bytearray(range(1, 9))) - del b[4] - self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) + def del_as_mapping(b, i): + del b[i] + + def del_as_sequence(b, i): + self._testlimitedcapi.sequence_delitem(b, i) + + def do_tests(delete): + b = bytearray(range(10)) + delete(b, 0) + self.assertEqual(b, bytearray(range(1, 10))) + delete(b, -1) + self.assertEqual(b, bytearray(range(1, 9))) + delete(b, 4) + self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) + + with self.subTest("tp_as_mapping"): + do_tests(del_as_mapping) + + with self.subTest("tp_as_sequence"): + do_tests(del_as_sequence) def test_setslice(self): b = bytearray(range(10)) @@ -1558,6 +1649,13 @@ def test_extend(self): a = bytearray(b'') a.extend([Indexable(ord('a'))]) self.assertEqual(a, b'a') + a = bytearray(b'abc') + self.assertRaisesRegex(TypeError, # Override for string. + "expected iterable of integers; got: 'str'", + a.extend, 'def') + self.assertRaisesRegex(TypeError, # But not for others. + "can't extend bytearray with float", + a.extend, 1.0) def test_remove(self): b = bytearray(b'hello') @@ -1747,6 +1845,8 @@ def test_repeat_after_setslice(self): self.assertEqual(b3, b'xcxcxc') def test_mutating_index(self): + # See gh-91153 + class Boom: def __index__(self): b.clear() @@ -1758,10 +1858,9 @@ def __index__(self): b[0] = Boom() with self.subTest("tp_as_sequence"): - _testcapi = import_helper.import_module('_testcapi') b = bytearray(b'Now you see me...') with self.assertRaises(IndexError): - _testcapi.sequence_setitem(b, 0, Boom()) + self._testlimitedcapi.sequence_setitem(b, 0, Boom()) class AssortedBytesTest(unittest.TestCase): @@ -2060,6 +2159,12 @@ class BytesSubclass(bytes): class OtherBytesSubclass(bytes): pass +class WithBytes: + def __init__(self, value): + self.value = value + def __bytes__(self): + return self.value + class ByteArraySubclassTest(SubclassTest, unittest.TestCase): basetype = bytearray type2test = ByteArraySubclass diff --git a/Lib/test/test_unicode.py b/Lib/test/test_str.py similarity index 94% rename from Lib/test/test_unicode.py rename to Lib/test/test_str.py index 1a8a8f7ee9..ef2d211a61 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_str.py @@ -7,6 +7,7 @@ """ import _string import codecs +import datetime import itertools import operator import pickle @@ -55,8 +56,22 @@ def duplicate_string(text): class StrSubclass(str): pass -class UnicodeTest(string_tests.CommonTest, - string_tests.MixinStrUnicodeUserStringTest, +class OtherStrSubclass(str): + pass + +class WithStr: + def __init__(self, value): + self.value = value + def __str__(self): + return self.value + +class WithRepr: + def __init__(self, value): + self.value = value + def __repr__(self): + return self.value + +class StrTest(string_tests.StringLikeTest, string_tests.MixinStrUnicodeTest, unittest.TestCase): @@ -84,6 +99,10 @@ def __repr__(self): self.assertEqual(realresult, result) self.assertTrue(object is not realresult) + def assertTypedEqual(self, actual, expected): + self.assertIs(type(actual), type(expected)) + self.assertEqual(actual, expected) + def test_literals(self): self.assertEqual('\xff', '\u00ff') self.assertEqual('\uffff', '\U0000ffff') @@ -93,6 +112,8 @@ def test_literals(self): # raw strings should not have unicode escapes self.assertNotEqual(r"\u0020", " ") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ascii(self): self.assertEqual(ascii('abc'), "'abc'") self.assertEqual(ascii('ab\\c'), "'ab\\\\c'") @@ -128,10 +149,13 @@ def test_ascii(self): self.assertEqual(ascii("\U00010000" * 39 + "\uffff" * 4096), ascii("\U00010000" * 39 + "\uffff" * 4096)) - class WrongRepr: - def __repr__(self): - return b'byte-repr' - self.assertRaises(TypeError, ascii, WrongRepr()) + self.assertTypedEqual(ascii('\U0001f40d'), r"'\U0001f40d'") + self.assertTypedEqual(ascii(StrSubclass('abc')), "'abc'") + self.assertTypedEqual(ascii(WithRepr('')), '') + self.assertTypedEqual(ascii(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(ascii(WithRepr('<\U0001f40d>')), r'<\U0001f40d>') + self.assertTypedEqual(ascii(WithRepr(StrSubclass('<\U0001f40d>'))), r'<\U0001f40d>') + self.assertRaises(TypeError, ascii, WithRepr(b'byte-repr')) def test_repr(self): # Test basic sanity of repr() @@ -169,10 +193,13 @@ def test_repr(self): self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096), repr("\U00010000" * 39 + "\uffff" * 4096)) - class WrongRepr: - def __repr__(self): - return b'byte-repr' - self.assertRaises(TypeError, repr, WrongRepr()) + self.assertTypedEqual(repr('\U0001f40d'), "'\U0001f40d'") + self.assertTypedEqual(repr(StrSubclass('abc')), "'abc'") + self.assertTypedEqual(repr(WithRepr('')), '') + self.assertTypedEqual(repr(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(repr(WithRepr('<\U0001f40d>')), '<\U0001f40d>') + self.assertTypedEqual(repr(WithRepr(StrSubclass('<\U0001f40d>'))), StrSubclass('<\U0001f40d>')) + self.assertRaises(TypeError, repr, WithRepr(b'byte-repr')) def test_iterators(self): # Make sure unicode objects have an __iter__ method @@ -213,7 +240,7 @@ def test_pickle_iterator(self): self.assertEqual(case, pickled) def test_count(self): - string_tests.CommonTest.test_count(self) + string_tests.StringLikeTest.test_count(self) # check mixed argument types self.checkequalnofix(3, 'aaa', 'count', 'a') self.checkequalnofix(0, 'aaa', 'count', 'b') @@ -243,7 +270,7 @@ class MyStr(str): self.checkequal(3, MyStr('aaa'), 'count', 'a') def test_find(self): - string_tests.CommonTest.test_find(self) + string_tests.StringLikeTest.test_find(self) # test implementation details of the memchr fast path self.checkequal(100, 'a' * 100 + '\u0102', 'find', '\u0102') self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0201') @@ -288,7 +315,7 @@ def test_find(self): self.checkequal(-1, '\u0102' * 100, 'find', '\u0102\U00100304') def test_rfind(self): - string_tests.CommonTest.test_rfind(self) + string_tests.StringLikeTest.test_rfind(self) # test implementation details of the memrchr fast path self.checkequal(0, '\u0102' + 'a' * 100 , 'rfind', '\u0102') self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0201') @@ -329,7 +356,7 @@ def test_rfind(self): self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102') def test_index(self): - string_tests.CommonTest.test_index(self) + string_tests.StringLikeTest.test_index(self) self.checkequalnofix(0, 'abcdefghiabc', 'index', '') self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def') self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc') @@ -353,7 +380,7 @@ def test_index(self): self.assertRaises(ValueError, ('\u0102' * 100).index, '\u0102\U00100304') def test_rindex(self): - string_tests.CommonTest.test_rindex(self) + string_tests.StringLikeTest.test_rindex(self) self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '') self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def') self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc') @@ -449,7 +476,7 @@ def test_maketrans_translate(self): self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz') def test_split(self): - string_tests.CommonTest.test_split(self) + string_tests.StringLikeTest.test_split(self) # test mixed kinds for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -466,7 +493,7 @@ def test_split(self): left + delim * 2 + right, 'split', delim *2) def test_rsplit(self): - string_tests.CommonTest.test_rsplit(self) + string_tests.StringLikeTest.test_rsplit(self) # test mixed kinds for left, right in ('ba', 'юё', '\u0101\u0100', '\U00010301\U00010300'): left *= 9 @@ -486,7 +513,7 @@ def test_rsplit(self): left + right, 'rsplit', None) def test_partition(self): - string_tests.MixinStrUnicodeUserStringTest.test_partition(self) + string_tests.StringLikeTest.test_partition(self) # test mixed kinds self.checkequal(('ABCDEFGH', '', ''), 'ABCDEFGH', 'partition', '\u4200') for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -503,7 +530,7 @@ def test_partition(self): left + delim * 2 + right, 'partition', delim * 2) def test_rpartition(self): - string_tests.MixinStrUnicodeUserStringTest.test_rpartition(self) + string_tests.StringLikeTest.test_rpartition(self) # test mixed kinds self.checkequal(('', '', 'ABCDEFGH'), 'ABCDEFGH', 'rpartition', '\u4200') for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -520,7 +547,7 @@ def test_rpartition(self): left + delim * 2 + right, 'rpartition', delim * 2) def test_join(self): - string_tests.MixinStrUnicodeUserStringTest.test_join(self) + string_tests.StringLikeTest.test_join(self) class MyWrapper: def __init__(self, sval): self.sval = sval @@ -548,7 +575,7 @@ def test_join_overflow(self): self.assertRaises(OverflowError, ''.join, seq) def test_replace(self): - string_tests.CommonTest.test_replace(self) + string_tests.StringLikeTest.test_replace(self) # method call forwarded from str implementation because of unicode argument self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1) @@ -831,6 +858,15 @@ def test_isprintable(self): self.assertTrue('\U0001F46F'.isprintable()) self.assertFalse('\U000E0020'.isprintable()) + @support.requires_resource('cpu') + def test_isprintable_invariant(self): + for codepoint in range(sys.maxunicode + 1): + char = chr(codepoint) + category = unicodedata.category(char) + self.assertEqual(char.isprintable(), + category[0] not in ('C', 'Z') + or char == ' ') + def test_surrogates(self): for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'): @@ -859,7 +895,7 @@ def test_surrogates(self): def test_lower(self): - string_tests.CommonTest.test_lower(self) + string_tests.StringLikeTest.test_lower(self) self.assertEqual('\U00010427'.lower(), '\U0001044F') self.assertEqual('\U00010427\U00010427'.lower(), '\U0001044F\U0001044F') @@ -890,7 +926,7 @@ def test_casefold(self): self.assertEqual('\u00b5'.casefold(), '\u03bc') def test_upper(self): - string_tests.CommonTest.test_upper(self) + string_tests.StringLikeTest.test_upper(self) self.assertEqual('\U0001044F'.upper(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.upper(), '\U00010427\U00010427') @@ -909,7 +945,7 @@ def test_upper(self): # TODO: RUSTPYTHON @unittest.expectedFailure def test_capitalize(self): - string_tests.CommonTest.test_capitalize(self) + string_tests.StringLikeTest.test_capitalize(self) self.assertEqual('\U0001044F'.capitalize(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.capitalize(), '\U00010427\U0001044F') @@ -947,7 +983,7 @@ def test_title(self): # TODO: RUSTPYTHON @unittest.expectedFailure def test_swapcase(self): - string_tests.CommonTest.test_swapcase(self) + string_tests.StringLikeTest.test_swapcase(self) self.assertEqual('\U0001044F'.swapcase(), '\U00010427') self.assertEqual('\U00010427'.swapcase(), '\U0001044F') self.assertEqual('\U0001044F\U0001044F'.swapcase(), @@ -973,7 +1009,7 @@ def test_swapcase(self): self.assertEqual('\u1fd2'.swapcase(), '\u0399\u0308\u0300') def test_center(self): - string_tests.CommonTest.test_center(self) + string_tests.StringLikeTest.test_center(self) self.assertEqual('x'.center(2, '\U0010FFFF'), 'x\U0010FFFF') self.assertEqual('x'.center(3, '\U0010FFFF'), @@ -1483,7 +1519,7 @@ def __format__(self, spec): # TODO: RUSTPYTHON @unittest.expectedFailure def test_formatting(self): - string_tests.MixinStrUnicodeUserStringTest.test_formatting(self) + string_tests.StringLikeTest.test_formatting(self) # Testing Unicode formatting strings... self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc') self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000, 3.00') @@ -1659,7 +1695,7 @@ def test_startswith_endswith_errors(self): self.assertIn('str', exc) self.assertIn('tuple', exc) - @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR') + @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR', '') def test_format_float(self): # should not format with a comma, but always with C locale self.assertEqual('1.0', '%.1f' % 1.0) @@ -1730,8 +1766,6 @@ def __str__(self): 'character buffers are decoded to unicode' ) - self.assertRaises(TypeError, str, 42, 42, 42) - # TODO: RUSTPYTHON @unittest.expectedFailure def test_constructor_keyword_args(self): @@ -1910,6 +1944,12 @@ def test_utf8_decode_invalid_sequences(self): self.assertRaises(UnicodeDecodeError, (b'\xF4'+cb+b'\xBF\xBF').decode, 'utf-8') + def test_issue127903(self): + # gh-127903: ``_copy_characters`` crashes on DEBUG builds when + # there is nothing to copy. + d = datetime.datetime(2013, 11, 10, 14, 20, 59) + self.assertEqual(d.strftime('%z'), '') + def test_issue8271(self): # Issue #8271: during the decoding of an invalid UTF-8 byte sequence, # only the start byte and the continuation byte(s) are now considered @@ -2396,28 +2436,37 @@ def test_ucs4(self): @unittest.expectedFailure def test_conversion(self): # Make sure __str__() works properly - class ObjectToStr: - def __str__(self): - return "foo" - - class StrSubclassToStr(str): - def __str__(self): - return "foo" - - class StrSubclassToStrSubclass(str): - def __new__(cls, content=""): - return str.__new__(cls, 2*content) - def __str__(self): + class StrWithStr(str): + def __new__(cls, value): + self = str.__new__(cls, "") + self.value = value return self + def __str__(self): + return self.value - self.assertEqual(str(ObjectToStr()), "foo") - self.assertEqual(str(StrSubclassToStr("bar")), "foo") - s = str(StrSubclassToStrSubclass("foo")) - self.assertEqual(s, "foofoo") - self.assertIs(type(s), StrSubclassToStrSubclass) - s = StrSubclass(StrSubclassToStrSubclass("foo")) - self.assertEqual(s, "foofoo") - self.assertIs(type(s), StrSubclass) + self.assertTypedEqual(str(WithStr('abc')), 'abc') + self.assertTypedEqual(str(WithStr(StrSubclass('abc'))), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr('abc')), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr(StrSubclass('abc'))), + StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr(OtherStrSubclass('abc'))), + StrSubclass('abc')) + + self.assertTypedEqual(str(StrWithStr('abc')), 'abc') + self.assertTypedEqual(str(StrWithStr(StrSubclass('abc'))), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr('abc')), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr(StrSubclass('abc'))), + StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr(OtherStrSubclass('abc'))), + StrSubclass('abc')) + + self.assertTypedEqual(str(WithRepr('')), '') + self.assertTypedEqual(str(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr('')), StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr(StrSubclass(''))), + StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr(OtherStrSubclass(''))), + StrSubclass('')) def test_unicode_repr(self): class s1: @@ -2652,6 +2701,49 @@ def test_check_encoding_errors(self): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_str_invalid_call(self): + # too many args + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str("too", "many", "argu", "ments") + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str(1, "", "", 1) + + # no such kw arg + with self.assertRaisesRegex(TypeError, r"str\(\) got an unexpected keyword argument 'test'"): + str(test=1) + + # 'encoding' must be str + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, encoding=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", b"ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", encoding=b"ascii") + + # 'errors' must be str + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, "", errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", b"strict") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", errors=b"strict") + + # both positional and kwarg + with self.assertRaisesRegex(TypeError, r"argument for str\(\) given by name \('encoding'\) and position \(2\)"): + str(b"x", "utf-8", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "ignore", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "strict", errors="ignore") + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py index 80c22c6cdd..fe25bfe9f8 100644 --- a/Lib/test/test_unicode_file.py +++ b/Lib/test/test_unicode_file.py @@ -110,7 +110,7 @@ def _test_single(self, filename): os.unlink(filename) self.assertTrue(not os.path.exists(filename)) # and again with os.open. - f = os.open(filename, os.O_CREAT) + f = os.open(filename, os.O_CREAT | os.O_WRONLY) os.close(f) try: self._do_single(filename) diff --git a/Lib/test/test_unicode_file_functions.py b/Lib/test/test_unicode_file_functions.py index 47619c8807..25c16e3a0b 100644 --- a/Lib/test/test_unicode_file_functions.py +++ b/Lib/test/test_unicode_file_functions.py @@ -5,7 +5,7 @@ import unittest import warnings from unicodedata import normalize -from test.support import os_helper +from test.support import is_apple, os_helper from test import support @@ -23,13 +23,13 @@ '10_\u1fee\u1ffd', ] -# Mac OS X decomposes Unicode names, using Normal Form D. +# Apple platforms decompose Unicode names, using Normal Form D. # http://developer.apple.com/mac/library/qa/qa2001/qa1173.html # "However, most volume formats do not follow the exact specification for # these normal forms. For example, HFS Plus uses a variant of Normal Form D # in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through # U+2FAFF are not decomposed." -if sys.platform != 'darwin': +if not is_apple: filenames.extend([ # Specific code points: NFC(fn), NFD(fn), NFKC(fn) and NFKD(fn) all different '11_\u0385\u03d3\u03d4', @@ -119,11 +119,11 @@ def test_open(self): os.stat(name) self._apply_failure(os.listdir, name, self._listdir_failure) - # Skip the test on darwin, because darwin does normalize the filename to + # Skip the test on Apple platforms, because they don't normalize the filename to # NFD (a variant of Unicode NFD form). Normalize the filename to NFC, NFKC, # NFKD in Python is useless, because darwin will normalize it later and so # open(), os.stat(), etc. don't raise any exception. - @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X') + @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms') @unittest.skipIf( support.is_emscripten or support.is_wasi, "test fails on Emscripten/WASI when host platform is macOS." @@ -142,10 +142,10 @@ def test_normalize(self): self._apply_failure(os.remove, name) self._apply_failure(os.listdir, name) - # Skip the test on darwin, because darwin uses a normalization different + # Skip the test on Apple platforms, because they use a normalization different # than Python NFD normalization: filenames are different even if we use # Python NFD normalization. - @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X') + @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms') def test_listdir(self): sf0 = set(self.files) with warnings.catch_warnings(): diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py index d7a0ece253..60cfdaabe8 100644 --- a/Lib/test/test_unicode_identifiers.py +++ b/Lib/test/test_unicode_identifiers.py @@ -21,7 +21,7 @@ def test_non_bmp_normalized(self): @unittest.expectedFailure def test_invalid(self): try: - from test import badsyntax_3131 + from test.tokenizedata import badsyntax_3131 except SyntaxError as err: self.assertEqual(str(err), "invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)") diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 29da4a25a3..7f49c1690f 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -11,15 +11,20 @@ import sys import unicodedata import unittest -from test.support import (open_urlresource, requires_resource, script_helper, - cpython_only, check_disallow_instantiation, - ResourceDenied) +from test.support import ( + open_urlresource, + requires_resource, + script_helper, + cpython_only, + check_disallow_instantiation, + force_not_colorized, +) class UnicodeMethodsTest(unittest.TestCase): # update this, if the database changes - expectedchecksum = '4739770dd4d0e5f1b1677accfc3552ed3c8ef326' + expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e' # TODO: RUSTPYTHON @unittest.expectedFailure @@ -74,7 +79,8 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): # Update this if the database changes. Make sure to do a full rebuild # (e.g. 'make distclean && make') to get the correct checksum. - expectedchecksum = '98d602e1f69d5c5bb8a5910c40bbbad4e18e8370' + expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba' + # TODO: RUSTPYTHON @unittest.expectedFailure @requires_resource('cpu') @@ -94,6 +100,8 @@ def test_function_checksum(self): self.db.decomposition(char), str(self.db.mirrored(char)), str(self.db.combining(char)), + unicodedata.east_asian_width(char), + self.db.name(char, ""), ] h.update(''.join(data).encode("ascii")) result = h.hexdigest() @@ -106,6 +114,28 @@ def test_name_inverse_lookup(self): if looked_name := self.db.name(char, None): self.assertEqual(self.db.lookup(looked_name), char) + def test_no_names_in_pua(self): + puas = [*range(0xe000, 0xf8ff), + *range(0xf0000, 0xfffff), + *range(0x100000, 0x10ffff)] + for i in puas: + char = chr(i) + self.assertRaises(ValueError, self.db.name, char) + + # TODO: RUSTPYTHON; LookupError: undefined character name 'LATIN SMLL LETR A' + @unittest.expectedFailure + def test_lookup_nonexistant(self): + # just make sure that lookup can fail + for nonexistant in [ + "LATIN SMLL LETR A", + "OPEN HANDS SIGHS", + "DREGS", + "HANDBUG", + "MODIFIER LETTER CYRILLIC SMALL QUESTION MARK", + "???", + ]: + self.assertRaises(KeyError, self.db.lookup, nonexistant) + # TODO: RUSTPYTHON @unittest.expectedFailure def test_digit(self): @@ -245,6 +275,25 @@ def test_east_asian_width(self): self.assertEqual(eaw('\u2010'), 'A') self.assertEqual(eaw('\U00020000'), 'W') + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_east_asian_width_unassigned(self): + eaw = self.db.east_asian_width + # unassigned + for char in '\u0530\u0ecf\u10c6\u20fc\uaaca\U000107bd\U000115f2': + self.assertEqual(eaw(char), 'N') + self.assertIs(self.db.name(char, None), None) + + # unassigned but reserved for CJK + for char in '\uFA6E\uFADA\U0002A6E0\U0002FA20\U0003134B\U0003FFFD': + self.assertEqual(eaw(char), 'W') + self.assertIs(self.db.name(char, None), None) + + # private use areas + for char in '\uE000\uF800\U000F0000\U000FFFEE\U00100000\U0010FFF0': + self.assertEqual(eaw(char), 'A') + self.assertIs(self.db.name(char, None), None) + # TODO: RUSTPYTHON @unittest.expectedFailure def test_east_asian_width_9_0_changes(self): @@ -260,6 +309,7 @@ def test_disallow_instantiation(self): # TODO: RUSTPYTHON @unittest.expectedFailure + @force_not_colorized def test_failed_import_during_compiling(self): # Issue 4367 # Decoding \N escapes requires the unicodedata module. If it can't be @@ -322,6 +372,7 @@ def test_ucd_510(self): self.assertTrue("\u1d79".upper()=='\ua77d') self.assertTrue(".".upper()=='.') + @requires_resource('cpu') def test_bug_5828(self): self.assertEqual("\u1d79".lower(), "\u1d79") # Only U+0000 should have U+0000 as its upper/lower/titlecase variant @@ -364,6 +415,7 @@ def unistr(data): return "".join([chr(x) for x in data]) @requires_resource('network') + @requires_resource('cpu') def test_normalization(self): TESTDATAFILE = "NormalizationTest.txt" TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}" diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py index 51b4f6041e..74df52f541 100644 --- a/Lib/test/test_userstring.py +++ b/Lib/test/test_userstring.py @@ -7,8 +7,7 @@ from collections import UserString class UserStringTest( - string_tests.CommonTest, - string_tests.MixinStrUnicodeUserStringTest, + string_tests.StringLikeTest, unittest.TestCase ):

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier! Saves Data!