Content-Length: 86587 | pFad | http://github.com/RustPython/RustPython/pull/5953.patch

thub.com From 46c7bf77c4377677b1dce1d370f2cff33ee1f9d2 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Sat, 12 Jul 2025 12:09:19 +0300 Subject: [PATCH 1/3] Update str related tests from 3.13.5 --- Lib/test/string_tests.py | 120 ++++++---- Lib/test/test_bytes.py | 265 ++++++++++++++-------- Lib/test/{test_unicode.py => test_str.py} | 222 +++++++++++------- Lib/test/test_unicode_file.py | 2 +- Lib/test/test_unicode_file_functions.py | 14 +- Lib/test/test_unicode_identifiers.py | 4 +- Lib/test/test_unicodedata.py | 88 ++++--- Lib/test/test_userstring.py | 3 +- 8 files changed, 458 insertions(+), 260 deletions(-) rename Lib/test/{test_unicode.py => test_str.py} (94%) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 6f402513fd..9bb0ce7bb5 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -8,18 +8,12 @@ from collections import UserList import random + class Sequence: def __init__(self, seq='wxyz'): self.seq = seq def __len__(self): return len(self.seq) def __getitem__(self, i): return self.seq[i] -class BadSeq1(Sequence): - def __init__(self): self.seq = [7, 'hello', 123] - def __str__(self): return '{0} {1} {2}'.format(*self.seq) - -class BadSeq2(Sequence): - def __init__(self): self.seq = ['a', 'b', 'c'] - def __len__(self): return 8 class BaseTest: # These tests are for buffers of values (bytes) and not @@ -27,7 +21,7 @@ class BaseTest: # and various string implementations # The type to be tested - # Change in subclasses to change the behaviour of fixtesttype() + # Change in subclasses to change the behaviour of fixtype() type2test = None # Whether the "contained items" of the container are integers in @@ -36,7 +30,7 @@ class BaseTest: contains_bytes = False # All tests pass their arguments to the testing methods - # as str objects. fixtesttype() can be used to propagate + # as str objects. fixtype() can be used to propagate # these arguments to the appropriate type def fixtype(self, obj): if isinstance(obj, str): @@ -160,6 +154,12 @@ def test_count(self): self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i)) self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i)) + def test_count_keyword(self): + self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0)) + self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1)) + self.assertEqual('aa'.replace('a', 'b', 2), 'aa'.replace('a', 'b', count=2)) + self.assertEqual('aa'.replace('a', 'b', 3), 'aa'.replace('a', 'b', count=3)) + def test_find(self): self.checkequal(0, 'abcdefghiabc', 'find', 'abc') self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1) @@ -327,11 +327,12 @@ def reference_find(p, s): for i in range(len(s)): if s.startswith(p, i): return i + if p == '' and s == '': + return 0 return -1 - rr = random.randrange - choices = random.choices - for _ in range(1000): + def check_pattern(rr): + choices = random.choices p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20) p = p0[:len(p0) - rr(10)] # pop off some characters left = ''.join(choices('abcdef', k=rr(2000))) @@ -341,6 +342,49 @@ def reference_find(p, s): self.checkequal(reference_find(p, text), text, 'find', p) + rr = random.randrange + for _ in range(1000): + check_pattern(rr) + + # Test that empty string always work: + check_pattern(lambda *args: 0) + + def test_find_many_lengths(self): + haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)] + haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats] + + needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)] + needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats] + + for n, haystack1 in haystacks: + haystack2 = haystack1[:-1] + for m, needle in needles: + answer1 = 5 * (n - m) if m <= n else -1 + self.assertEqual(haystack1.find(needle), answer1, msg=(n,m)) + self.assertEqual(haystack2.find(needle), -1, msg=(n,m)) + + def test_adaptive_find(self): + # This would be very slow for the naive algorithm, + # but str.find() should be O(n + m). + for N in 1000, 10_000, 100_000, 1_000_000: + A, B = 'a' * N, 'b' * N + haystack = A + A + B + A + A + needle = A + B + B + A + self.checkequal(-1, haystack, 'find', needle) + self.checkequal(0, haystack, 'count', needle) + self.checkequal(len(haystack), haystack + needle, 'find', needle) + self.checkequal(1, haystack + needle, 'count', needle) + + def test_find_with_memory(self): + # Test the "Skip with memory" path in the two-way algorithm. + for N in 1000, 3000, 10_000, 30_000: + needle = 'ab' * N + haystack = ('ab'*(N-1) + 'b') * 2 + self.checkequal(-1, haystack, 'find', needle) + self.checkequal(0, haystack, 'count', needle) + self.checkequal(len(haystack), haystack + needle, 'find', needle) + self.checkequal(1, haystack + needle, 'count', needle) + def test_find_shift_table_overflow(self): """When the table of 8-bit shifts overflows.""" N = 2**8 + 100 @@ -394,8 +438,7 @@ def test_expandtabs(self): self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42) # This test is only valid when sizeof(int) == sizeof(void*) == 4. - # XXX RUSTPYTHON TODO: expandtabs overflow checks - if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4 and False: + if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4: self.checkraises(OverflowError, '\ta\n\tb', 'expandtabs', sys.maxsize) @@ -724,7 +767,18 @@ def test_replace(self): self.checkraises(TypeError, 'hello', 'replace', 42, 'h') self.checkraises(TypeError, 'hello', 'replace', 'h', 42) - @unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms") + def test_replace_uses_two_way_maxcount(self): + # Test that maxcount works in _two_way_count in fastsearch.h + A, B = "A"*1000, "B"*1000 + AABAA = A + A + B + A + A + ABBA = A + B + B + A + self.checkequal(AABAA + ABBA, + AABAA + ABBA, 'replace', ABBA, "ccc", 0) + self.checkequal(AABAA + "ccc", + AABAA + ABBA, 'replace', ABBA, "ccc", 1) + self.checkequal(AABAA + "ccc", + AABAA + ABBA, 'replace', ABBA, "ccc", 2) + @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, 'only applies to 32-bit platforms') def test_replace_overflow(self): @@ -734,8 +788,6 @@ def test_replace_overflow(self): self.checkraises(OverflowError, A2_16, "replace", "A", A2_16) self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16) - - # Python 3.9 def test_removeprefix(self): self.checkequal('am', 'spam', 'removeprefix', 'sp') self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam') @@ -754,7 +806,6 @@ def test_removeprefix(self): self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42) self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l")) - # Python 3.9 def test_removesuffix(self): self.checkequal('sp', 'spam', 'removesuffix', 'am') self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam') @@ -1053,7 +1104,7 @@ def test_splitlines(self): self.checkraises(TypeError, 'abc', 'splitlines', 42, 42) -class CommonTest(BaseTest): +class StringLikeTest(BaseTest): # This testcase contains tests that can be used in all # stringlike classes. Currently this is str and UserString. @@ -1084,11 +1135,6 @@ def test_capitalize_nonascii(self): self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7', '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize') - -class MixinStrUnicodeUserStringTest: - # additional tests that only work for - # stringlike objects, i.e. str, UserString - def test_startswith(self): self.checkequal(True, 'hello', 'startswith', 'he') self.checkequal(True, 'hello', 'startswith', 'hello') @@ -1200,9 +1246,6 @@ def test___contains__(self): self.checkequal(False, 'asd', '__contains__', 'asdf') self.checkequal(False, '', '__contains__', 'asdf') - - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_subscript(self): self.checkequal('a', 'abc', '__getitem__', 0) self.checkequal('c', 'abc', '__getitem__', -1) @@ -1273,8 +1316,11 @@ def test_join(self): self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', ('a' * i,) * i) - #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1()) - self.checkequal('a b c', ' ', 'join', BadSeq2()) + class LiesAboutLengthSeq(Sequence): + def __init__(self): self.seq = ['a', 'b', 'c'] + def __len__(self): return 8 + + self.checkequal('a b c', ' ', 'join', LiesAboutLengthSeq()) self.checkraises(TypeError, ' ', 'join') self.checkraises(TypeError, ' ', 'join', None) @@ -1453,25 +1499,23 @@ def test_none_arguments(self): self.checkequal(True, s, 'startswith', 'h', None, -2) self.checkequal(False, s, 'startswith', 'x', None, None) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_find_etc_raise_correct_error_messages(self): # issue 11828 s = 'hello' x = 'x' - self.assertRaisesRegex(TypeError, r'^find\(', s.find, + self.assertRaisesRegex(TypeError, r'^find\b', s.find, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind, + self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^index\(', s.index, + self.assertRaisesRegex(TypeError, r'^index\b', s.index, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex, + self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^count\(', s.count, + self.assertRaisesRegex(TypeError, r'^count\b', s.count, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith, + self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith, + self.assertRaisesRegex(TypeError, r'^endswith\b', s.endswith, x, None, None, None) # issue #15534 diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 3c634b6cac..f8edfe0a17 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -10,6 +10,7 @@ import sys import copy import functools +import operator import pickle import tempfile import textwrap @@ -46,6 +47,10 @@ def __index__(self): class BaseBytesTest: + def assertTypedEqual(self, actual, expected): + self.assertIs(type(actual), type(expected)) + self.assertEqual(actual, expected) + def test_basics(self): b = self.type2test() self.assertEqual(type(b), self.type2test) @@ -196,8 +201,6 @@ def test_constructor_value_errors(self): self.assertRaises(ValueError, self.type2test, [sys.maxsize+1]) self.assertRaises(ValueError, self.type2test, [10**100]) - # TODO: RUSTPYTHON - @unittest.expectedFailure @bigaddrspacetest def test_constructor_overflow(self): size = MAX_Py_ssize_t @@ -321,8 +324,6 @@ def test_decode(self): # Default encoding is utf-8 self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_check_encoding_errors(self): # bpo-37388: bytes(str) and bytes.encode() must check encoding # and errors arguments in dev mode @@ -737,6 +738,37 @@ def check(fmt, vals, result): check(b'%i%b %*.*b', (10, b'3', 5, 3, b'abc',), b'103 abc') check(b'%c', b'a', b'a') + class PseudoFloat: + def __init__(self, value): + self.value = float(value) + def __int__(self): + return int(self.value) + + pi = PseudoFloat(3.1415) + + exceptions_params = [ + ('%x format: an integer is required, not float', b'%x', 3.14), + ('%X format: an integer is required, not float', b'%X', 2.11), + ('%o format: an integer is required, not float', b'%o', 1.79), + ('%x format: an integer is required, not PseudoFloat', b'%x', pi), + ('%x format: an integer is required, not complex', b'%x', 3j), + ('%X format: an integer is required, not complex', b'%X', 2j), + ('%o format: an integer is required, not complex', b'%o', 1j), + ('%u format: a real number is required, not complex', b'%u', 3j), + # See https://github.com/python/cpython/issues/130928 as for why + # the exception message contains '%d' instead of '%i'. + ('%d format: a real number is required, not complex', b'%i', 2j), + ('%d format: a real number is required, not complex', b'%d', 2j), + ( + r'%c requires an integer in range\(256\) or a single byte', + b'%c', pi + ), + ] + + for msg, format_bytes, value in exceptions_params: + with self.assertRaisesRegex(TypeError, msg): + operator.mod(format_bytes, value) + def test_imod(self): b = self.type2test(b'hello, %b!') orig = b @@ -936,8 +968,6 @@ def test_integer_arguments_out_of_byte_range(self): self.assertRaises(ValueError, method, 256) self.assertRaises(ValueError, method, 9999) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_find_etc_raise_correct_error_messages(self): # issue 11828 b = self.type2test(b'hello') @@ -957,8 +987,6 @@ def test_find_etc_raise_correct_error_messages(self): self.assertRaisesRegex(TypeError, r'\bendswith\b', b.endswith, x, None, None, None) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_free_after_iterating(self): test.support.check_free_after_iterating(self, iter, self.type2test) test.support.check_free_after_iterating(self, reversed, self.type2test) @@ -995,13 +1023,13 @@ def test_translate(self): self.assertEqual(c, b'hllo') def test_sq_item(self): - _testcapi = import_helper.import_module('_testcapi') + _testlimitedcapi = import_helper.import_module('_testlimitedcapi') obj = self.type2test((42,)) with self.assertRaises(IndexError): - _testcapi.sequence_getitem(obj, -2) + _testlimitedcapi.sequence_getitem(obj, -2) with self.assertRaises(IndexError): - _testcapi.sequence_getitem(obj, 1) - self.assertEqual(_testcapi.sequence_getitem(obj, 0), 42) + _testlimitedcapi.sequence_getitem(obj, 1) + self.assertEqual(_testlimitedcapi.sequence_getitem(obj, 0), 42) class BytesTest(BaseBytesTest, unittest.TestCase): @@ -1031,36 +1059,63 @@ def test_buffer_is_readonly(self): self.assertRaises(TypeError, f.readinto, b"") def test_custom(self): - class A: - def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A()), b'abc') - class A: pass - self.assertRaises(TypeError, bytes, A()) - class A: - def __bytes__(self): - return None - self.assertRaises(TypeError, bytes, A()) - class A: + self.assertEqual(bytes(BytesSubclass(b'abc')), b'abc') + self.assertEqual(BytesSubclass(OtherBytesSubclass(b'abc')), + BytesSubclass(b'abc')) + self.assertEqual(bytes(WithBytes(b'abc')), b'abc') + self.assertEqual(BytesSubclass(WithBytes(b'abc')), BytesSubclass(b'abc')) + + class NoBytes: pass + self.assertRaises(TypeError, bytes, NoBytes()) + self.assertRaises(TypeError, bytes, WithBytes('abc')) + self.assertRaises(TypeError, bytes, WithBytes(None)) + class IndexWithBytes: def __bytes__(self): return b'a' def __index__(self): return 42 - self.assertEqual(bytes(A()), b'a') + self.assertEqual(bytes(IndexWithBytes()), b'a') # Issue #25766 - class A(str): + class StrWithBytes(str): + def __new__(cls, value): + self = str.__new__(cls, '\u20ac') + self.value = value + return self def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A('\u20ac')), b'abc') - self.assertEqual(bytes(A('\u20ac'), 'iso8859-15'), b'\xa4') + return self.value + self.assertEqual(bytes(StrWithBytes(b'abc')), b'abc') + self.assertEqual(bytes(StrWithBytes(b'abc'), 'iso8859-15'), b'\xa4') + self.assertEqual(bytes(StrWithBytes(BytesSubclass(b'abc'))), b'abc') + self.assertEqual(BytesSubclass(StrWithBytes(b'abc')), BytesSubclass(b'abc')) + self.assertEqual(BytesSubclass(StrWithBytes(b'abc'), 'iso8859-15'), + BytesSubclass(b'\xa4')) + self.assertEqual(BytesSubclass(StrWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertEqual(BytesSubclass(StrWithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) # Issue #24731 - class A: + self.assertTypedEqual(bytes(WithBytes(BytesSubclass(b'abc'))), BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(WithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(WithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + + class BytesWithBytes(bytes): + def __new__(cls, value): + self = bytes.__new__(cls, b'\xa4') + self.value = value + return self def __bytes__(self): - return OtherBytesSubclass(b'abc') - self.assertEqual(bytes(A()), b'abc') - self.assertIs(type(bytes(A())), OtherBytesSubclass) - self.assertEqual(BytesSubclass(A()), b'abc') - self.assertIs(type(BytesSubclass(A())), BytesSubclass) + return self.value + self.assertTypedEqual(bytes(BytesWithBytes(b'abc')), b'abc') + self.assertTypedEqual(BytesSubclass(BytesWithBytes(b'abc')), + BytesSubclass(b'abc')) + self.assertTypedEqual(bytes(BytesWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(BytesWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(BytesWithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) # Test PyBytes_FromFormat() def test_from_format(self): @@ -1233,6 +1288,8 @@ class SubBytes(bytes): class ByteArrayTest(BaseBytesTest, unittest.TestCase): type2test = bytearray + _testlimitedcapi = import_helper.import_module('_testlimitedcapi') + def test_getitem_error(self): b = bytearray(b'python') msg = "bytearray indices must be integers or slices" @@ -1325,47 +1382,73 @@ def by(s): self.assertEqual(re.findall(br"\w+", b), [by("Hello"), by("world")]) def test_setitem(self): - b = bytearray([1, 2, 3]) - b[1] = 100 - self.assertEqual(b, bytearray([1, 100, 3])) - b[-1] = 200 - self.assertEqual(b, bytearray([1, 100, 200])) - b[0] = Indexable(10) - self.assertEqual(b, bytearray([10, 100, 200])) - try: - b[3] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[-10] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[0] = 256 - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = Indexable(-1) - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = None - self.fail("Didn't raise TypeError") - except TypeError: - pass + def setitem_as_mapping(b, i, val): + b[i] = val + + def setitem_as_sequence(b, i, val): + self._testlimitedcapi.sequence_setitem(b, i, val) + + def do_tests(setitem): + b = bytearray([1, 2, 3]) + setitem(b, 1, 100) + self.assertEqual(b, bytearray([1, 100, 3])) + setitem(b, -1, 200) + self.assertEqual(b, bytearray([1, 100, 200])) + setitem(b, 0, Indexable(10)) + self.assertEqual(b, bytearray([10, 100, 200])) + try: + setitem(b, 3, 0) + self.fail("Didn't raise IndexError") + except IndexError: + pass + try: + setitem(b, -10, 0) + self.fail("Didn't raise IndexError") + except IndexError: + pass + try: + setitem(b, 0, 256) + self.fail("Didn't raise ValueError") + except ValueError: + pass + try: + setitem(b, 0, Indexable(-1)) + self.fail("Didn't raise ValueError") + except ValueError: + pass + try: + setitem(b, 0, object()) + self.fail("Didn't raise TypeError") + except TypeError: + pass + + with self.subTest("tp_as_mapping"): + do_tests(setitem_as_mapping) + + with self.subTest("tp_as_sequence"): + do_tests(setitem_as_sequence) def test_delitem(self): - b = bytearray(range(10)) - del b[0] - self.assertEqual(b, bytearray(range(1, 10))) - del b[-1] - self.assertEqual(b, bytearray(range(1, 9))) - del b[4] - self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) + def del_as_mapping(b, i): + del b[i] + + def del_as_sequence(b, i): + self._testlimitedcapi.sequence_delitem(b, i) + + def do_tests(delete): + b = bytearray(range(10)) + delete(b, 0) + self.assertEqual(b, bytearray(range(1, 10))) + delete(b, -1) + self.assertEqual(b, bytearray(range(1, 9))) + delete(b, 4) + self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) + + with self.subTest("tp_as_mapping"): + do_tests(del_as_mapping) + + with self.subTest("tp_as_sequence"): + do_tests(del_as_sequence) def test_setslice(self): b = bytearray(range(10)) @@ -1492,11 +1575,6 @@ def test_irepeat_1char(self): self.assertEqual(b, b1) self.assertIs(b, b1) - # NOTE: RUSTPYTHON: - # - # The second instance of self.assertGreater was replaced with - # self.assertGreaterEqual since, in RustPython, the underlying storage - # is a Vec which doesn't require trailing null byte. def test_alloc(self): b = bytearray() alloc = b.__alloc__() @@ -1505,15 +1583,10 @@ def test_alloc(self): for i in range(100): b += b"x" alloc = b.__alloc__() - self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched + self.assertGreater(alloc, len(b)) # including trailing null byte if alloc not in seq: seq.append(alloc) - # NOTE: RUSTPYTHON: - # - # The usages of self.assertGreater were replaced with - # self.assertGreaterEqual since, in RustPython, the underlying storage - # is a Vec which doesn't require trailing null byte. def test_init_alloc(self): b = bytearray() def g(): @@ -1524,12 +1597,12 @@ def g(): self.assertEqual(len(b), len(a)) self.assertLessEqual(len(b), i) alloc = b.__alloc__() - self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched + self.assertGreater(alloc, len(b)) # including trailing null byte b.__init__(g()) self.assertEqual(list(b), list(range(1, 100))) self.assertEqual(len(b), 99) alloc = b.__alloc__() - self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched + self.assertGreater(alloc, len(b)) def test_extend(self): orig = b'hello' @@ -1558,6 +1631,13 @@ def test_extend(self): a = bytearray(b'') a.extend([Indexable(ord('a'))]) self.assertEqual(a, b'a') + a = bytearray(b'abc') + self.assertRaisesRegex(TypeError, # Override for string. + "expected iterable of integers; got: 'str'", + a.extend, 'def') + self.assertRaisesRegex(TypeError, # But not for others. + "can't extend bytearray with float", + a.extend, 1.0) def test_remove(self): b = bytearray(b'hello') @@ -1747,6 +1827,8 @@ def test_repeat_after_setslice(self): self.assertEqual(b3, b'xcxcxc') def test_mutating_index(self): + # See gh-91153 + class Boom: def __index__(self): b.clear() @@ -1758,10 +1840,9 @@ def __index__(self): b[0] = Boom() with self.subTest("tp_as_sequence"): - _testcapi = import_helper.import_module('_testcapi') b = bytearray(b'Now you see me...') with self.assertRaises(IndexError): - _testcapi.sequence_setitem(b, 0, Boom()) + self._testlimitedcapi.sequence_setitem(b, 0, Boom()) class AssortedBytesTest(unittest.TestCase): @@ -1990,7 +2071,6 @@ def test_join(self): s3 = s1.join([b"abcd"]) self.assertIs(type(s3), self.basetype) - @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest") def test_pickle(self): a = self.type2test(b"abcd") a.x = 10 @@ -2005,7 +2085,6 @@ def test_pickle(self): self.assertEqual(type(a.z), type(b.z)) self.assertFalse(hasattr(b, 'y')) - @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest") def test_copy(self): a = self.type2test(b"abcd") a.x = 10 @@ -2060,6 +2139,12 @@ class BytesSubclass(bytes): class OtherBytesSubclass(bytes): pass +class WithBytes: + def __init__(self, value): + self.value = value + def __bytes__(self): + return self.value + class ByteArraySubclassTest(SubclassTest, unittest.TestCase): basetype = bytearray type2test = ByteArraySubclass diff --git a/Lib/test/test_unicode.py b/Lib/test/test_str.py similarity index 94% rename from Lib/test/test_unicode.py rename to Lib/test/test_str.py index 1a8a8f7ee9..46673cc56a 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_str.py @@ -7,6 +7,7 @@ """ import _string import codecs +import datetime import itertools import operator import pickle @@ -55,8 +56,22 @@ def duplicate_string(text): class StrSubclass(str): pass -class UnicodeTest(string_tests.CommonTest, - string_tests.MixinStrUnicodeUserStringTest, +class OtherStrSubclass(str): + pass + +class WithStr: + def __init__(self, value): + self.value = value + def __str__(self): + return self.value + +class WithRepr: + def __init__(self, value): + self.value = value + def __repr__(self): + return self.value + +class StrTest(string_tests.StringLikeTest, string_tests.MixinStrUnicodeTest, unittest.TestCase): @@ -84,6 +99,10 @@ def __repr__(self): self.assertEqual(realresult, result) self.assertTrue(object is not realresult) + def assertTypedEqual(self, actual, expected): + self.assertIs(type(actual), type(expected)) + self.assertEqual(actual, expected) + def test_literals(self): self.assertEqual('\xff', '\u00ff') self.assertEqual('\uffff', '\U0000ffff') @@ -128,10 +147,13 @@ def test_ascii(self): self.assertEqual(ascii("\U00010000" * 39 + "\uffff" * 4096), ascii("\U00010000" * 39 + "\uffff" * 4096)) - class WrongRepr: - def __repr__(self): - return b'byte-repr' - self.assertRaises(TypeError, ascii, WrongRepr()) + self.assertTypedEqual(ascii('\U0001f40d'), r"'\U0001f40d'") + self.assertTypedEqual(ascii(StrSubclass('abc')), "'abc'") + self.assertTypedEqual(ascii(WithRepr('')), '') + self.assertTypedEqual(ascii(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(ascii(WithRepr('<\U0001f40d>')), r'<\U0001f40d>') + self.assertTypedEqual(ascii(WithRepr(StrSubclass('<\U0001f40d>'))), r'<\U0001f40d>') + self.assertRaises(TypeError, ascii, WithRepr(b'byte-repr')) def test_repr(self): # Test basic sanity of repr() @@ -169,10 +191,13 @@ def test_repr(self): self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096), repr("\U00010000" * 39 + "\uffff" * 4096)) - class WrongRepr: - def __repr__(self): - return b'byte-repr' - self.assertRaises(TypeError, repr, WrongRepr()) + self.assertTypedEqual(repr('\U0001f40d'), "'\U0001f40d'") + self.assertTypedEqual(repr(StrSubclass('abc')), "'abc'") + self.assertTypedEqual(repr(WithRepr('')), '') + self.assertTypedEqual(repr(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(repr(WithRepr('<\U0001f40d>')), '<\U0001f40d>') + self.assertTypedEqual(repr(WithRepr(StrSubclass('<\U0001f40d>'))), StrSubclass('<\U0001f40d>')) + self.assertRaises(TypeError, repr, WithRepr(b'byte-repr')) def test_iterators(self): # Make sure unicode objects have an __iter__ method @@ -213,7 +238,7 @@ def test_pickle_iterator(self): self.assertEqual(case, pickled) def test_count(self): - string_tests.CommonTest.test_count(self) + string_tests.StringLikeTest.test_count(self) # check mixed argument types self.checkequalnofix(3, 'aaa', 'count', 'a') self.checkequalnofix(0, 'aaa', 'count', 'b') @@ -243,7 +268,7 @@ class MyStr(str): self.checkequal(3, MyStr('aaa'), 'count', 'a') def test_find(self): - string_tests.CommonTest.test_find(self) + string_tests.StringLikeTest.test_find(self) # test implementation details of the memchr fast path self.checkequal(100, 'a' * 100 + '\u0102', 'find', '\u0102') self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0201') @@ -288,7 +313,7 @@ def test_find(self): self.checkequal(-1, '\u0102' * 100, 'find', '\u0102\U00100304') def test_rfind(self): - string_tests.CommonTest.test_rfind(self) + string_tests.StringLikeTest.test_rfind(self) # test implementation details of the memrchr fast path self.checkequal(0, '\u0102' + 'a' * 100 , 'rfind', '\u0102') self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0201') @@ -329,7 +354,7 @@ def test_rfind(self): self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102') def test_index(self): - string_tests.CommonTest.test_index(self) + string_tests.StringLikeTest.test_index(self) self.checkequalnofix(0, 'abcdefghiabc', 'index', '') self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def') self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc') @@ -353,7 +378,7 @@ def test_index(self): self.assertRaises(ValueError, ('\u0102' * 100).index, '\u0102\U00100304') def test_rindex(self): - string_tests.CommonTest.test_rindex(self) + string_tests.StringLikeTest.test_rindex(self) self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '') self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def') self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc') @@ -449,7 +474,7 @@ def test_maketrans_translate(self): self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz') def test_split(self): - string_tests.CommonTest.test_split(self) + string_tests.StringLikeTest.test_split(self) # test mixed kinds for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -466,7 +491,7 @@ def test_split(self): left + delim * 2 + right, 'split', delim *2) def test_rsplit(self): - string_tests.CommonTest.test_rsplit(self) + string_tests.StringLikeTest.test_rsplit(self) # test mixed kinds for left, right in ('ba', 'юё', '\u0101\u0100', '\U00010301\U00010300'): left *= 9 @@ -486,7 +511,7 @@ def test_rsplit(self): left + right, 'rsplit', None) def test_partition(self): - string_tests.MixinStrUnicodeUserStringTest.test_partition(self) + string_tests.StringLikeTest.test_partition(self) # test mixed kinds self.checkequal(('ABCDEFGH', '', ''), 'ABCDEFGH', 'partition', '\u4200') for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -503,7 +528,7 @@ def test_partition(self): left + delim * 2 + right, 'partition', delim * 2) def test_rpartition(self): - string_tests.MixinStrUnicodeUserStringTest.test_rpartition(self) + string_tests.StringLikeTest.test_rpartition(self) # test mixed kinds self.checkequal(('', '', 'ABCDEFGH'), 'ABCDEFGH', 'rpartition', '\u4200') for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -520,7 +545,7 @@ def test_rpartition(self): left + delim * 2 + right, 'rpartition', delim * 2) def test_join(self): - string_tests.MixinStrUnicodeUserStringTest.test_join(self) + string_tests.StringLikeTest.test_join(self) class MyWrapper: def __init__(self, sval): self.sval = sval @@ -539,7 +564,6 @@ def __str__(self): return self.sval self.checkraises(TypeError, ' ', 'join', [1, 2, 3]) self.checkraises(TypeError, ' ', 'join', ['1', '2', 3]) - @unittest.skip("TODO: RUSTPYTHON, oom handling") @unittest.skipIf(sys.maxsize > 2**32, 'needs too much memory on a 64-bit platform') def test_join_overflow(self): @@ -548,7 +572,7 @@ def test_join_overflow(self): self.assertRaises(OverflowError, ''.join, seq) def test_replace(self): - string_tests.CommonTest.test_replace(self) + string_tests.StringLikeTest.test_replace(self) # method call forwarded from str implementation because of unicode argument self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1) @@ -768,8 +792,6 @@ def test_isdecimal(self): for ch in ['\U0001D7F6', '\U00011066', '\U000104A0']: self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_isdigit(self): super().test_isdigit() self.checkequalnofix(True, '\u2460', 'isdigit') @@ -831,6 +853,15 @@ def test_isprintable(self): self.assertTrue('\U0001F46F'.isprintable()) self.assertFalse('\U000E0020'.isprintable()) + @support.requires_resource('cpu') + def test_isprintable_invariant(self): + for codepoint in range(sys.maxunicode + 1): + char = chr(codepoint) + category = unicodedata.category(char) + self.assertEqual(char.isprintable(), + category[0] not in ('C', 'Z') + or char == ' ') + def test_surrogates(self): for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'): @@ -859,7 +890,7 @@ def test_surrogates(self): def test_lower(self): - string_tests.CommonTest.test_lower(self) + string_tests.StringLikeTest.test_lower(self) self.assertEqual('\U00010427'.lower(), '\U0001044F') self.assertEqual('\U00010427\U00010427'.lower(), '\U0001044F\U0001044F') @@ -890,7 +921,7 @@ def test_casefold(self): self.assertEqual('\u00b5'.casefold(), '\u03bc') def test_upper(self): - string_tests.CommonTest.test_upper(self) + string_tests.StringLikeTest.test_upper(self) self.assertEqual('\U0001044F'.upper(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.upper(), '\U00010427\U00010427') @@ -906,10 +937,8 @@ def test_upper(self): self.assertEqual('\U0008fffe'.upper(), '\U0008fffe') self.assertEqual('\u2177'.upper(), '\u2167') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_capitalize(self): - string_tests.CommonTest.test_capitalize(self) + string_tests.StringLikeTest.test_capitalize(self) self.assertEqual('\U0001044F'.capitalize(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.capitalize(), '\U00010427\U0001044F') @@ -925,8 +954,6 @@ def test_capitalize(self): self.assertEqual('ﬁnnish'.capitalize(), 'Finnish') self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_title(self): super().test_title() self.assertEqual('\U0001044F'.title(), '\U00010427') @@ -944,10 +971,8 @@ def test_title(self): self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy') self.assertEqual('A\u03a3A'.title(), 'A\u03c3a') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_swapcase(self): - string_tests.CommonTest.test_swapcase(self) + string_tests.StringLikeTest.test_swapcase(self) self.assertEqual('\U0001044F'.swapcase(), '\U00010427') self.assertEqual('\U00010427'.swapcase(), '\U0001044F') self.assertEqual('\U0001044F\U0001044F'.swapcase(), @@ -973,7 +998,7 @@ def test_swapcase(self): self.assertEqual('\u1fd2'.swapcase(), '\u0399\u0308\u0300') def test_center(self): - string_tests.CommonTest.test_center(self) + string_tests.StringLikeTest.test_center(self) self.assertEqual('x'.center(2, '\U0010FFFF'), 'x\U0010FFFF') self.assertEqual('x'.center(3, '\U0010FFFF'), @@ -1045,8 +1070,6 @@ def test_issue18183(self): '\U00100000'.ljust(3, '\U00010000') '\U00100000'.rjust(3, '\U00010000') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format(self): self.assertEqual(''.format(), '') self.assertEqual('a'.format(), 'a') @@ -1430,21 +1453,16 @@ def __getitem__(self, key): self.assertRaises(TypeError, '{a}'.format_map, []) self.assertRaises(ZeroDivisionError, '{a}'.format_map, BadMapping()) - @unittest.skip("TODO: RUSTPYTHON, killed for chewing up RAM") def test_format_huge_precision(self): format_string = ".{}f".format(sys.maxsize + 1) with self.assertRaises(ValueError): result = format(2.34, format_string) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_huge_width(self): format_string = "{}f".format(sys.maxsize + 1) with self.assertRaises(ValueError): result = format(2.34, format_string) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_huge_item_number(self): format_string = "{{{}:.6f}}".format(sys.maxsize + 1) with self.assertRaises(ValueError): @@ -1480,10 +1498,8 @@ def __format__(self, spec): self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3') self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_formatting(self): - string_tests.MixinStrUnicodeUserStringTest.test_formatting(self) + string_tests.StringLikeTest.test_formatting(self) # Testing Unicode formatting strings... self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc') self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000, 3.00') @@ -1659,7 +1675,7 @@ def test_startswith_endswith_errors(self): self.assertIn('str', exc) self.assertIn('tuple', exc) - @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR') + @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR', '') def test_format_float(self): # should not format with a comma, but always with C locale self.assertEqual('1.0', '%.1f' % 1.0) @@ -1730,10 +1746,6 @@ def __str__(self): 'character buffers are decoded to unicode' ) - self.assertRaises(TypeError, str, 42, 42, 42) - - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_constructor_keyword_args(self): """Pass various keyword argument combinations to the constructor.""" # The object argument can be passed as a keyword. @@ -1743,8 +1755,6 @@ def test_constructor_keyword_args(self): self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'" self.assertEqual(str(object=b'foo', errors='strict'), 'foo') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_constructor_defaults(self): """Check the constructor argument defaults.""" # The object argument defaults to '' or b''. @@ -1756,8 +1766,6 @@ def test_constructor_defaults(self): # The errors argument defaults to strict. self.assertRaises(UnicodeDecodeError, str, utf8_cent, encoding='ascii') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_codecs_utf7(self): utfTests = [ ('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example @@ -1910,6 +1918,12 @@ def test_utf8_decode_invalid_sequences(self): self.assertRaises(UnicodeDecodeError, (b'\xF4'+cb+b'\xBF\xBF').decode, 'utf-8') + def test_issue127903(self): + # gh-127903: ``_copy_characters`` crashes on DEBUG builds when + # there is nothing to copy. + d = datetime.datetime(2013, 11, 10, 14, 20, 59) + self.assertEqual(d.strftime('%z'), '') + def test_issue8271(self): # Issue #8271: during the decoding of an invalid UTF-8 byte sequence, # only the start byte and the continuation byte(s) are now considered @@ -2261,8 +2275,6 @@ def test_codecs_errors(self): self.assertRaises(ValueError, complex, "\ud800") self.assertRaises(ValueError, complex, "\udf00") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_codecs(self): # Encoding self.assertEqual('hello'.encode('ascii'), b'hello') @@ -2392,32 +2404,39 @@ def test_ucs4(self): else: self.fail("Should have raised UnicodeDecodeError") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_conversion(self): # Make sure __str__() works properly - class ObjectToStr: - def __str__(self): - return "foo" - - class StrSubclassToStr(str): - def __str__(self): - return "foo" - - class StrSubclassToStrSubclass(str): - def __new__(cls, content=""): - return str.__new__(cls, 2*content) - def __str__(self): + class StrWithStr(str): + def __new__(cls, value): + self = str.__new__(cls, "") + self.value = value return self + def __str__(self): + return self.value - self.assertEqual(str(ObjectToStr()), "foo") - self.assertEqual(str(StrSubclassToStr("bar")), "foo") - s = str(StrSubclassToStrSubclass("foo")) - self.assertEqual(s, "foofoo") - self.assertIs(type(s), StrSubclassToStrSubclass) - s = StrSubclass(StrSubclassToStrSubclass("foo")) - self.assertEqual(s, "foofoo") - self.assertIs(type(s), StrSubclass) + self.assertTypedEqual(str(WithStr('abc')), 'abc') + self.assertTypedEqual(str(WithStr(StrSubclass('abc'))), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr('abc')), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr(StrSubclass('abc'))), + StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr(OtherStrSubclass('abc'))), + StrSubclass('abc')) + + self.assertTypedEqual(str(StrWithStr('abc')), 'abc') + self.assertTypedEqual(str(StrWithStr(StrSubclass('abc'))), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr('abc')), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr(StrSubclass('abc'))), + StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr(OtherStrSubclass('abc'))), + StrSubclass('abc')) + + self.assertTypedEqual(str(WithRepr('')), '') + self.assertTypedEqual(str(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr('')), StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr(StrSubclass(''))), + StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr(OtherStrSubclass(''))), + StrSubclass('')) def test_unicode_repr(self): class s1: @@ -2433,7 +2452,6 @@ def test_printable_repr(self): # This test only affects 32-bit platforms because expandtabs can only take # an int as the max value, not a 64-bit C long. If expandtabs is changed # to take a 64-bit long, this test should apply to all platforms. - @unittest.skip("TODO: RUSTPYTHON, oom handling") @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, 'only applies to 32-bit platforms') def test_expandtabs_overflows_gracefully(self): @@ -2444,7 +2462,6 @@ def test_expandtabs_optimization(self): s = 'abc' self.assertIs(s.expandtabs(), s) - @unittest.skip("TODO: RUSTPYTHON, aborted: memory allocation of 9223372036854775759 bytes failed") def test_raiseMemError(self): asciifields = "nnb" compactfields = asciifields + "nP" @@ -2584,14 +2601,10 @@ def test_compare(self): self.assertTrue(astral >= bmp2) self.assertFalse(astral >= astral2) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_free_after_iterating(self): support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_check_encoding_errors(self): # bpo-37388: str(bytes) and str.decode() must check encoding and errors # arguments in dev mode @@ -2652,6 +2665,47 @@ def test_check_encoding_errors(self): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + def test_str_invalid_call(self): + # too many args + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str("too", "many", "argu", "ments") + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str(1, "", "", 1) + + # no such kw arg + with self.assertRaisesRegex(TypeError, r"str\(\) got an unexpected keyword argument 'test'"): + str(test=1) + + # 'encoding' must be str + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, encoding=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", b"ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", encoding=b"ascii") + + # 'errors' must be str + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, "", errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", b"strict") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", errors=b"strict") + + # both positional and kwarg + with self.assertRaisesRegex(TypeError, r"argument for str\(\) given by name \('encoding'\) and position \(2\)"): + str(b"x", "utf-8", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "ignore", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "strict", errors="ignore") + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py index 80c22c6cdd..fe25bfe9f8 100644 --- a/Lib/test/test_unicode_file.py +++ b/Lib/test/test_unicode_file.py @@ -110,7 +110,7 @@ def _test_single(self, filename): os.unlink(filename) self.assertTrue(not os.path.exists(filename)) # and again with os.open. - f = os.open(filename, os.O_CREAT) + f = os.open(filename, os.O_CREAT | os.O_WRONLY) os.close(f) try: self._do_single(filename) diff --git a/Lib/test/test_unicode_file_functions.py b/Lib/test/test_unicode_file_functions.py index 47619c8807..25c16e3a0b 100644 --- a/Lib/test/test_unicode_file_functions.py +++ b/Lib/test/test_unicode_file_functions.py @@ -5,7 +5,7 @@ import unittest import warnings from unicodedata import normalize -from test.support import os_helper +from test.support import is_apple, os_helper from test import support @@ -23,13 +23,13 @@ '10_\u1fee\u1ffd', ] -# Mac OS X decomposes Unicode names, using Normal Form D. +# Apple platforms decompose Unicode names, using Normal Form D. # http://developer.apple.com/mac/library/qa/qa2001/qa1173.html # "However, most volume formats do not follow the exact specification for # these normal forms. For example, HFS Plus uses a variant of Normal Form D # in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through # U+2FAFF are not decomposed." -if sys.platform != 'darwin': +if not is_apple: filenames.extend([ # Specific code points: NFC(fn), NFD(fn), NFKC(fn) and NFKD(fn) all different '11_\u0385\u03d3\u03d4', @@ -119,11 +119,11 @@ def test_open(self): os.stat(name) self._apply_failure(os.listdir, name, self._listdir_failure) - # Skip the test on darwin, because darwin does normalize the filename to + # Skip the test on Apple platforms, because they don't normalize the filename to # NFD (a variant of Unicode NFD form). Normalize the filename to NFC, NFKC, # NFKD in Python is useless, because darwin will normalize it later and so # open(), os.stat(), etc. don't raise any exception. - @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X') + @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms') @unittest.skipIf( support.is_emscripten or support.is_wasi, "test fails on Emscripten/WASI when host platform is macOS." @@ -142,10 +142,10 @@ def test_normalize(self): self._apply_failure(os.remove, name) self._apply_failure(os.listdir, name) - # Skip the test on darwin, because darwin uses a normalization different + # Skip the test on Apple platforms, because they use a normalization different # than Python NFD normalization: filenames are different even if we use # Python NFD normalization. - @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X') + @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms') def test_listdir(self): sf0 = set(self.files) with warnings.catch_warnings(): diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py index d7a0ece253..63c6c05582 100644 --- a/Lib/test/test_unicode_identifiers.py +++ b/Lib/test/test_unicode_identifiers.py @@ -17,11 +17,9 @@ def test_non_bmp_normalized(self): 𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 1 self.assertIn("Unicode", dir()) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_invalid(self): try: - from test import badsyntax_3131 + from test.tokenizedata import badsyntax_3131 except SyntaxError as err: self.assertEqual(str(err), "invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)") diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 29da4a25a3..2cf367a2cf 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -11,18 +11,21 @@ import sys import unicodedata import unittest -from test.support import (open_urlresource, requires_resource, script_helper, - cpython_only, check_disallow_instantiation, - ResourceDenied) +from test.support import ( + open_urlresource, + requires_resource, + script_helper, + cpython_only, + check_disallow_instantiation, + force_not_colorized, +) class UnicodeMethodsTest(unittest.TestCase): # update this, if the database changes - expectedchecksum = '4739770dd4d0e5f1b1677accfc3552ed3c8ef326' + expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e' - # TODO: RUSTPYTHON - @unittest.expectedFailure @requires_resource('cpu') def test_method_checksum(self): h = hashlib.sha1() @@ -74,9 +77,8 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): # Update this if the database changes. Make sure to do a full rebuild # (e.g. 'make distclean && make') to get the correct checksum. - expectedchecksum = '98d602e1f69d5c5bb8a5910c40bbbad4e18e8370' - # TODO: RUSTPYTHON - @unittest.expectedFailure + expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba' + @requires_resource('cpu') def test_function_checksum(self): data = [] @@ -94,6 +96,8 @@ def test_function_checksum(self): self.db.decomposition(char), str(self.db.mirrored(char)), str(self.db.combining(char)), + unicodedata.east_asian_width(char), + self.db.name(char, ""), ] h.update(''.join(data).encode("ascii")) result = h.hexdigest() @@ -106,8 +110,26 @@ def test_name_inverse_lookup(self): if looked_name := self.db.name(char, None): self.assertEqual(self.db.lookup(looked_name), char) - # TODO: RUSTPYTHON - @unittest.expectedFailure + def test_no_names_in_pua(self): + puas = [*range(0xe000, 0xf8ff), + *range(0xf0000, 0xfffff), + *range(0x100000, 0x10ffff)] + for i in puas: + char = chr(i) + self.assertRaises(ValueError, self.db.name, char) + + def test_lookup_nonexistant(self): + # just make sure that lookup can fail + for nonexistant in [ + "LATIN SMLL LETR A", + "OPEN HANDS SIGHS", + "DREGS", + "HANDBUG", + "MODIFIER LETTER CYRILLIC SMALL QUESTION MARK", + "???", + ]: + self.assertRaises(KeyError, self.db.lookup, nonexistant) + def test_digit(self): self.assertEqual(self.db.digit('A', None), None) self.assertEqual(self.db.digit('9'), 9) @@ -120,8 +142,6 @@ def test_digit(self): self.assertRaises(TypeError, self.db.digit, 'xx') self.assertRaises(ValueError, self.db.digit, 'x') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_numeric(self): self.assertEqual(self.db.numeric('A',None), None) self.assertEqual(self.db.numeric('9'), 9) @@ -135,8 +155,6 @@ def test_numeric(self): self.assertRaises(TypeError, self.db.numeric, 'xx') self.assertRaises(ValueError, self.db.numeric, 'x') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_decimal(self): self.assertEqual(self.db.decimal('A',None), None) self.assertEqual(self.db.decimal('9'), 9) @@ -159,8 +177,6 @@ def test_category(self): self.assertRaises(TypeError, self.db.category) self.assertRaises(TypeError, self.db.category, 'xx') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_bidirectional(self): self.assertEqual(self.db.bidirectional('\uFFFE'), '') self.assertEqual(self.db.bidirectional(' '), 'WS') @@ -170,8 +186,6 @@ def test_bidirectional(self): self.assertRaises(TypeError, self.db.bidirectional) self.assertRaises(TypeError, self.db.bidirectional, 'xx') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_decomposition(self): self.assertEqual(self.db.decomposition('\uFFFE'),'') self.assertEqual(self.db.decomposition('\u00bc'), ' 0031 2044 0034') @@ -188,8 +202,6 @@ def test_mirrored(self): self.assertRaises(TypeError, self.db.mirrored) self.assertRaises(TypeError, self.db.mirrored, 'xx') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_combining(self): self.assertEqual(self.db.combining('\uFFFE'), 0) self.assertEqual(self.db.combining('a'), 0) @@ -217,8 +229,6 @@ def test_issue10254(self): b = 'C\u0338' * 20 + '\xC7' self.assertEqual(self.db.normalize('NFC', a), b) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_issue29456(self): # Fix #29456 u1176_str_a = '\u1100\u1176\u11a8' @@ -245,8 +255,23 @@ def test_east_asian_width(self): self.assertEqual(eaw('\u2010'), 'A') self.assertEqual(eaw('\U00020000'), 'W') - # TODO: RUSTPYTHON - @unittest.expectedFailure + def test_east_asian_width_unassigned(self): + eaw = self.db.east_asian_width + # unassigned + for char in '\u0530\u0ecf\u10c6\u20fc\uaaca\U000107bd\U000115f2': + self.assertEqual(eaw(char), 'N') + self.assertIs(self.db.name(char, None), None) + + # unassigned but reserved for CJK + for char in '\uFA6E\uFADA\U0002A6E0\U0002FA20\U0003134B\U0003FFFD': + self.assertEqual(eaw(char), 'W') + self.assertIs(self.db.name(char, None), None) + + # private use areas + for char in '\uE000\uF800\U000F0000\U000FFFEE\U00100000\U0010FFF0': + self.assertEqual(eaw(char), 'A') + self.assertIs(self.db.name(char, None), None) + def test_east_asian_width_9_0_changes(self): self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N') self.assertEqual(self.db.east_asian_width('\u231a'), 'W') @@ -258,8 +283,7 @@ def test_disallow_instantiation(self): # Ensure that the type disallows instantiation (bpo-43916) check_disallow_instantiation(self, unicodedata.UCD) - # TODO: RUSTPYTHON - @unittest.expectedFailure + @force_not_colorized def test_failed_import_during_compiling(self): # Issue 4367 # Decoding \N escapes requires the unicodedata module. If it can't be @@ -276,8 +300,6 @@ def test_failed_import_during_compiling(self): "(can't load unicodedata module)" self.assertIn(error, result.err.decode("ascii")) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_decimal_numeric_consistent(self): # Test that decimal and numeric are consistent, # i.e. if a character has a decimal value, @@ -291,8 +313,6 @@ def test_decimal_numeric_consistent(self): count += 1 self.assertTrue(count >= 10) # should have tested at least the ASCII digits - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_digit_numeric_consistent(self): # Test that digit and numeric are consistent, # i.e. if a character has a digit value, @@ -309,8 +329,6 @@ def test_digit_numeric_consistent(self): def test_bug_1704793(self): self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), '\U00010346') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_ucd_510(self): import unicodedata # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0 @@ -322,6 +340,7 @@ def test_ucd_510(self): self.assertTrue("\u1d79".upper()=='\ua77d') self.assertTrue(".".upper()=='.') + @requires_resource('cpu') def test_bug_5828(self): self.assertEqual("\u1d79".lower(), "\u1d79") # Only U+0000 should have U+0000 as its upper/lower/titlecase variant @@ -333,8 +352,6 @@ def test_bug_5828(self): [0] ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_bug_4971(self): # LETTER DZ WITH CARON: DZ, Dz, dz self.assertEqual("\u01c4".title(), "\u01c5") @@ -364,6 +381,7 @@ def unistr(data): return "".join([chr(x) for x in data]) @requires_resource('network') + @requires_resource('cpu') def test_normalization(self): TESTDATAFILE = "NormalizationTest.txt" TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}" diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py index 51b4f6041e..74df52f541 100644 --- a/Lib/test/test_userstring.py +++ b/Lib/test/test_userstring.py @@ -7,8 +7,7 @@ from collections import UserString class UserStringTest( - string_tests.CommonTest, - string_tests.MixinStrUnicodeUserStringTest, + string_tests.StringLikeTest, unittest.TestCase ): From 58602a3e811a790d28c686ab4b0c1462d7f4d501 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Sat, 12 Jul 2025 12:30:10 +0300 Subject: [PATCH 2/3] Apply RustPython patches --- Lib/test/string_tests.py | 9 +++++++- Lib/test/test_bytes.py | 26 ++++++++++++++++++--- Lib/test/test_str.py | 34 ++++++++++++++++++++++++++++ Lib/test/test_unicode_identifiers.py | 2 ++ Lib/test/test_unicodedata.py | 30 ++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 4 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 9bb0ce7bb5..c5831c47fc 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -438,7 +438,8 @@ def test_expandtabs(self): self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42) # This test is only valid when sizeof(int) == sizeof(void*) == 4. - if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4: + # XXX RUSTPYTHON TODO: expandtabs overflow checks + if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4 and False: self.checkraises(OverflowError, '\ta\n\tb', 'expandtabs', sys.maxsize) @@ -779,6 +780,7 @@ def test_replace_uses_two_way_maxcount(self): self.checkequal(AABAA + "ccc", AABAA + ABBA, 'replace', ABBA, "ccc", 2) + @unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms") @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, 'only applies to 32-bit platforms') def test_replace_overflow(self): @@ -1246,6 +1248,9 @@ def test___contains__(self): self.checkequal(False, 'asd', '__contains__', 'asdf') self.checkequal(False, '', '__contains__', 'asdf') + + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_subscript(self): self.checkequal('a', 'abc', '__getitem__', 0) self.checkequal('c', 'abc', '__getitem__', -1) @@ -1499,6 +1504,8 @@ def test_none_arguments(self): self.checkequal(True, s, 'startswith', 'h', None, -2) self.checkequal(False, s, 'startswith', 'x', None, None) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_find_etc_raise_correct_error_messages(self): # issue 11828 s = 'hello' diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index f8edfe0a17..e84df546a8 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -201,6 +201,8 @@ def test_constructor_value_errors(self): self.assertRaises(ValueError, self.type2test, [sys.maxsize+1]) self.assertRaises(ValueError, self.type2test, [10**100]) + # TODO: RUSTPYTHON + @unittest.expectedFailure @bigaddrspacetest def test_constructor_overflow(self): size = MAX_Py_ssize_t @@ -324,6 +326,8 @@ def test_decode(self): # Default encoding is utf-8 self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_check_encoding_errors(self): # bpo-37388: bytes(str) and bytes.encode() must check encoding # and errors arguments in dev mode @@ -968,6 +972,8 @@ def test_integer_arguments_out_of_byte_range(self): self.assertRaises(ValueError, method, 256) self.assertRaises(ValueError, method, 9999) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_find_etc_raise_correct_error_messages(self): # issue 11828 b = self.type2test(b'hello') @@ -987,6 +993,8 @@ def test_find_etc_raise_correct_error_messages(self): self.assertRaisesRegex(TypeError, r'\bendswith\b', b.endswith, x, None, None, None) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_free_after_iterating(self): test.support.check_free_after_iterating(self, iter, self.type2test) test.support.check_free_after_iterating(self, reversed, self.type2test) @@ -1575,6 +1583,11 @@ def test_irepeat_1char(self): self.assertEqual(b, b1) self.assertIs(b, b1) + # NOTE: RUSTPYTHON: + # + # The second instance of self.assertGreater was replaced with + # self.assertGreaterEqual since, in RustPython, the underlying storage + # is a Vec which doesn't require trailing null byte. def test_alloc(self): b = bytearray() alloc = b.__alloc__() @@ -1583,10 +1596,15 @@ def test_alloc(self): for i in range(100): b += b"x" alloc = b.__alloc__() - self.assertGreater(alloc, len(b)) # including trailing null byte + self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched if alloc not in seq: seq.append(alloc) + # NOTE: RUSTPYTHON: + # + # The usages of self.assertGreater were replaced with + # self.assertGreaterEqual since, in RustPython, the underlying storage + # is a Vec which doesn't require trailing null byte. def test_init_alloc(self): b = bytearray() def g(): @@ -1597,12 +1615,12 @@ def g(): self.assertEqual(len(b), len(a)) self.assertLessEqual(len(b), i) alloc = b.__alloc__() - self.assertGreater(alloc, len(b)) # including trailing null byte + self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched b.__init__(g()) self.assertEqual(list(b), list(range(1, 100))) self.assertEqual(len(b), 99) alloc = b.__alloc__() - self.assertGreater(alloc, len(b)) + self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched def test_extend(self): orig = b'hello' @@ -2071,6 +2089,7 @@ def test_join(self): s3 = s1.join([b"abcd"]) self.assertIs(type(s3), self.basetype) + @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest") def test_pickle(self): a = self.type2test(b"abcd") a.x = 10 @@ -2085,6 +2104,7 @@ def test_pickle(self): self.assertEqual(type(a.z), type(b.z)) self.assertFalse(hasattr(b, 'y')) + @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest") def test_copy(self): a = self.type2test(b"abcd") a.x = 10 diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 46673cc56a..b2d585c5fc 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -564,6 +564,7 @@ def __str__(self): return self.sval self.checkraises(TypeError, ' ', 'join', [1, 2, 3]) self.checkraises(TypeError, ' ', 'join', ['1', '2', 3]) + @unittest.skip("TODO: RUSTPYTHON, oom handling") @unittest.skipIf(sys.maxsize > 2**32, 'needs too much memory on a 64-bit platform') def test_join_overflow(self): @@ -792,6 +793,8 @@ def test_isdecimal(self): for ch in ['\U0001D7F6', '\U00011066', '\U000104A0']: self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_isdigit(self): super().test_isdigit() self.checkequalnofix(True, '\u2460', 'isdigit') @@ -937,6 +940,8 @@ def test_upper(self): self.assertEqual('\U0008fffe'.upper(), '\U0008fffe') self.assertEqual('\u2177'.upper(), '\u2167') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_capitalize(self): string_tests.StringLikeTest.test_capitalize(self) self.assertEqual('\U0001044F'.capitalize(), '\U00010427') @@ -954,6 +959,8 @@ def test_capitalize(self): self.assertEqual('ﬁnnish'.capitalize(), 'Finnish') self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_title(self): super().test_title() self.assertEqual('\U0001044F'.title(), '\U00010427') @@ -971,6 +978,8 @@ def test_title(self): self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy') self.assertEqual('A\u03a3A'.title(), 'A\u03c3a') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_swapcase(self): string_tests.StringLikeTest.test_swapcase(self) self.assertEqual('\U0001044F'.swapcase(), '\U00010427') @@ -1070,6 +1079,8 @@ def test_issue18183(self): '\U00100000'.ljust(3, '\U00010000') '\U00100000'.rjust(3, '\U00010000') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_format(self): self.assertEqual(''.format(), '') self.assertEqual('a'.format(), 'a') @@ -1453,16 +1464,21 @@ def __getitem__(self, key): self.assertRaises(TypeError, '{a}'.format_map, []) self.assertRaises(ZeroDivisionError, '{a}'.format_map, BadMapping()) + @unittest.skip("TODO: RUSTPYTHON, killed for chewing up RAM") def test_format_huge_precision(self): format_string = ".{}f".format(sys.maxsize + 1) with self.assertRaises(ValueError): result = format(2.34, format_string) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_format_huge_width(self): format_string = "{}f".format(sys.maxsize + 1) with self.assertRaises(ValueError): result = format(2.34, format_string) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_format_huge_item_number(self): format_string = "{{{}:.6f}}".format(sys.maxsize + 1) with self.assertRaises(ValueError): @@ -1498,6 +1514,8 @@ def __format__(self, spec): self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3') self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_formatting(self): string_tests.StringLikeTest.test_formatting(self) # Testing Unicode formatting strings... @@ -1746,6 +1764,8 @@ def __str__(self): 'character buffers are decoded to unicode' ) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_constructor_keyword_args(self): """Pass various keyword argument combinations to the constructor.""" # The object argument can be passed as a keyword. @@ -1755,6 +1775,8 @@ def test_constructor_keyword_args(self): self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'" self.assertEqual(str(object=b'foo', errors='strict'), 'foo') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_constructor_defaults(self): """Check the constructor argument defaults.""" # The object argument defaults to '' or b''. @@ -1766,6 +1788,8 @@ def test_constructor_defaults(self): # The errors argument defaults to strict. self.assertRaises(UnicodeDecodeError, str, utf8_cent, encoding='ascii') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_codecs_utf7(self): utfTests = [ ('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example @@ -2275,6 +2299,8 @@ def test_codecs_errors(self): self.assertRaises(ValueError, complex, "\ud800") self.assertRaises(ValueError, complex, "\udf00") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_codecs(self): # Encoding self.assertEqual('hello'.encode('ascii'), b'hello') @@ -2404,6 +2430,8 @@ def test_ucs4(self): else: self.fail("Should have raised UnicodeDecodeError") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_conversion(self): # Make sure __str__() works properly class StrWithStr(str): @@ -2452,6 +2480,7 @@ def test_printable_repr(self): # This test only affects 32-bit platforms because expandtabs can only take # an int as the max value, not a 64-bit C long. If expandtabs is changed # to take a 64-bit long, this test should apply to all platforms. + @unittest.skip("TODO: RUSTPYTHON, oom handling") @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, 'only applies to 32-bit platforms') def test_expandtabs_overflows_gracefully(self): @@ -2462,6 +2491,7 @@ def test_expandtabs_optimization(self): s = 'abc' self.assertIs(s.expandtabs(), s) + @unittest.skip("TODO: RUSTPYTHON, aborted: memory allocation of 9223372036854775759 bytes failed") def test_raiseMemError(self): asciifields = "nnb" compactfields = asciifields + "nP" @@ -2601,10 +2631,14 @@ def test_compare(self): self.assertTrue(astral >= bmp2) self.assertFalse(astral >= astral2) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_free_after_iterating(self): support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_check_encoding_errors(self): # bpo-37388: str(bytes) and str.decode() must check encoding and errors # arguments in dev mode diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py index 63c6c05582..60cfdaabe8 100644 --- a/Lib/test/test_unicode_identifiers.py +++ b/Lib/test/test_unicode_identifiers.py @@ -17,6 +17,8 @@ def test_non_bmp_normalized(self): 𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 1 self.assertIn("Unicode", dir()) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_invalid(self): try: from test.tokenizedata import badsyntax_3131 diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 2cf367a2cf..4804ec297c 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -26,6 +26,8 @@ class UnicodeMethodsTest(unittest.TestCase): # update this, if the database changes expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e' + # TODO: RUSTPYTHON + @unittest.expectedFailure @requires_resource('cpu') def test_method_checksum(self): h = hashlib.sha1() @@ -79,6 +81,8 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): # (e.g. 'make distclean && make') to get the correct checksum. expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba' + # TODO: RUSTPYTHON + @unittest.expectedFailure @requires_resource('cpu') def test_function_checksum(self): data = [] @@ -130,6 +134,8 @@ def test_lookup_nonexistant(self): ]: self.assertRaises(KeyError, self.db.lookup, nonexistant) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_digit(self): self.assertEqual(self.db.digit('A', None), None) self.assertEqual(self.db.digit('9'), 9) @@ -142,6 +148,8 @@ def test_digit(self): self.assertRaises(TypeError, self.db.digit, 'xx') self.assertRaises(ValueError, self.db.digit, 'x') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_numeric(self): self.assertEqual(self.db.numeric('A',None), None) self.assertEqual(self.db.numeric('9'), 9) @@ -155,6 +163,8 @@ def test_numeric(self): self.assertRaises(TypeError, self.db.numeric, 'xx') self.assertRaises(ValueError, self.db.numeric, 'x') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_decimal(self): self.assertEqual(self.db.decimal('A',None), None) self.assertEqual(self.db.decimal('9'), 9) @@ -177,6 +187,8 @@ def test_category(self): self.assertRaises(TypeError, self.db.category) self.assertRaises(TypeError, self.db.category, 'xx') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_bidirectional(self): self.assertEqual(self.db.bidirectional('\uFFFE'), '') self.assertEqual(self.db.bidirectional(' '), 'WS') @@ -186,6 +198,8 @@ def test_bidirectional(self): self.assertRaises(TypeError, self.db.bidirectional) self.assertRaises(TypeError, self.db.bidirectional, 'xx') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_decomposition(self): self.assertEqual(self.db.decomposition('\uFFFE'),'') self.assertEqual(self.db.decomposition('\u00bc'), ' 0031 2044 0034') @@ -202,6 +216,8 @@ def test_mirrored(self): self.assertRaises(TypeError, self.db.mirrored) self.assertRaises(TypeError, self.db.mirrored, 'xx') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_combining(self): self.assertEqual(self.db.combining('\uFFFE'), 0) self.assertEqual(self.db.combining('a'), 0) @@ -229,6 +245,8 @@ def test_issue10254(self): b = 'C\u0338' * 20 + '\xC7' self.assertEqual(self.db.normalize('NFC', a), b) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_issue29456(self): # Fix #29456 u1176_str_a = '\u1100\u1176\u11a8' @@ -272,6 +290,8 @@ def test_east_asian_width_unassigned(self): self.assertEqual(eaw(char), 'A') self.assertIs(self.db.name(char, None), None) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_east_asian_width_9_0_changes(self): self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N') self.assertEqual(self.db.east_asian_width('\u231a'), 'W') @@ -283,6 +303,8 @@ def test_disallow_instantiation(self): # Ensure that the type disallows instantiation (bpo-43916) check_disallow_instantiation(self, unicodedata.UCD) + # TODO: RUSTPYTHON + @unittest.expectedFailure @force_not_colorized def test_failed_import_during_compiling(self): # Issue 4367 @@ -300,6 +322,8 @@ def test_failed_import_during_compiling(self): "(can't load unicodedata module)" self.assertIn(error, result.err.decode("ascii")) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_decimal_numeric_consistent(self): # Test that decimal and numeric are consistent, # i.e. if a character has a decimal value, @@ -313,6 +337,8 @@ def test_decimal_numeric_consistent(self): count += 1 self.assertTrue(count >= 10) # should have tested at least the ASCII digits + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_digit_numeric_consistent(self): # Test that digit and numeric are consistent, # i.e. if a character has a digit value, @@ -329,6 +355,8 @@ def test_digit_numeric_consistent(self): def test_bug_1704793(self): self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), '\U00010346') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ucd_510(self): import unicodedata # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0 @@ -352,6 +380,8 @@ def test_bug_5828(self): [0] ) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_bug_4971(self): # LETTER DZ WITH CARON: DZ, Dz, dz self.assertEqual("\u01c4".title(), "\u01c5") From 2d8a3cb76c4a0f74c439b756110a837a8ff6c969 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Sat, 12 Jul 2025 13:39:24 +0300 Subject: [PATCH 3/3] Mark new failing tests --- Lib/test/string_tests.py | 2 ++ Lib/test/test_str.py | 4 ++++ Lib/test/test_unicodedata.py | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index c5831c47fc..3f82b515bb 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -154,6 +154,8 @@ def test_count(self): self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i)) self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i)) + # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument count + @unittest.expectedFailure def test_count_keyword(self): self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0)) self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1)) diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index b2d585c5fc..ef2d211a61 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -112,6 +112,8 @@ def test_literals(self): # raw strings should not have unicode escapes self.assertNotEqual(r"\u0020", " ") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ascii(self): self.assertEqual(ascii('abc'), "'abc'") self.assertEqual(ascii('ab\\c'), "'ab\\\\c'") @@ -2699,6 +2701,8 @@ def test_check_encoding_errors(self): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_str_invalid_call(self): # too many args with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 4804ec297c..7f49c1690f 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -122,6 +122,8 @@ def test_no_names_in_pua(self): char = chr(i) self.assertRaises(ValueError, self.db.name, char) + # TODO: RUSTPYTHON; LookupError: undefined character name 'LATIN SMLL LETR A' + @unittest.expectedFailure def test_lookup_nonexistant(self): # just make sure that lookup can fail for nonexistant in [ @@ -273,6 +275,8 @@ def test_east_asian_width(self): self.assertEqual(eaw('\u2010'), 'A') self.assertEqual(eaw('\U00020000'), 'W') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_east_asian_width_unassigned(self): eaw = self.db.east_asian_width # unassigned

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier! Saves Data!