Content-Length: 86587 | pFad | http://github.com/RustPython/RustPython/pull/5953.patch
thub.com
From 46c7bf77c4377677b1dce1d370f2cff33ee1f9d2 Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Sat, 12 Jul 2025 12:09:19 +0300
Subject: [PATCH 1/3] Update str related tests from 3.13.5
---
Lib/test/string_tests.py | 120 ++++++----
Lib/test/test_bytes.py | 265 ++++++++++++++--------
Lib/test/{test_unicode.py => test_str.py} | 222 +++++++++++-------
Lib/test/test_unicode_file.py | 2 +-
Lib/test/test_unicode_file_functions.py | 14 +-
Lib/test/test_unicode_identifiers.py | 4 +-
Lib/test/test_unicodedata.py | 88 ++++---
Lib/test/test_userstring.py | 3 +-
8 files changed, 458 insertions(+), 260 deletions(-)
rename Lib/test/{test_unicode.py => test_str.py} (94%)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 6f402513fd..9bb0ce7bb5 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -8,18 +8,12 @@
from collections import UserList
import random
+
class Sequence:
def __init__(self, seq='wxyz'): self.seq = seq
def __len__(self): return len(self.seq)
def __getitem__(self, i): return self.seq[i]
-class BadSeq1(Sequence):
- def __init__(self): self.seq = [7, 'hello', 123]
- def __str__(self): return '{0} {1} {2}'.format(*self.seq)
-
-class BadSeq2(Sequence):
- def __init__(self): self.seq = ['a', 'b', 'c']
- def __len__(self): return 8
class BaseTest:
# These tests are for buffers of values (bytes) and not
@@ -27,7 +21,7 @@ class BaseTest:
# and various string implementations
# The type to be tested
- # Change in subclasses to change the behaviour of fixtesttype()
+ # Change in subclasses to change the behaviour of fixtype()
type2test = None
# Whether the "contained items" of the container are integers in
@@ -36,7 +30,7 @@ class BaseTest:
contains_bytes = False
# All tests pass their arguments to the testing methods
- # as str objects. fixtesttype() can be used to propagate
+ # as str objects. fixtype() can be used to propagate
# these arguments to the appropriate type
def fixtype(self, obj):
if isinstance(obj, str):
@@ -160,6 +154,12 @@ def test_count(self):
self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i))
self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i))
+ def test_count_keyword(self):
+ self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0))
+ self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1))
+ self.assertEqual('aa'.replace('a', 'b', 2), 'aa'.replace('a', 'b', count=2))
+ self.assertEqual('aa'.replace('a', 'b', 3), 'aa'.replace('a', 'b', count=3))
+
def test_find(self):
self.checkequal(0, 'abcdefghiabc', 'find', 'abc')
self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1)
@@ -327,11 +327,12 @@ def reference_find(p, s):
for i in range(len(s)):
if s.startswith(p, i):
return i
+ if p == '' and s == '':
+ return 0
return -1
- rr = random.randrange
- choices = random.choices
- for _ in range(1000):
+ def check_pattern(rr):
+ choices = random.choices
p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20)
p = p0[:len(p0) - rr(10)] # pop off some characters
left = ''.join(choices('abcdef', k=rr(2000)))
@@ -341,6 +342,49 @@ def reference_find(p, s):
self.checkequal(reference_find(p, text),
text, 'find', p)
+ rr = random.randrange
+ for _ in range(1000):
+ check_pattern(rr)
+
+ # Test that empty string always work:
+ check_pattern(lambda *args: 0)
+
+ def test_find_many_lengths(self):
+ haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)]
+ haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats]
+
+ needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)]
+ needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats]
+
+ for n, haystack1 in haystacks:
+ haystack2 = haystack1[:-1]
+ for m, needle in needles:
+ answer1 = 5 * (n - m) if m <= n else -1
+ self.assertEqual(haystack1.find(needle), answer1, msg=(n,m))
+ self.assertEqual(haystack2.find(needle), -1, msg=(n,m))
+
+ def test_adaptive_find(self):
+ # This would be very slow for the naive algorithm,
+ # but str.find() should be O(n + m).
+ for N in 1000, 10_000, 100_000, 1_000_000:
+ A, B = 'a' * N, 'b' * N
+ haystack = A + A + B + A + A
+ needle = A + B + B + A
+ self.checkequal(-1, haystack, 'find', needle)
+ self.checkequal(0, haystack, 'count', needle)
+ self.checkequal(len(haystack), haystack + needle, 'find', needle)
+ self.checkequal(1, haystack + needle, 'count', needle)
+
+ def test_find_with_memory(self):
+ # Test the "Skip with memory" path in the two-way algorithm.
+ for N in 1000, 3000, 10_000, 30_000:
+ needle = 'ab' * N
+ haystack = ('ab'*(N-1) + 'b') * 2
+ self.checkequal(-1, haystack, 'find', needle)
+ self.checkequal(0, haystack, 'count', needle)
+ self.checkequal(len(haystack), haystack + needle, 'find', needle)
+ self.checkequal(1, haystack + needle, 'count', needle)
+
def test_find_shift_table_overflow(self):
"""When the table of 8-bit shifts overflows."""
N = 2**8 + 100
@@ -394,8 +438,7 @@ def test_expandtabs(self):
self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42)
# This test is only valid when sizeof(int) == sizeof(void*) == 4.
- # XXX RUSTPYTHON TODO: expandtabs overflow checks
- if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4 and False:
+ if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4:
self.checkraises(OverflowError,
'\ta\n\tb', 'expandtabs', sys.maxsize)
@@ -724,7 +767,18 @@ def test_replace(self):
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
- @unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms")
+ def test_replace_uses_two_way_maxcount(self):
+ # Test that maxcount works in _two_way_count in fastsearch.h
+ A, B = "A"*1000, "B"*1000
+ AABAA = A + A + B + A + A
+ ABBA = A + B + B + A
+ self.checkequal(AABAA + ABBA,
+ AABAA + ABBA, 'replace', ABBA, "ccc", 0)
+ self.checkequal(AABAA + "ccc",
+ AABAA + ABBA, 'replace', ABBA, "ccc", 1)
+ self.checkequal(AABAA + "ccc",
+ AABAA + ABBA, 'replace', ABBA, "ccc", 2)
+
@unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
'only applies to 32-bit platforms')
def test_replace_overflow(self):
@@ -734,8 +788,6 @@ def test_replace_overflow(self):
self.checkraises(OverflowError, A2_16, "replace", "A", A2_16)
self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16)
-
- # Python 3.9
def test_removeprefix(self):
self.checkequal('am', 'spam', 'removeprefix', 'sp')
self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam')
@@ -754,7 +806,6 @@ def test_removeprefix(self):
self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42)
self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l"))
- # Python 3.9
def test_removesuffix(self):
self.checkequal('sp', 'spam', 'removesuffix', 'am')
self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam')
@@ -1053,7 +1104,7 @@ def test_splitlines(self):
self.checkraises(TypeError, 'abc', 'splitlines', 42, 42)
-class CommonTest(BaseTest):
+class StringLikeTest(BaseTest):
# This testcase contains tests that can be used in all
# stringlike classes. Currently this is str and UserString.
@@ -1084,11 +1135,6 @@ def test_capitalize_nonascii(self):
self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7',
'\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize')
-
-class MixinStrUnicodeUserStringTest:
- # additional tests that only work for
- # stringlike objects, i.e. str, UserString
-
def test_startswith(self):
self.checkequal(True, 'hello', 'startswith', 'he')
self.checkequal(True, 'hello', 'startswith', 'hello')
@@ -1200,9 +1246,6 @@ def test___contains__(self):
self.checkequal(False, 'asd', '__contains__', 'asdf')
self.checkequal(False, '', '__contains__', 'asdf')
-
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_subscript(self):
self.checkequal('a', 'abc', '__getitem__', 0)
self.checkequal('c', 'abc', '__getitem__', -1)
@@ -1273,8 +1316,11 @@ def test_join(self):
self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
('a' * i,) * i)
- #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1())
- self.checkequal('a b c', ' ', 'join', BadSeq2())
+ class LiesAboutLengthSeq(Sequence):
+ def __init__(self): self.seq = ['a', 'b', 'c']
+ def __len__(self): return 8
+
+ self.checkequal('a b c', ' ', 'join', LiesAboutLengthSeq())
self.checkraises(TypeError, ' ', 'join')
self.checkraises(TypeError, ' ', 'join', None)
@@ -1453,25 +1499,23 @@ def test_none_arguments(self):
self.checkequal(True, s, 'startswith', 'h', None, -2)
self.checkequal(False, s, 'startswith', 'x', None, None)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_find_etc_raise_correct_error_messages(self):
# issue 11828
s = 'hello'
x = 'x'
- self.assertRaisesRegex(TypeError, r'^find\(', s.find,
+ self.assertRaisesRegex(TypeError, r'^find\b', s.find,
x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind,
+ self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind,
x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^index\(', s.index,
+ self.assertRaisesRegex(TypeError, r'^index\b', s.index,
x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex,
+ self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex,
x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^count\(', s.count,
+ self.assertRaisesRegex(TypeError, r'^count\b', s.count,
x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith,
+ self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith,
x, None, None, None)
- self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith,
+ self.assertRaisesRegex(TypeError, r'^endswith\b', s.endswith,
x, None, None, None)
# issue #15534
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index 3c634b6cac..f8edfe0a17 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -10,6 +10,7 @@
import sys
import copy
import functools
+import operator
import pickle
import tempfile
import textwrap
@@ -46,6 +47,10 @@ def __index__(self):
class BaseBytesTest:
+ def assertTypedEqual(self, actual, expected):
+ self.assertIs(type(actual), type(expected))
+ self.assertEqual(actual, expected)
+
def test_basics(self):
b = self.type2test()
self.assertEqual(type(b), self.type2test)
@@ -196,8 +201,6 @@ def test_constructor_value_errors(self):
self.assertRaises(ValueError, self.type2test, [sys.maxsize+1])
self.assertRaises(ValueError, self.type2test, [10**100])
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
@bigaddrspacetest
def test_constructor_overflow(self):
size = MAX_Py_ssize_t
@@ -321,8 +324,6 @@ def test_decode(self):
# Default encoding is utf-8
self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_check_encoding_errors(self):
# bpo-37388: bytes(str) and bytes.encode() must check encoding
# and errors arguments in dev mode
@@ -737,6 +738,37 @@ def check(fmt, vals, result):
check(b'%i%b %*.*b', (10, b'3', 5, 3, b'abc',), b'103 abc')
check(b'%c', b'a', b'a')
+ class PseudoFloat:
+ def __init__(self, value):
+ self.value = float(value)
+ def __int__(self):
+ return int(self.value)
+
+ pi = PseudoFloat(3.1415)
+
+ exceptions_params = [
+ ('%x format: an integer is required, not float', b'%x', 3.14),
+ ('%X format: an integer is required, not float', b'%X', 2.11),
+ ('%o format: an integer is required, not float', b'%o', 1.79),
+ ('%x format: an integer is required, not PseudoFloat', b'%x', pi),
+ ('%x format: an integer is required, not complex', b'%x', 3j),
+ ('%X format: an integer is required, not complex', b'%X', 2j),
+ ('%o format: an integer is required, not complex', b'%o', 1j),
+ ('%u format: a real number is required, not complex', b'%u', 3j),
+ # See https://github.com/python/cpython/issues/130928 as for why
+ # the exception message contains '%d' instead of '%i'.
+ ('%d format: a real number is required, not complex', b'%i', 2j),
+ ('%d format: a real number is required, not complex', b'%d', 2j),
+ (
+ r'%c requires an integer in range\(256\) or a single byte',
+ b'%c', pi
+ ),
+ ]
+
+ for msg, format_bytes, value in exceptions_params:
+ with self.assertRaisesRegex(TypeError, msg):
+ operator.mod(format_bytes, value)
+
def test_imod(self):
b = self.type2test(b'hello, %b!')
orig = b
@@ -936,8 +968,6 @@ def test_integer_arguments_out_of_byte_range(self):
self.assertRaises(ValueError, method, 256)
self.assertRaises(ValueError, method, 9999)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_find_etc_raise_correct_error_messages(self):
# issue 11828
b = self.type2test(b'hello')
@@ -957,8 +987,6 @@ def test_find_etc_raise_correct_error_messages(self):
self.assertRaisesRegex(TypeError, r'\bendswith\b', b.endswith,
x, None, None, None)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_free_after_iterating(self):
test.support.check_free_after_iterating(self, iter, self.type2test)
test.support.check_free_after_iterating(self, reversed, self.type2test)
@@ -995,13 +1023,13 @@ def test_translate(self):
self.assertEqual(c, b'hllo')
def test_sq_item(self):
- _testcapi = import_helper.import_module('_testcapi')
+ _testlimitedcapi = import_helper.import_module('_testlimitedcapi')
obj = self.type2test((42,))
with self.assertRaises(IndexError):
- _testcapi.sequence_getitem(obj, -2)
+ _testlimitedcapi.sequence_getitem(obj, -2)
with self.assertRaises(IndexError):
- _testcapi.sequence_getitem(obj, 1)
- self.assertEqual(_testcapi.sequence_getitem(obj, 0), 42)
+ _testlimitedcapi.sequence_getitem(obj, 1)
+ self.assertEqual(_testlimitedcapi.sequence_getitem(obj, 0), 42)
class BytesTest(BaseBytesTest, unittest.TestCase):
@@ -1031,36 +1059,63 @@ def test_buffer_is_readonly(self):
self.assertRaises(TypeError, f.readinto, b"")
def test_custom(self):
- class A:
- def __bytes__(self):
- return b'abc'
- self.assertEqual(bytes(A()), b'abc')
- class A: pass
- self.assertRaises(TypeError, bytes, A())
- class A:
- def __bytes__(self):
- return None
- self.assertRaises(TypeError, bytes, A())
- class A:
+ self.assertEqual(bytes(BytesSubclass(b'abc')), b'abc')
+ self.assertEqual(BytesSubclass(OtherBytesSubclass(b'abc')),
+ BytesSubclass(b'abc'))
+ self.assertEqual(bytes(WithBytes(b'abc')), b'abc')
+ self.assertEqual(BytesSubclass(WithBytes(b'abc')), BytesSubclass(b'abc'))
+
+ class NoBytes: pass
+ self.assertRaises(TypeError, bytes, NoBytes())
+ self.assertRaises(TypeError, bytes, WithBytes('abc'))
+ self.assertRaises(TypeError, bytes, WithBytes(None))
+ class IndexWithBytes:
def __bytes__(self):
return b'a'
def __index__(self):
return 42
- self.assertEqual(bytes(A()), b'a')
+ self.assertEqual(bytes(IndexWithBytes()), b'a')
# Issue #25766
- class A(str):
+ class StrWithBytes(str):
+ def __new__(cls, value):
+ self = str.__new__(cls, '\u20ac')
+ self.value = value
+ return self
def __bytes__(self):
- return b'abc'
- self.assertEqual(bytes(A('\u20ac')), b'abc')
- self.assertEqual(bytes(A('\u20ac'), 'iso8859-15'), b'\xa4')
+ return self.value
+ self.assertEqual(bytes(StrWithBytes(b'abc')), b'abc')
+ self.assertEqual(bytes(StrWithBytes(b'abc'), 'iso8859-15'), b'\xa4')
+ self.assertEqual(bytes(StrWithBytes(BytesSubclass(b'abc'))), b'abc')
+ self.assertEqual(BytesSubclass(StrWithBytes(b'abc')), BytesSubclass(b'abc'))
+ self.assertEqual(BytesSubclass(StrWithBytes(b'abc'), 'iso8859-15'),
+ BytesSubclass(b'\xa4'))
+ self.assertEqual(BytesSubclass(StrWithBytes(BytesSubclass(b'abc'))),
+ BytesSubclass(b'abc'))
+ self.assertEqual(BytesSubclass(StrWithBytes(OtherBytesSubclass(b'abc'))),
+ BytesSubclass(b'abc'))
# Issue #24731
- class A:
+ self.assertTypedEqual(bytes(WithBytes(BytesSubclass(b'abc'))), BytesSubclass(b'abc'))
+ self.assertTypedEqual(BytesSubclass(WithBytes(BytesSubclass(b'abc'))),
+ BytesSubclass(b'abc'))
+ self.assertTypedEqual(BytesSubclass(WithBytes(OtherBytesSubclass(b'abc'))),
+ BytesSubclass(b'abc'))
+
+ class BytesWithBytes(bytes):
+ def __new__(cls, value):
+ self = bytes.__new__(cls, b'\xa4')
+ self.value = value
+ return self
def __bytes__(self):
- return OtherBytesSubclass(b'abc')
- self.assertEqual(bytes(A()), b'abc')
- self.assertIs(type(bytes(A())), OtherBytesSubclass)
- self.assertEqual(BytesSubclass(A()), b'abc')
- self.assertIs(type(BytesSubclass(A())), BytesSubclass)
+ return self.value
+ self.assertTypedEqual(bytes(BytesWithBytes(b'abc')), b'abc')
+ self.assertTypedEqual(BytesSubclass(BytesWithBytes(b'abc')),
+ BytesSubclass(b'abc'))
+ self.assertTypedEqual(bytes(BytesWithBytes(BytesSubclass(b'abc'))),
+ BytesSubclass(b'abc'))
+ self.assertTypedEqual(BytesSubclass(BytesWithBytes(BytesSubclass(b'abc'))),
+ BytesSubclass(b'abc'))
+ self.assertTypedEqual(BytesSubclass(BytesWithBytes(OtherBytesSubclass(b'abc'))),
+ BytesSubclass(b'abc'))
# Test PyBytes_FromFormat()
def test_from_format(self):
@@ -1233,6 +1288,8 @@ class SubBytes(bytes):
class ByteArrayTest(BaseBytesTest, unittest.TestCase):
type2test = bytearray
+ _testlimitedcapi = import_helper.import_module('_testlimitedcapi')
+
def test_getitem_error(self):
b = bytearray(b'python')
msg = "bytearray indices must be integers or slices"
@@ -1325,47 +1382,73 @@ def by(s):
self.assertEqual(re.findall(br"\w+", b), [by("Hello"), by("world")])
def test_setitem(self):
- b = bytearray([1, 2, 3])
- b[1] = 100
- self.assertEqual(b, bytearray([1, 100, 3]))
- b[-1] = 200
- self.assertEqual(b, bytearray([1, 100, 200]))
- b[0] = Indexable(10)
- self.assertEqual(b, bytearray([10, 100, 200]))
- try:
- b[3] = 0
- self.fail("Didn't raise IndexError")
- except IndexError:
- pass
- try:
- b[-10] = 0
- self.fail("Didn't raise IndexError")
- except IndexError:
- pass
- try:
- b[0] = 256
- self.fail("Didn't raise ValueError")
- except ValueError:
- pass
- try:
- b[0] = Indexable(-1)
- self.fail("Didn't raise ValueError")
- except ValueError:
- pass
- try:
- b[0] = None
- self.fail("Didn't raise TypeError")
- except TypeError:
- pass
+ def setitem_as_mapping(b, i, val):
+ b[i] = val
+
+ def setitem_as_sequence(b, i, val):
+ self._testlimitedcapi.sequence_setitem(b, i, val)
+
+ def do_tests(setitem):
+ b = bytearray([1, 2, 3])
+ setitem(b, 1, 100)
+ self.assertEqual(b, bytearray([1, 100, 3]))
+ setitem(b, -1, 200)
+ self.assertEqual(b, bytearray([1, 100, 200]))
+ setitem(b, 0, Indexable(10))
+ self.assertEqual(b, bytearray([10, 100, 200]))
+ try:
+ setitem(b, 3, 0)
+ self.fail("Didn't raise IndexError")
+ except IndexError:
+ pass
+ try:
+ setitem(b, -10, 0)
+ self.fail("Didn't raise IndexError")
+ except IndexError:
+ pass
+ try:
+ setitem(b, 0, 256)
+ self.fail("Didn't raise ValueError")
+ except ValueError:
+ pass
+ try:
+ setitem(b, 0, Indexable(-1))
+ self.fail("Didn't raise ValueError")
+ except ValueError:
+ pass
+ try:
+ setitem(b, 0, object())
+ self.fail("Didn't raise TypeError")
+ except TypeError:
+ pass
+
+ with self.subTest("tp_as_mapping"):
+ do_tests(setitem_as_mapping)
+
+ with self.subTest("tp_as_sequence"):
+ do_tests(setitem_as_sequence)
def test_delitem(self):
- b = bytearray(range(10))
- del b[0]
- self.assertEqual(b, bytearray(range(1, 10)))
- del b[-1]
- self.assertEqual(b, bytearray(range(1, 9)))
- del b[4]
- self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8]))
+ def del_as_mapping(b, i):
+ del b[i]
+
+ def del_as_sequence(b, i):
+ self._testlimitedcapi.sequence_delitem(b, i)
+
+ def do_tests(delete):
+ b = bytearray(range(10))
+ delete(b, 0)
+ self.assertEqual(b, bytearray(range(1, 10)))
+ delete(b, -1)
+ self.assertEqual(b, bytearray(range(1, 9)))
+ delete(b, 4)
+ self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8]))
+
+ with self.subTest("tp_as_mapping"):
+ do_tests(del_as_mapping)
+
+ with self.subTest("tp_as_sequence"):
+ do_tests(del_as_sequence)
def test_setslice(self):
b = bytearray(range(10))
@@ -1492,11 +1575,6 @@ def test_irepeat_1char(self):
self.assertEqual(b, b1)
self.assertIs(b, b1)
- # NOTE: RUSTPYTHON:
- #
- # The second instance of self.assertGreater was replaced with
- # self.assertGreaterEqual since, in RustPython, the underlying storage
- # is a Vec which doesn't require trailing null byte.
def test_alloc(self):
b = bytearray()
alloc = b.__alloc__()
@@ -1505,15 +1583,10 @@ def test_alloc(self):
for i in range(100):
b += b"x"
alloc = b.__alloc__()
- self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched
+ self.assertGreater(alloc, len(b)) # including trailing null byte
if alloc not in seq:
seq.append(alloc)
- # NOTE: RUSTPYTHON:
- #
- # The usages of self.assertGreater were replaced with
- # self.assertGreaterEqual since, in RustPython, the underlying storage
- # is a Vec which doesn't require trailing null byte.
def test_init_alloc(self):
b = bytearray()
def g():
@@ -1524,12 +1597,12 @@ def g():
self.assertEqual(len(b), len(a))
self.assertLessEqual(len(b), i)
alloc = b.__alloc__()
- self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched
+ self.assertGreater(alloc, len(b)) # including trailing null byte
b.__init__(g())
self.assertEqual(list(b), list(range(1, 100)))
self.assertEqual(len(b), 99)
alloc = b.__alloc__()
- self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched
+ self.assertGreater(alloc, len(b))
def test_extend(self):
orig = b'hello'
@@ -1558,6 +1631,13 @@ def test_extend(self):
a = bytearray(b'')
a.extend([Indexable(ord('a'))])
self.assertEqual(a, b'a')
+ a = bytearray(b'abc')
+ self.assertRaisesRegex(TypeError, # Override for string.
+ "expected iterable of integers; got: 'str'",
+ a.extend, 'def')
+ self.assertRaisesRegex(TypeError, # But not for others.
+ "can't extend bytearray with float",
+ a.extend, 1.0)
def test_remove(self):
b = bytearray(b'hello')
@@ -1747,6 +1827,8 @@ def test_repeat_after_setslice(self):
self.assertEqual(b3, b'xcxcxc')
def test_mutating_index(self):
+ # See gh-91153
+
class Boom:
def __index__(self):
b.clear()
@@ -1758,10 +1840,9 @@ def __index__(self):
b[0] = Boom()
with self.subTest("tp_as_sequence"):
- _testcapi = import_helper.import_module('_testcapi')
b = bytearray(b'Now you see me...')
with self.assertRaises(IndexError):
- _testcapi.sequence_setitem(b, 0, Boom())
+ self._testlimitedcapi.sequence_setitem(b, 0, Boom())
class AssortedBytesTest(unittest.TestCase):
@@ -1990,7 +2071,6 @@ def test_join(self):
s3 = s1.join([b"abcd"])
self.assertIs(type(s3), self.basetype)
- @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest")
def test_pickle(self):
a = self.type2test(b"abcd")
a.x = 10
@@ -2005,7 +2085,6 @@ def test_pickle(self):
self.assertEqual(type(a.z), type(b.z))
self.assertFalse(hasattr(b, 'y'))
- @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest")
def test_copy(self):
a = self.type2test(b"abcd")
a.x = 10
@@ -2060,6 +2139,12 @@ class BytesSubclass(bytes):
class OtherBytesSubclass(bytes):
pass
+class WithBytes:
+ def __init__(self, value):
+ self.value = value
+ def __bytes__(self):
+ return self.value
+
class ByteArraySubclassTest(SubclassTest, unittest.TestCase):
basetype = bytearray
type2test = ByteArraySubclass
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_str.py
similarity index 94%
rename from Lib/test/test_unicode.py
rename to Lib/test/test_str.py
index 1a8a8f7ee9..46673cc56a 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_str.py
@@ -7,6 +7,7 @@
"""
import _string
import codecs
+import datetime
import itertools
import operator
import pickle
@@ -55,8 +56,22 @@ def duplicate_string(text):
class StrSubclass(str):
pass
-class UnicodeTest(string_tests.CommonTest,
- string_tests.MixinStrUnicodeUserStringTest,
+class OtherStrSubclass(str):
+ pass
+
+class WithStr:
+ def __init__(self, value):
+ self.value = value
+ def __str__(self):
+ return self.value
+
+class WithRepr:
+ def __init__(self, value):
+ self.value = value
+ def __repr__(self):
+ return self.value
+
+class StrTest(string_tests.StringLikeTest,
string_tests.MixinStrUnicodeTest,
unittest.TestCase):
@@ -84,6 +99,10 @@ def __repr__(self):
self.assertEqual(realresult, result)
self.assertTrue(object is not realresult)
+ def assertTypedEqual(self, actual, expected):
+ self.assertIs(type(actual), type(expected))
+ self.assertEqual(actual, expected)
+
def test_literals(self):
self.assertEqual('\xff', '\u00ff')
self.assertEqual('\uffff', '\U0000ffff')
@@ -128,10 +147,13 @@ def test_ascii(self):
self.assertEqual(ascii("\U00010000" * 39 + "\uffff" * 4096),
ascii("\U00010000" * 39 + "\uffff" * 4096))
- class WrongRepr:
- def __repr__(self):
- return b'byte-repr'
- self.assertRaises(TypeError, ascii, WrongRepr())
+ self.assertTypedEqual(ascii('\U0001f40d'), r"'\U0001f40d'")
+ self.assertTypedEqual(ascii(StrSubclass('abc')), "'abc'")
+ self.assertTypedEqual(ascii(WithRepr('')), '')
+ self.assertTypedEqual(ascii(WithRepr(StrSubclass(''))), StrSubclass(''))
+ self.assertTypedEqual(ascii(WithRepr('<\U0001f40d>')), r'<\U0001f40d>')
+ self.assertTypedEqual(ascii(WithRepr(StrSubclass('<\U0001f40d>'))), r'<\U0001f40d>')
+ self.assertRaises(TypeError, ascii, WithRepr(b'byte-repr'))
def test_repr(self):
# Test basic sanity of repr()
@@ -169,10 +191,13 @@ def test_repr(self):
self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096),
repr("\U00010000" * 39 + "\uffff" * 4096))
- class WrongRepr:
- def __repr__(self):
- return b'byte-repr'
- self.assertRaises(TypeError, repr, WrongRepr())
+ self.assertTypedEqual(repr('\U0001f40d'), "'\U0001f40d'")
+ self.assertTypedEqual(repr(StrSubclass('abc')), "'abc'")
+ self.assertTypedEqual(repr(WithRepr('')), '')
+ self.assertTypedEqual(repr(WithRepr(StrSubclass(''))), StrSubclass(''))
+ self.assertTypedEqual(repr(WithRepr('<\U0001f40d>')), '<\U0001f40d>')
+ self.assertTypedEqual(repr(WithRepr(StrSubclass('<\U0001f40d>'))), StrSubclass('<\U0001f40d>'))
+ self.assertRaises(TypeError, repr, WithRepr(b'byte-repr'))
def test_iterators(self):
# Make sure unicode objects have an __iter__ method
@@ -213,7 +238,7 @@ def test_pickle_iterator(self):
self.assertEqual(case, pickled)
def test_count(self):
- string_tests.CommonTest.test_count(self)
+ string_tests.StringLikeTest.test_count(self)
# check mixed argument types
self.checkequalnofix(3, 'aaa', 'count', 'a')
self.checkequalnofix(0, 'aaa', 'count', 'b')
@@ -243,7 +268,7 @@ class MyStr(str):
self.checkequal(3, MyStr('aaa'), 'count', 'a')
def test_find(self):
- string_tests.CommonTest.test_find(self)
+ string_tests.StringLikeTest.test_find(self)
# test implementation details of the memchr fast path
self.checkequal(100, 'a' * 100 + '\u0102', 'find', '\u0102')
self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0201')
@@ -288,7 +313,7 @@ def test_find(self):
self.checkequal(-1, '\u0102' * 100, 'find', '\u0102\U00100304')
def test_rfind(self):
- string_tests.CommonTest.test_rfind(self)
+ string_tests.StringLikeTest.test_rfind(self)
# test implementation details of the memrchr fast path
self.checkequal(0, '\u0102' + 'a' * 100 , 'rfind', '\u0102')
self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0201')
@@ -329,7 +354,7 @@ def test_rfind(self):
self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102')
def test_index(self):
- string_tests.CommonTest.test_index(self)
+ string_tests.StringLikeTest.test_index(self)
self.checkequalnofix(0, 'abcdefghiabc', 'index', '')
self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def')
self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc')
@@ -353,7 +378,7 @@ def test_index(self):
self.assertRaises(ValueError, ('\u0102' * 100).index, '\u0102\U00100304')
def test_rindex(self):
- string_tests.CommonTest.test_rindex(self)
+ string_tests.StringLikeTest.test_rindex(self)
self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '')
self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def')
self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc')
@@ -449,7 +474,7 @@ def test_maketrans_translate(self):
self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
def test_split(self):
- string_tests.CommonTest.test_split(self)
+ string_tests.StringLikeTest.test_split(self)
# test mixed kinds
for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
@@ -466,7 +491,7 @@ def test_split(self):
left + delim * 2 + right, 'split', delim *2)
def test_rsplit(self):
- string_tests.CommonTest.test_rsplit(self)
+ string_tests.StringLikeTest.test_rsplit(self)
# test mixed kinds
for left, right in ('ba', 'юё', '\u0101\u0100', '\U00010301\U00010300'):
left *= 9
@@ -486,7 +511,7 @@ def test_rsplit(self):
left + right, 'rsplit', None)
def test_partition(self):
- string_tests.MixinStrUnicodeUserStringTest.test_partition(self)
+ string_tests.StringLikeTest.test_partition(self)
# test mixed kinds
self.checkequal(('ABCDEFGH', '', ''), 'ABCDEFGH', 'partition', '\u4200')
for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
@@ -503,7 +528,7 @@ def test_partition(self):
left + delim * 2 + right, 'partition', delim * 2)
def test_rpartition(self):
- string_tests.MixinStrUnicodeUserStringTest.test_rpartition(self)
+ string_tests.StringLikeTest.test_rpartition(self)
# test mixed kinds
self.checkequal(('', '', 'ABCDEFGH'), 'ABCDEFGH', 'rpartition', '\u4200')
for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'):
@@ -520,7 +545,7 @@ def test_rpartition(self):
left + delim * 2 + right, 'rpartition', delim * 2)
def test_join(self):
- string_tests.MixinStrUnicodeUserStringTest.test_join(self)
+ string_tests.StringLikeTest.test_join(self)
class MyWrapper:
def __init__(self, sval): self.sval = sval
@@ -539,7 +564,6 @@ def __str__(self): return self.sval
self.checkraises(TypeError, ' ', 'join', [1, 2, 3])
self.checkraises(TypeError, ' ', 'join', ['1', '2', 3])
- @unittest.skip("TODO: RUSTPYTHON, oom handling")
@unittest.skipIf(sys.maxsize > 2**32,
'needs too much memory on a 64-bit platform')
def test_join_overflow(self):
@@ -548,7 +572,7 @@ def test_join_overflow(self):
self.assertRaises(OverflowError, ''.join, seq)
def test_replace(self):
- string_tests.CommonTest.test_replace(self)
+ string_tests.StringLikeTest.test_replace(self)
# method call forwarded from str implementation because of unicode argument
self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
@@ -768,8 +792,6 @@ def test_isdecimal(self):
for ch in ['\U0001D7F6', '\U00011066', '\U000104A0']:
self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch))
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_isdigit(self):
super().test_isdigit()
self.checkequalnofix(True, '\u2460', 'isdigit')
@@ -831,6 +853,15 @@ def test_isprintable(self):
self.assertTrue('\U0001F46F'.isprintable())
self.assertFalse('\U000E0020'.isprintable())
+ @support.requires_resource('cpu')
+ def test_isprintable_invariant(self):
+ for codepoint in range(sys.maxunicode + 1):
+ char = chr(codepoint)
+ category = unicodedata.category(char)
+ self.assertEqual(char.isprintable(),
+ category[0] not in ('C', 'Z')
+ or char == ' ')
+
def test_surrogates(self):
for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800',
'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'):
@@ -859,7 +890,7 @@ def test_surrogates(self):
def test_lower(self):
- string_tests.CommonTest.test_lower(self)
+ string_tests.StringLikeTest.test_lower(self)
self.assertEqual('\U00010427'.lower(), '\U0001044F')
self.assertEqual('\U00010427\U00010427'.lower(),
'\U0001044F\U0001044F')
@@ -890,7 +921,7 @@ def test_casefold(self):
self.assertEqual('\u00b5'.casefold(), '\u03bc')
def test_upper(self):
- string_tests.CommonTest.test_upper(self)
+ string_tests.StringLikeTest.test_upper(self)
self.assertEqual('\U0001044F'.upper(), '\U00010427')
self.assertEqual('\U0001044F\U0001044F'.upper(),
'\U00010427\U00010427')
@@ -906,10 +937,8 @@ def test_upper(self):
self.assertEqual('\U0008fffe'.upper(), '\U0008fffe')
self.assertEqual('\u2177'.upper(), '\u2167')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_capitalize(self):
- string_tests.CommonTest.test_capitalize(self)
+ string_tests.StringLikeTest.test_capitalize(self)
self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
self.assertEqual('\U0001044F\U0001044F'.capitalize(),
'\U00010427\U0001044F')
@@ -925,8 +954,6 @@ def test_capitalize(self):
self.assertEqual('ﬁnnish'.capitalize(), 'Finnish')
self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_title(self):
super().test_title()
self.assertEqual('\U0001044F'.title(), '\U00010427')
@@ -944,10 +971,8 @@ def test_title(self):
self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy')
self.assertEqual('A\u03a3A'.title(), 'A\u03c3a')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_swapcase(self):
- string_tests.CommonTest.test_swapcase(self)
+ string_tests.StringLikeTest.test_swapcase(self)
self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
self.assertEqual('\U00010427'.swapcase(), '\U0001044F')
self.assertEqual('\U0001044F\U0001044F'.swapcase(),
@@ -973,7 +998,7 @@ def test_swapcase(self):
self.assertEqual('\u1fd2'.swapcase(), '\u0399\u0308\u0300')
def test_center(self):
- string_tests.CommonTest.test_center(self)
+ string_tests.StringLikeTest.test_center(self)
self.assertEqual('x'.center(2, '\U0010FFFF'),
'x\U0010FFFF')
self.assertEqual('x'.center(3, '\U0010FFFF'),
@@ -1045,8 +1070,6 @@ def test_issue18183(self):
'\U00100000'.ljust(3, '\U00010000')
'\U00100000'.rjust(3, '\U00010000')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_format(self):
self.assertEqual(''.format(), '')
self.assertEqual('a'.format(), 'a')
@@ -1430,21 +1453,16 @@ def __getitem__(self, key):
self.assertRaises(TypeError, '{a}'.format_map, [])
self.assertRaises(ZeroDivisionError, '{a}'.format_map, BadMapping())
- @unittest.skip("TODO: RUSTPYTHON, killed for chewing up RAM")
def test_format_huge_precision(self):
format_string = ".{}f".format(sys.maxsize + 1)
with self.assertRaises(ValueError):
result = format(2.34, format_string)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_format_huge_width(self):
format_string = "{}f".format(sys.maxsize + 1)
with self.assertRaises(ValueError):
result = format(2.34, format_string)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_format_huge_item_number(self):
format_string = "{{{}:.6f}}".format(sys.maxsize + 1)
with self.assertRaises(ValueError):
@@ -1480,10 +1498,8 @@ def __format__(self, spec):
self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_formatting(self):
- string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
+ string_tests.StringLikeTest.test_formatting(self)
# Testing Unicode formatting strings...
self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc')
self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000, 3.00')
@@ -1659,7 +1675,7 @@ def test_startswith_endswith_errors(self):
self.assertIn('str', exc)
self.assertIn('tuple', exc)
- @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
+ @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR', '')
def test_format_float(self):
# should not format with a comma, but always with C locale
self.assertEqual('1.0', '%.1f' % 1.0)
@@ -1730,10 +1746,6 @@ def __str__(self):
'character buffers are decoded to unicode'
)
- self.assertRaises(TypeError, str, 42, 42, 42)
-
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_constructor_keyword_args(self):
"""Pass various keyword argument combinations to the constructor."""
# The object argument can be passed as a keyword.
@@ -1743,8 +1755,6 @@ def test_constructor_keyword_args(self):
self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'"
self.assertEqual(str(object=b'foo', errors='strict'), 'foo')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_constructor_defaults(self):
"""Check the constructor argument defaults."""
# The object argument defaults to '' or b''.
@@ -1756,8 +1766,6 @@ def test_constructor_defaults(self):
# The errors argument defaults to strict.
self.assertRaises(UnicodeDecodeError, str, utf8_cent, encoding='ascii')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_codecs_utf7(self):
utfTests = [
('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example
@@ -1910,6 +1918,12 @@ def test_utf8_decode_invalid_sequences(self):
self.assertRaises(UnicodeDecodeError,
(b'\xF4'+cb+b'\xBF\xBF').decode, 'utf-8')
+ def test_issue127903(self):
+ # gh-127903: ``_copy_characters`` crashes on DEBUG builds when
+ # there is nothing to copy.
+ d = datetime.datetime(2013, 11, 10, 14, 20, 59)
+ self.assertEqual(d.strftime('%z'), '')
+
def test_issue8271(self):
# Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
# only the start byte and the continuation byte(s) are now considered
@@ -2261,8 +2275,6 @@ def test_codecs_errors(self):
self.assertRaises(ValueError, complex, "\ud800")
self.assertRaises(ValueError, complex, "\udf00")
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_codecs(self):
# Encoding
self.assertEqual('hello'.encode('ascii'), b'hello')
@@ -2392,32 +2404,39 @@ def test_ucs4(self):
else:
self.fail("Should have raised UnicodeDecodeError")
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_conversion(self):
# Make sure __str__() works properly
- class ObjectToStr:
- def __str__(self):
- return "foo"
-
- class StrSubclassToStr(str):
- def __str__(self):
- return "foo"
-
- class StrSubclassToStrSubclass(str):
- def __new__(cls, content=""):
- return str.__new__(cls, 2*content)
- def __str__(self):
+ class StrWithStr(str):
+ def __new__(cls, value):
+ self = str.__new__(cls, "")
+ self.value = value
return self
+ def __str__(self):
+ return self.value
- self.assertEqual(str(ObjectToStr()), "foo")
- self.assertEqual(str(StrSubclassToStr("bar")), "foo")
- s = str(StrSubclassToStrSubclass("foo"))
- self.assertEqual(s, "foofoo")
- self.assertIs(type(s), StrSubclassToStrSubclass)
- s = StrSubclass(StrSubclassToStrSubclass("foo"))
- self.assertEqual(s, "foofoo")
- self.assertIs(type(s), StrSubclass)
+ self.assertTypedEqual(str(WithStr('abc')), 'abc')
+ self.assertTypedEqual(str(WithStr(StrSubclass('abc'))), StrSubclass('abc'))
+ self.assertTypedEqual(StrSubclass(WithStr('abc')), StrSubclass('abc'))
+ self.assertTypedEqual(StrSubclass(WithStr(StrSubclass('abc'))),
+ StrSubclass('abc'))
+ self.assertTypedEqual(StrSubclass(WithStr(OtherStrSubclass('abc'))),
+ StrSubclass('abc'))
+
+ self.assertTypedEqual(str(StrWithStr('abc')), 'abc')
+ self.assertTypedEqual(str(StrWithStr(StrSubclass('abc'))), StrSubclass('abc'))
+ self.assertTypedEqual(StrSubclass(StrWithStr('abc')), StrSubclass('abc'))
+ self.assertTypedEqual(StrSubclass(StrWithStr(StrSubclass('abc'))),
+ StrSubclass('abc'))
+ self.assertTypedEqual(StrSubclass(StrWithStr(OtherStrSubclass('abc'))),
+ StrSubclass('abc'))
+
+ self.assertTypedEqual(str(WithRepr('')), '')
+ self.assertTypedEqual(str(WithRepr(StrSubclass(''))), StrSubclass(''))
+ self.assertTypedEqual(StrSubclass(WithRepr('')), StrSubclass(''))
+ self.assertTypedEqual(StrSubclass(WithRepr(StrSubclass(''))),
+ StrSubclass(''))
+ self.assertTypedEqual(StrSubclass(WithRepr(OtherStrSubclass(''))),
+ StrSubclass(''))
def test_unicode_repr(self):
class s1:
@@ -2433,7 +2452,6 @@ def test_printable_repr(self):
# This test only affects 32-bit platforms because expandtabs can only take
# an int as the max value, not a 64-bit C long. If expandtabs is changed
# to take a 64-bit long, this test should apply to all platforms.
- @unittest.skip("TODO: RUSTPYTHON, oom handling")
@unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
'only applies to 32-bit platforms')
def test_expandtabs_overflows_gracefully(self):
@@ -2444,7 +2462,6 @@ def test_expandtabs_optimization(self):
s = 'abc'
self.assertIs(s.expandtabs(), s)
- @unittest.skip("TODO: RUSTPYTHON, aborted: memory allocation of 9223372036854775759 bytes failed")
def test_raiseMemError(self):
asciifields = "nnb"
compactfields = asciifields + "nP"
@@ -2584,14 +2601,10 @@ def test_compare(self):
self.assertTrue(astral >= bmp2)
self.assertFalse(astral >= astral2)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_free_after_iterating(self):
support.check_free_after_iterating(self, iter, str)
support.check_free_after_iterating(self, reversed, str)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_check_encoding_errors(self):
# bpo-37388: str(bytes) and str.decode() must check encoding and errors
# arguments in dev mode
@@ -2652,6 +2665,47 @@ def test_check_encoding_errors(self):
proc = assert_python_failure('-X', 'dev', '-c', code)
self.assertEqual(proc.rc, 10, proc)
+ def test_str_invalid_call(self):
+ # too many args
+ with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"):
+ str("too", "many", "argu", "ments")
+ with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"):
+ str(1, "", "", 1)
+
+ # no such kw arg
+ with self.assertRaisesRegex(TypeError, r"str\(\) got an unexpected keyword argument 'test'"):
+ str(test=1)
+
+ # 'encoding' must be str
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"):
+ str(1, 1)
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"):
+ str(1, encoding=1)
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"):
+ str(b"x", b"ascii")
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"):
+ str(b"x", encoding=b"ascii")
+
+ # 'errors' must be str
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"):
+ str(1, 1, 1)
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"):
+ str(1, errors=1)
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"):
+ str(1, "", errors=1)
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"):
+ str(b"x", "ascii", b"strict")
+ with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"):
+ str(b"x", "ascii", errors=b"strict")
+
+ # both positional and kwarg
+ with self.assertRaisesRegex(TypeError, r"argument for str\(\) given by name \('encoding'\) and position \(2\)"):
+ str(b"x", "utf-8", encoding="ascii")
+ with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"):
+ str(b"x", "utf-8", "ignore", encoding="ascii")
+ with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"):
+ str(b"x", "utf-8", "strict", errors="ignore")
+
class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self):
diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py
index 80c22c6cdd..fe25bfe9f8 100644
--- a/Lib/test/test_unicode_file.py
+++ b/Lib/test/test_unicode_file.py
@@ -110,7 +110,7 @@ def _test_single(self, filename):
os.unlink(filename)
self.assertTrue(not os.path.exists(filename))
# and again with os.open.
- f = os.open(filename, os.O_CREAT)
+ f = os.open(filename, os.O_CREAT | os.O_WRONLY)
os.close(f)
try:
self._do_single(filename)
diff --git a/Lib/test/test_unicode_file_functions.py b/Lib/test/test_unicode_file_functions.py
index 47619c8807..25c16e3a0b 100644
--- a/Lib/test/test_unicode_file_functions.py
+++ b/Lib/test/test_unicode_file_functions.py
@@ -5,7 +5,7 @@
import unittest
import warnings
from unicodedata import normalize
-from test.support import os_helper
+from test.support import is_apple, os_helper
from test import support
@@ -23,13 +23,13 @@
'10_\u1fee\u1ffd',
]
-# Mac OS X decomposes Unicode names, using Normal Form D.
+# Apple platforms decompose Unicode names, using Normal Form D.
# http://developer.apple.com/mac/library/qa/qa2001/qa1173.html
# "However, most volume formats do not follow the exact specification for
# these normal forms. For example, HFS Plus uses a variant of Normal Form D
# in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
# U+2FAFF are not decomposed."
-if sys.platform != 'darwin':
+if not is_apple:
filenames.extend([
# Specific code points: NFC(fn), NFD(fn), NFKC(fn) and NFKD(fn) all different
'11_\u0385\u03d3\u03d4',
@@ -119,11 +119,11 @@ def test_open(self):
os.stat(name)
self._apply_failure(os.listdir, name, self._listdir_failure)
- # Skip the test on darwin, because darwin does normalize the filename to
+ # Skip the test on Apple platforms, because they don't normalize the filename to
# NFD (a variant of Unicode NFD form). Normalize the filename to NFC, NFKC,
# NFKD in Python is useless, because darwin will normalize it later and so
# open(), os.stat(), etc. don't raise any exception.
- @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X')
+ @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms')
@unittest.skipIf(
support.is_emscripten or support.is_wasi,
"test fails on Emscripten/WASI when host platform is macOS."
@@ -142,10 +142,10 @@ def test_normalize(self):
self._apply_failure(os.remove, name)
self._apply_failure(os.listdir, name)
- # Skip the test on darwin, because darwin uses a normalization different
+ # Skip the test on Apple platforms, because they use a normalization different
# than Python NFD normalization: filenames are different even if we use
# Python NFD normalization.
- @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X')
+ @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms')
def test_listdir(self):
sf0 = set(self.files)
with warnings.catch_warnings():
diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py
index d7a0ece253..63c6c05582 100644
--- a/Lib/test/test_unicode_identifiers.py
+++ b/Lib/test/test_unicode_identifiers.py
@@ -17,11 +17,9 @@ def test_non_bmp_normalized(self):
𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 1
self.assertIn("Unicode", dir())
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_invalid(self):
try:
- from test import badsyntax_3131
+ from test.tokenizedata import badsyntax_3131
except SyntaxError as err:
self.assertEqual(str(err),
"invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)")
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 29da4a25a3..2cf367a2cf 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -11,18 +11,21 @@
import sys
import unicodedata
import unittest
-from test.support import (open_urlresource, requires_resource, script_helper,
- cpython_only, check_disallow_instantiation,
- ResourceDenied)
+from test.support import (
+ open_urlresource,
+ requires_resource,
+ script_helper,
+ cpython_only,
+ check_disallow_instantiation,
+ force_not_colorized,
+)
class UnicodeMethodsTest(unittest.TestCase):
# update this, if the database changes
- expectedchecksum = '4739770dd4d0e5f1b1677accfc3552ed3c8ef326'
+ expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e'
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
@requires_resource('cpu')
def test_method_checksum(self):
h = hashlib.sha1()
@@ -74,9 +77,8 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
# Update this if the database changes. Make sure to do a full rebuild
# (e.g. 'make distclean && make') to get the correct checksum.
- expectedchecksum = '98d602e1f69d5c5bb8a5910c40bbbad4e18e8370'
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
+ expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba'
+
@requires_resource('cpu')
def test_function_checksum(self):
data = []
@@ -94,6 +96,8 @@ def test_function_checksum(self):
self.db.decomposition(char),
str(self.db.mirrored(char)),
str(self.db.combining(char)),
+ unicodedata.east_asian_width(char),
+ self.db.name(char, ""),
]
h.update(''.join(data).encode("ascii"))
result = h.hexdigest()
@@ -106,8 +110,26 @@ def test_name_inverse_lookup(self):
if looked_name := self.db.name(char, None):
self.assertEqual(self.db.lookup(looked_name), char)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
+ def test_no_names_in_pua(self):
+ puas = [*range(0xe000, 0xf8ff),
+ *range(0xf0000, 0xfffff),
+ *range(0x100000, 0x10ffff)]
+ for i in puas:
+ char = chr(i)
+ self.assertRaises(ValueError, self.db.name, char)
+
+ def test_lookup_nonexistant(self):
+ # just make sure that lookup can fail
+ for nonexistant in [
+ "LATIN SMLL LETR A",
+ "OPEN HANDS SIGHS",
+ "DREGS",
+ "HANDBUG",
+ "MODIFIER LETTER CYRILLIC SMALL QUESTION MARK",
+ "???",
+ ]:
+ self.assertRaises(KeyError, self.db.lookup, nonexistant)
+
def test_digit(self):
self.assertEqual(self.db.digit('A', None), None)
self.assertEqual(self.db.digit('9'), 9)
@@ -120,8 +142,6 @@ def test_digit(self):
self.assertRaises(TypeError, self.db.digit, 'xx')
self.assertRaises(ValueError, self.db.digit, 'x')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_numeric(self):
self.assertEqual(self.db.numeric('A',None), None)
self.assertEqual(self.db.numeric('9'), 9)
@@ -135,8 +155,6 @@ def test_numeric(self):
self.assertRaises(TypeError, self.db.numeric, 'xx')
self.assertRaises(ValueError, self.db.numeric, 'x')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_decimal(self):
self.assertEqual(self.db.decimal('A',None), None)
self.assertEqual(self.db.decimal('9'), 9)
@@ -159,8 +177,6 @@ def test_category(self):
self.assertRaises(TypeError, self.db.category)
self.assertRaises(TypeError, self.db.category, 'xx')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_bidirectional(self):
self.assertEqual(self.db.bidirectional('\uFFFE'), '')
self.assertEqual(self.db.bidirectional(' '), 'WS')
@@ -170,8 +186,6 @@ def test_bidirectional(self):
self.assertRaises(TypeError, self.db.bidirectional)
self.assertRaises(TypeError, self.db.bidirectional, 'xx')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_decomposition(self):
self.assertEqual(self.db.decomposition('\uFFFE'),'')
self.assertEqual(self.db.decomposition('\u00bc'), ' 0031 2044 0034')
@@ -188,8 +202,6 @@ def test_mirrored(self):
self.assertRaises(TypeError, self.db.mirrored)
self.assertRaises(TypeError, self.db.mirrored, 'xx')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_combining(self):
self.assertEqual(self.db.combining('\uFFFE'), 0)
self.assertEqual(self.db.combining('a'), 0)
@@ -217,8 +229,6 @@ def test_issue10254(self):
b = 'C\u0338' * 20 + '\xC7'
self.assertEqual(self.db.normalize('NFC', a), b)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_issue29456(self):
# Fix #29456
u1176_str_a = '\u1100\u1176\u11a8'
@@ -245,8 +255,23 @@ def test_east_asian_width(self):
self.assertEqual(eaw('\u2010'), 'A')
self.assertEqual(eaw('\U00020000'), 'W')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
+ def test_east_asian_width_unassigned(self):
+ eaw = self.db.east_asian_width
+ # unassigned
+ for char in '\u0530\u0ecf\u10c6\u20fc\uaaca\U000107bd\U000115f2':
+ self.assertEqual(eaw(char), 'N')
+ self.assertIs(self.db.name(char, None), None)
+
+ # unassigned but reserved for CJK
+ for char in '\uFA6E\uFADA\U0002A6E0\U0002FA20\U0003134B\U0003FFFD':
+ self.assertEqual(eaw(char), 'W')
+ self.assertIs(self.db.name(char, None), None)
+
+ # private use areas
+ for char in '\uE000\uF800\U000F0000\U000FFFEE\U00100000\U0010FFF0':
+ self.assertEqual(eaw(char), 'A')
+ self.assertIs(self.db.name(char, None), None)
+
def test_east_asian_width_9_0_changes(self):
self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
self.assertEqual(self.db.east_asian_width('\u231a'), 'W')
@@ -258,8 +283,7 @@ def test_disallow_instantiation(self):
# Ensure that the type disallows instantiation (bpo-43916)
check_disallow_instantiation(self, unicodedata.UCD)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
+ @force_not_colorized
def test_failed_import_during_compiling(self):
# Issue 4367
# Decoding \N escapes requires the unicodedata module. If it can't be
@@ -276,8 +300,6 @@ def test_failed_import_during_compiling(self):
"(can't load unicodedata module)"
self.assertIn(error, result.err.decode("ascii"))
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_decimal_numeric_consistent(self):
# Test that decimal and numeric are consistent,
# i.e. if a character has a decimal value,
@@ -291,8 +313,6 @@ def test_decimal_numeric_consistent(self):
count += 1
self.assertTrue(count >= 10) # should have tested at least the ASCII digits
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_digit_numeric_consistent(self):
# Test that digit and numeric are consistent,
# i.e. if a character has a digit value,
@@ -309,8 +329,6 @@ def test_digit_numeric_consistent(self):
def test_bug_1704793(self):
self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), '\U00010346')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_ucd_510(self):
import unicodedata
# In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
@@ -322,6 +340,7 @@ def test_ucd_510(self):
self.assertTrue("\u1d79".upper()=='\ua77d')
self.assertTrue(".".upper()=='.')
+ @requires_resource('cpu')
def test_bug_5828(self):
self.assertEqual("\u1d79".lower(), "\u1d79")
# Only U+0000 should have U+0000 as its upper/lower/titlecase variant
@@ -333,8 +352,6 @@ def test_bug_5828(self):
[0]
)
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
def test_bug_4971(self):
# LETTER DZ WITH CARON: DZ, Dz, dz
self.assertEqual("\u01c4".title(), "\u01c5")
@@ -364,6 +381,7 @@ def unistr(data):
return "".join([chr(x) for x in data])
@requires_resource('network')
+ @requires_resource('cpu')
def test_normalization(self):
TESTDATAFILE = "NormalizationTest.txt"
TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}"
diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py
index 51b4f6041e..74df52f541 100644
--- a/Lib/test/test_userstring.py
+++ b/Lib/test/test_userstring.py
@@ -7,8 +7,7 @@
from collections import UserString
class UserStringTest(
- string_tests.CommonTest,
- string_tests.MixinStrUnicodeUserStringTest,
+ string_tests.StringLikeTest,
unittest.TestCase
):
From 58602a3e811a790d28c686ab4b0c1462d7f4d501 Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Sat, 12 Jul 2025 12:30:10 +0300
Subject: [PATCH 2/3] Apply RustPython patches
---
Lib/test/string_tests.py | 9 +++++++-
Lib/test/test_bytes.py | 26 ++++++++++++++++++---
Lib/test/test_str.py | 34 ++++++++++++++++++++++++++++
Lib/test/test_unicode_identifiers.py | 2 ++
Lib/test/test_unicodedata.py | 30 ++++++++++++++++++++++++
5 files changed, 97 insertions(+), 4 deletions(-)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 9bb0ce7bb5..c5831c47fc 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -438,7 +438,8 @@ def test_expandtabs(self):
self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42)
# This test is only valid when sizeof(int) == sizeof(void*) == 4.
- if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4:
+ # XXX RUSTPYTHON TODO: expandtabs overflow checks
+ if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4 and False:
self.checkraises(OverflowError,
'\ta\n\tb', 'expandtabs', sys.maxsize)
@@ -779,6 +780,7 @@ def test_replace_uses_two_way_maxcount(self):
self.checkequal(AABAA + "ccc",
AABAA + ABBA, 'replace', ABBA, "ccc", 2)
+ @unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms")
@unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
'only applies to 32-bit platforms')
def test_replace_overflow(self):
@@ -1246,6 +1248,9 @@ def test___contains__(self):
self.checkequal(False, 'asd', '__contains__', 'asdf')
self.checkequal(False, '', '__contains__', 'asdf')
+
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_subscript(self):
self.checkequal('a', 'abc', '__getitem__', 0)
self.checkequal('c', 'abc', '__getitem__', -1)
@@ -1499,6 +1504,8 @@ def test_none_arguments(self):
self.checkequal(True, s, 'startswith', 'h', None, -2)
self.checkequal(False, s, 'startswith', 'x', None, None)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_find_etc_raise_correct_error_messages(self):
# issue 11828
s = 'hello'
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index f8edfe0a17..e84df546a8 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -201,6 +201,8 @@ def test_constructor_value_errors(self):
self.assertRaises(ValueError, self.type2test, [sys.maxsize+1])
self.assertRaises(ValueError, self.type2test, [10**100])
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
@bigaddrspacetest
def test_constructor_overflow(self):
size = MAX_Py_ssize_t
@@ -324,6 +326,8 @@ def test_decode(self):
# Default encoding is utf-8
self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_check_encoding_errors(self):
# bpo-37388: bytes(str) and bytes.encode() must check encoding
# and errors arguments in dev mode
@@ -968,6 +972,8 @@ def test_integer_arguments_out_of_byte_range(self):
self.assertRaises(ValueError, method, 256)
self.assertRaises(ValueError, method, 9999)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_find_etc_raise_correct_error_messages(self):
# issue 11828
b = self.type2test(b'hello')
@@ -987,6 +993,8 @@ def test_find_etc_raise_correct_error_messages(self):
self.assertRaisesRegex(TypeError, r'\bendswith\b', b.endswith,
x, None, None, None)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_free_after_iterating(self):
test.support.check_free_after_iterating(self, iter, self.type2test)
test.support.check_free_after_iterating(self, reversed, self.type2test)
@@ -1575,6 +1583,11 @@ def test_irepeat_1char(self):
self.assertEqual(b, b1)
self.assertIs(b, b1)
+ # NOTE: RUSTPYTHON:
+ #
+ # The second instance of self.assertGreater was replaced with
+ # self.assertGreaterEqual since, in RustPython, the underlying storage
+ # is a Vec which doesn't require trailing null byte.
def test_alloc(self):
b = bytearray()
alloc = b.__alloc__()
@@ -1583,10 +1596,15 @@ def test_alloc(self):
for i in range(100):
b += b"x"
alloc = b.__alloc__()
- self.assertGreater(alloc, len(b)) # including trailing null byte
+ self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched
if alloc not in seq:
seq.append(alloc)
+ # NOTE: RUSTPYTHON:
+ #
+ # The usages of self.assertGreater were replaced with
+ # self.assertGreaterEqual since, in RustPython, the underlying storage
+ # is a Vec which doesn't require trailing null byte.
def test_init_alloc(self):
b = bytearray()
def g():
@@ -1597,12 +1615,12 @@ def g():
self.assertEqual(len(b), len(a))
self.assertLessEqual(len(b), i)
alloc = b.__alloc__()
- self.assertGreater(alloc, len(b)) # including trailing null byte
+ self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched
b.__init__(g())
self.assertEqual(list(b), list(range(1, 100)))
self.assertEqual(len(b), 99)
alloc = b.__alloc__()
- self.assertGreater(alloc, len(b))
+ self.assertGreaterEqual(alloc, len(b)) # NOTE: RUSTPYTHON patched
def test_extend(self):
orig = b'hello'
@@ -2071,6 +2089,7 @@ def test_join(self):
s3 = s1.join([b"abcd"])
self.assertIs(type(s3), self.basetype)
+ @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest")
def test_pickle(self):
a = self.type2test(b"abcd")
a.x = 10
@@ -2085,6 +2104,7 @@ def test_pickle(self):
self.assertEqual(type(a.z), type(b.z))
self.assertFalse(hasattr(b, 'y'))
+ @unittest.skip("TODO: RUSTPYTHON, Fails on ByteArraySubclassWithSlotsTest")
def test_copy(self):
a = self.type2test(b"abcd")
a.x = 10
diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py
index 46673cc56a..b2d585c5fc 100644
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@@ -564,6 +564,7 @@ def __str__(self): return self.sval
self.checkraises(TypeError, ' ', 'join', [1, 2, 3])
self.checkraises(TypeError, ' ', 'join', ['1', '2', 3])
+ @unittest.skip("TODO: RUSTPYTHON, oom handling")
@unittest.skipIf(sys.maxsize > 2**32,
'needs too much memory on a 64-bit platform')
def test_join_overflow(self):
@@ -792,6 +793,8 @@ def test_isdecimal(self):
for ch in ['\U0001D7F6', '\U00011066', '\U000104A0']:
self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch))
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_isdigit(self):
super().test_isdigit()
self.checkequalnofix(True, '\u2460', 'isdigit')
@@ -937,6 +940,8 @@ def test_upper(self):
self.assertEqual('\U0008fffe'.upper(), '\U0008fffe')
self.assertEqual('\u2177'.upper(), '\u2167')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_capitalize(self):
string_tests.StringLikeTest.test_capitalize(self)
self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
@@ -954,6 +959,8 @@ def test_capitalize(self):
self.assertEqual('ﬁnnish'.capitalize(), 'Finnish')
self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_title(self):
super().test_title()
self.assertEqual('\U0001044F'.title(), '\U00010427')
@@ -971,6 +978,8 @@ def test_title(self):
self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy')
self.assertEqual('A\u03a3A'.title(), 'A\u03c3a')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_swapcase(self):
string_tests.StringLikeTest.test_swapcase(self)
self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
@@ -1070,6 +1079,8 @@ def test_issue18183(self):
'\U00100000'.ljust(3, '\U00010000')
'\U00100000'.rjust(3, '\U00010000')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_format(self):
self.assertEqual(''.format(), '')
self.assertEqual('a'.format(), 'a')
@@ -1453,16 +1464,21 @@ def __getitem__(self, key):
self.assertRaises(TypeError, '{a}'.format_map, [])
self.assertRaises(ZeroDivisionError, '{a}'.format_map, BadMapping())
+ @unittest.skip("TODO: RUSTPYTHON, killed for chewing up RAM")
def test_format_huge_precision(self):
format_string = ".{}f".format(sys.maxsize + 1)
with self.assertRaises(ValueError):
result = format(2.34, format_string)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_format_huge_width(self):
format_string = "{}f".format(sys.maxsize + 1)
with self.assertRaises(ValueError):
result = format(2.34, format_string)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_format_huge_item_number(self):
format_string = "{{{}:.6f}}".format(sys.maxsize + 1)
with self.assertRaises(ValueError):
@@ -1498,6 +1514,8 @@ def __format__(self, spec):
self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_formatting(self):
string_tests.StringLikeTest.test_formatting(self)
# Testing Unicode formatting strings...
@@ -1746,6 +1764,8 @@ def __str__(self):
'character buffers are decoded to unicode'
)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_constructor_keyword_args(self):
"""Pass various keyword argument combinations to the constructor."""
# The object argument can be passed as a keyword.
@@ -1755,6 +1775,8 @@ def test_constructor_keyword_args(self):
self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'"
self.assertEqual(str(object=b'foo', errors='strict'), 'foo')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_constructor_defaults(self):
"""Check the constructor argument defaults."""
# The object argument defaults to '' or b''.
@@ -1766,6 +1788,8 @@ def test_constructor_defaults(self):
# The errors argument defaults to strict.
self.assertRaises(UnicodeDecodeError, str, utf8_cent, encoding='ascii')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_codecs_utf7(self):
utfTests = [
('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example
@@ -2275,6 +2299,8 @@ def test_codecs_errors(self):
self.assertRaises(ValueError, complex, "\ud800")
self.assertRaises(ValueError, complex, "\udf00")
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_codecs(self):
# Encoding
self.assertEqual('hello'.encode('ascii'), b'hello')
@@ -2404,6 +2430,8 @@ def test_ucs4(self):
else:
self.fail("Should have raised UnicodeDecodeError")
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_conversion(self):
# Make sure __str__() works properly
class StrWithStr(str):
@@ -2452,6 +2480,7 @@ def test_printable_repr(self):
# This test only affects 32-bit platforms because expandtabs can only take
# an int as the max value, not a 64-bit C long. If expandtabs is changed
# to take a 64-bit long, this test should apply to all platforms.
+ @unittest.skip("TODO: RUSTPYTHON, oom handling")
@unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
'only applies to 32-bit platforms')
def test_expandtabs_overflows_gracefully(self):
@@ -2462,6 +2491,7 @@ def test_expandtabs_optimization(self):
s = 'abc'
self.assertIs(s.expandtabs(), s)
+ @unittest.skip("TODO: RUSTPYTHON, aborted: memory allocation of 9223372036854775759 bytes failed")
def test_raiseMemError(self):
asciifields = "nnb"
compactfields = asciifields + "nP"
@@ -2601,10 +2631,14 @@ def test_compare(self):
self.assertTrue(astral >= bmp2)
self.assertFalse(astral >= astral2)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_free_after_iterating(self):
support.check_free_after_iterating(self, iter, str)
support.check_free_after_iterating(self, reversed, str)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_check_encoding_errors(self):
# bpo-37388: str(bytes) and str.decode() must check encoding and errors
# arguments in dev mode
diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py
index 63c6c05582..60cfdaabe8 100644
--- a/Lib/test/test_unicode_identifiers.py
+++ b/Lib/test/test_unicode_identifiers.py
@@ -17,6 +17,8 @@ def test_non_bmp_normalized(self):
𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 1
self.assertIn("Unicode", dir())
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_invalid(self):
try:
from test.tokenizedata import badsyntax_3131
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 2cf367a2cf..4804ec297c 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -26,6 +26,8 @@ class UnicodeMethodsTest(unittest.TestCase):
# update this, if the database changes
expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e'
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
@requires_resource('cpu')
def test_method_checksum(self):
h = hashlib.sha1()
@@ -79,6 +81,8 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
# (e.g. 'make distclean && make') to get the correct checksum.
expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba'
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
@requires_resource('cpu')
def test_function_checksum(self):
data = []
@@ -130,6 +134,8 @@ def test_lookup_nonexistant(self):
]:
self.assertRaises(KeyError, self.db.lookup, nonexistant)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_digit(self):
self.assertEqual(self.db.digit('A', None), None)
self.assertEqual(self.db.digit('9'), 9)
@@ -142,6 +148,8 @@ def test_digit(self):
self.assertRaises(TypeError, self.db.digit, 'xx')
self.assertRaises(ValueError, self.db.digit, 'x')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_numeric(self):
self.assertEqual(self.db.numeric('A',None), None)
self.assertEqual(self.db.numeric('9'), 9)
@@ -155,6 +163,8 @@ def test_numeric(self):
self.assertRaises(TypeError, self.db.numeric, 'xx')
self.assertRaises(ValueError, self.db.numeric, 'x')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_decimal(self):
self.assertEqual(self.db.decimal('A',None), None)
self.assertEqual(self.db.decimal('9'), 9)
@@ -177,6 +187,8 @@ def test_category(self):
self.assertRaises(TypeError, self.db.category)
self.assertRaises(TypeError, self.db.category, 'xx')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_bidirectional(self):
self.assertEqual(self.db.bidirectional('\uFFFE'), '')
self.assertEqual(self.db.bidirectional(' '), 'WS')
@@ -186,6 +198,8 @@ def test_bidirectional(self):
self.assertRaises(TypeError, self.db.bidirectional)
self.assertRaises(TypeError, self.db.bidirectional, 'xx')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_decomposition(self):
self.assertEqual(self.db.decomposition('\uFFFE'),'')
self.assertEqual(self.db.decomposition('\u00bc'), ' 0031 2044 0034')
@@ -202,6 +216,8 @@ def test_mirrored(self):
self.assertRaises(TypeError, self.db.mirrored)
self.assertRaises(TypeError, self.db.mirrored, 'xx')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_combining(self):
self.assertEqual(self.db.combining('\uFFFE'), 0)
self.assertEqual(self.db.combining('a'), 0)
@@ -229,6 +245,8 @@ def test_issue10254(self):
b = 'C\u0338' * 20 + '\xC7'
self.assertEqual(self.db.normalize('NFC', a), b)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_issue29456(self):
# Fix #29456
u1176_str_a = '\u1100\u1176\u11a8'
@@ -272,6 +290,8 @@ def test_east_asian_width_unassigned(self):
self.assertEqual(eaw(char), 'A')
self.assertIs(self.db.name(char, None), None)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_east_asian_width_9_0_changes(self):
self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
self.assertEqual(self.db.east_asian_width('\u231a'), 'W')
@@ -283,6 +303,8 @@ def test_disallow_instantiation(self):
# Ensure that the type disallows instantiation (bpo-43916)
check_disallow_instantiation(self, unicodedata.UCD)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
@force_not_colorized
def test_failed_import_during_compiling(self):
# Issue 4367
@@ -300,6 +322,8 @@ def test_failed_import_during_compiling(self):
"(can't load unicodedata module)"
self.assertIn(error, result.err.decode("ascii"))
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_decimal_numeric_consistent(self):
# Test that decimal and numeric are consistent,
# i.e. if a character has a decimal value,
@@ -313,6 +337,8 @@ def test_decimal_numeric_consistent(self):
count += 1
self.assertTrue(count >= 10) # should have tested at least the ASCII digits
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_digit_numeric_consistent(self):
# Test that digit and numeric are consistent,
# i.e. if a character has a digit value,
@@ -329,6 +355,8 @@ def test_digit_numeric_consistent(self):
def test_bug_1704793(self):
self.assertEqual(self.db.lookup("GOTHIC LETTER FAIHU"), '\U00010346')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_ucd_510(self):
import unicodedata
# In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
@@ -352,6 +380,8 @@ def test_bug_5828(self):
[0]
)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_bug_4971(self):
# LETTER DZ WITH CARON: DZ, Dz, dz
self.assertEqual("\u01c4".title(), "\u01c5")
From 2d8a3cb76c4a0f74c439b756110a837a8ff6c969 Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:39:24 +0300
Subject: [PATCH 3/3] Mark new failing tests
---
Lib/test/string_tests.py | 2 ++
Lib/test/test_str.py | 4 ++++
Lib/test/test_unicodedata.py | 4 ++++
3 files changed, 10 insertions(+)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index c5831c47fc..3f82b515bb 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -154,6 +154,8 @@ def test_count(self):
self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i))
self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i))
+ # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument count
+ @unittest.expectedFailure
def test_count_keyword(self):
self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0))
self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1))
diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py
index b2d585c5fc..ef2d211a61 100644
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@@ -112,6 +112,8 @@ def test_literals(self):
# raw strings should not have unicode escapes
self.assertNotEqual(r"\u0020", " ")
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_ascii(self):
self.assertEqual(ascii('abc'), "'abc'")
self.assertEqual(ascii('ab\\c'), "'ab\\\\c'")
@@ -2699,6 +2701,8 @@ def test_check_encoding_errors(self):
proc = assert_python_failure('-X', 'dev', '-c', code)
self.assertEqual(proc.rc, 10, proc)
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_str_invalid_call(self):
# too many args
with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"):
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 4804ec297c..7f49c1690f 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -122,6 +122,8 @@ def test_no_names_in_pua(self):
char = chr(i)
self.assertRaises(ValueError, self.db.name, char)
+ # TODO: RUSTPYTHON; LookupError: undefined character name 'LATIN SMLL LETR A'
+ @unittest.expectedFailure
def test_lookup_nonexistant(self):
# just make sure that lookup can fail
for nonexistant in [
@@ -273,6 +275,8 @@ def test_east_asian_width(self):
self.assertEqual(eaw('\u2010'), 'A')
self.assertEqual(eaw('\U00020000'), 'W')
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_east_asian_width_unassigned(self):
eaw = self.db.east_asian_width
# unassigned
--- a PPN by Garber Painting Akron. With Image Size Reduction included! Fetched URL: http://github.com/RustPython/RustPython/pull/5953.patch
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy