diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 6f402513fd..3f82b515bb 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -8,18 +8,12 @@ from collections import UserList import random + class Sequence: def __init__(self, seq='wxyz'): self.seq = seq def __len__(self): return len(self.seq) def __getitem__(self, i): return self.seq[i] -class BadSeq1(Sequence): - def __init__(self): self.seq = [7, 'hello', 123] - def __str__(self): return '{0} {1} {2}'.format(*self.seq) - -class BadSeq2(Sequence): - def __init__(self): self.seq = ['a', 'b', 'c'] - def __len__(self): return 8 class BaseTest: # These tests are for buffers of values (bytes) and not @@ -27,7 +21,7 @@ class BaseTest: # and various string implementations # The type to be tested - # Change in subclasses to change the behaviour of fixtesttype() + # Change in subclasses to change the behaviour of fixtype() type2test = None # Whether the "contained items" of the container are integers in @@ -36,7 +30,7 @@ class BaseTest: contains_bytes = False # All tests pass their arguments to the testing methods - # as str objects. fixtesttype() can be used to propagate + # as str objects. fixtype() can be used to propagate # these arguments to the appropriate type def fixtype(self, obj): if isinstance(obj, str): @@ -160,6 +154,14 @@ def test_count(self): self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i)) self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i)) + # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument count + @unittest.expectedFailure + def test_count_keyword(self): + self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0)) + self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1)) + self.assertEqual('aa'.replace('a', 'b', 2), 'aa'.replace('a', 'b', count=2)) + self.assertEqual('aa'.replace('a', 'b', 3), 'aa'.replace('a', 'b', count=3)) + def test_find(self): self.checkequal(0, 'abcdefghiabc', 'find', 'abc') self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1) @@ -327,11 +329,12 @@ def reference_find(p, s): for i in range(len(s)): if s.startswith(p, i): return i + if p == '' and s == '': + return 0 return -1 - rr = random.randrange - choices = random.choices - for _ in range(1000): + def check_pattern(rr): + choices = random.choices p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20) p = p0[:len(p0) - rr(10)] # pop off some characters left = ''.join(choices('abcdef', k=rr(2000))) @@ -341,6 +344,49 @@ def reference_find(p, s): self.checkequal(reference_find(p, text), text, 'find', p) + rr = random.randrange + for _ in range(1000): + check_pattern(rr) + + # Test that empty string always work: + check_pattern(lambda *args: 0) + + def test_find_many_lengths(self): + haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)] + haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats] + + needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)] + needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats] + + for n, haystack1 in haystacks: + haystack2 = haystack1[:-1] + for m, needle in needles: + answer1 = 5 * (n - m) if m <= n else -1 + self.assertEqual(haystack1.find(needle), answer1, msg=(n,m)) + self.assertEqual(haystack2.find(needle), -1, msg=(n,m)) + + def test_adaptive_find(self): + # This would be very slow for the naive algorithm, + # but str.find() should be O(n + m). + for N in 1000, 10_000, 100_000, 1_000_000: + A, B = 'a' * N, 'b' * N + haystack = A + A + B + A + A + needle = A + B + B + A + self.checkequal(-1, haystack, 'find', needle) + self.checkequal(0, haystack, 'count', needle) + self.checkequal(len(haystack), haystack + needle, 'find', needle) + self.checkequal(1, haystack + needle, 'count', needle) + + def test_find_with_memory(self): + # Test the "Skip with memory" path in the two-way algorithm. + for N in 1000, 3000, 10_000, 30_000: + needle = 'ab' * N + haystack = ('ab'*(N-1) + 'b') * 2 + self.checkequal(-1, haystack, 'find', needle) + self.checkequal(0, haystack, 'count', needle) + self.checkequal(len(haystack), haystack + needle, 'find', needle) + self.checkequal(1, haystack + needle, 'count', needle) + def test_find_shift_table_overflow(self): """When the table of 8-bit shifts overflows.""" N = 2**8 + 100 @@ -724,6 +770,18 @@ def test_replace(self): self.checkraises(TypeError, 'hello', 'replace', 42, 'h') self.checkraises(TypeError, 'hello', 'replace', 'h', 42) + def test_replace_uses_two_way_maxcount(self): + # Test that maxcount works in _two_way_count in fastsearch.h + A, B = "A"*1000, "B"*1000 + AABAA = A + A + B + A + A + ABBA = A + B + B + A + self.checkequal(AABAA + ABBA, + AABAA + ABBA, 'replace', ABBA, "ccc", 0) + self.checkequal(AABAA + "ccc", + AABAA + ABBA, 'replace', ABBA, "ccc", 1) + self.checkequal(AABAA + "ccc", + AABAA + ABBA, 'replace', ABBA, "ccc", 2) + @unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms") @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, 'only applies to 32-bit platforms') @@ -734,8 +792,6 @@ def test_replace_overflow(self): self.checkraises(OverflowError, A2_16, "replace", "A", A2_16) self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16) - - # Python 3.9 def test_removeprefix(self): self.checkequal('am', 'spam', 'removeprefix', 'sp') self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam') @@ -754,7 +810,6 @@ def test_removeprefix(self): self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42) self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l")) - # Python 3.9 def test_removesuffix(self): self.checkequal('sp', 'spam', 'removesuffix', 'am') self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam') @@ -1053,7 +1108,7 @@ def test_splitlines(self): self.checkraises(TypeError, 'abc', 'splitlines', 42, 42) -class CommonTest(BaseTest): +class StringLikeTest(BaseTest): # This testcase contains tests that can be used in all # stringlike classes. Currently this is str and UserString. @@ -1084,11 +1139,6 @@ def test_capitalize_nonascii(self): self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7', '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize') - -class MixinStrUnicodeUserStringTest: - # additional tests that only work for - # stringlike objects, i.e. str, UserString - def test_startswith(self): self.checkequal(True, 'hello', 'startswith', 'he') self.checkequal(True, 'hello', 'startswith', 'hello') @@ -1273,8 +1323,11 @@ def test_join(self): self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', ('a' * i,) * i) - #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1()) - self.checkequal('a b c', ' ', 'join', BadSeq2()) + class LiesAboutLengthSeq(Sequence): + def __init__(self): self.seq = ['a', 'b', 'c'] + def __len__(self): return 8 + + self.checkequal('a b c', ' ', 'join', LiesAboutLengthSeq()) self.checkraises(TypeError, ' ', 'join') self.checkraises(TypeError, ' ', 'join', None) @@ -1459,19 +1512,19 @@ def test_find_etc_raise_correct_error_messages(self): # issue 11828 s = 'hello' x = 'x' - self.assertRaisesRegex(TypeError, r'^find\(', s.find, + self.assertRaisesRegex(TypeError, r'^find\b', s.find, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind, + self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^index\(', s.index, + self.assertRaisesRegex(TypeError, r'^index\b', s.index, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex, + self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^count\(', s.count, + self.assertRaisesRegex(TypeError, r'^count\b', s.count, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith, + self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith, + self.assertRaisesRegex(TypeError, r'^endswith\b', s.endswith, x, None, None, None) # issue #15534 diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 3c634b6cac..e84df546a8 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -10,6 +10,7 @@ import sys import copy import functools +import operator import pickle import tempfile import textwrap @@ -46,6 +47,10 @@ def __index__(self): class BaseBytesTest: + def assertTypedEqual(self, actual, expected): + self.assertIs(type(actual), type(expected)) + self.assertEqual(actual, expected) + def test_basics(self): b = self.type2test() self.assertEqual(type(b), self.type2test) @@ -737,6 +742,37 @@ def check(fmt, vals, result): check(b'%i%b %*.*b', (10, b'3', 5, 3, b'abc',), b'103 abc') check(b'%c', b'a', b'a') + class PseudoFloat: + def __init__(self, value): + self.value = float(value) + def __int__(self): + return int(self.value) + + pi = PseudoFloat(3.1415) + + exceptions_params = [ + ('%x format: an integer is required, not float', b'%x', 3.14), + ('%X format: an integer is required, not float', b'%X', 2.11), + ('%o format: an integer is required, not float', b'%o', 1.79), + ('%x format: an integer is required, not PseudoFloat', b'%x', pi), + ('%x format: an integer is required, not complex', b'%x', 3j), + ('%X format: an integer is required, not complex', b'%X', 2j), + ('%o format: an integer is required, not complex', b'%o', 1j), + ('%u format: a real number is required, not complex', b'%u', 3j), + # See https://github.com/python/cpython/issues/130928 as for why + # the exception message contains '%d' instead of '%i'. + ('%d format: a real number is required, not complex', b'%i', 2j), + ('%d format: a real number is required, not complex', b'%d', 2j), + ( + r'%c requires an integer in range\(256\) or a single byte', + b'%c', pi + ), + ] + + for msg, format_bytes, value in exceptions_params: + with self.assertRaisesRegex(TypeError, msg): + operator.mod(format_bytes, value) + def test_imod(self): b = self.type2test(b'hello, %b!') orig = b @@ -995,13 +1031,13 @@ def test_translate(self): self.assertEqual(c, b'hllo') def test_sq_item(self): - _testcapi = import_helper.import_module('_testcapi') + _testlimitedcapi = import_helper.import_module('_testlimitedcapi') obj = self.type2test((42,)) with self.assertRaises(IndexError): - _testcapi.sequence_getitem(obj, -2) + _testlimitedcapi.sequence_getitem(obj, -2) with self.assertRaises(IndexError): - _testcapi.sequence_getitem(obj, 1) - self.assertEqual(_testcapi.sequence_getitem(obj, 0), 42) + _testlimitedcapi.sequence_getitem(obj, 1) + self.assertEqual(_testlimitedcapi.sequence_getitem(obj, 0), 42) class BytesTest(BaseBytesTest, unittest.TestCase): @@ -1031,36 +1067,63 @@ def test_buffer_is_readonly(self): self.assertRaises(TypeError, f.readinto, b"") def test_custom(self): - class A: - def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A()), b'abc') - class A: pass - self.assertRaises(TypeError, bytes, A()) - class A: - def __bytes__(self): - return None - self.assertRaises(TypeError, bytes, A()) - class A: + self.assertEqual(bytes(BytesSubclass(b'abc')), b'abc') + self.assertEqual(BytesSubclass(OtherBytesSubclass(b'abc')), + BytesSubclass(b'abc')) + self.assertEqual(bytes(WithBytes(b'abc')), b'abc') + self.assertEqual(BytesSubclass(WithBytes(b'abc')), BytesSubclass(b'abc')) + + class NoBytes: pass + self.assertRaises(TypeError, bytes, NoBytes()) + self.assertRaises(TypeError, bytes, WithBytes('abc')) + self.assertRaises(TypeError, bytes, WithBytes(None)) + class IndexWithBytes: def __bytes__(self): return b'a' def __index__(self): return 42 - self.assertEqual(bytes(A()), b'a') + self.assertEqual(bytes(IndexWithBytes()), b'a') # Issue #25766 - class A(str): + class StrWithBytes(str): + def __new__(cls, value): + self = str.__new__(cls, '\u20ac') + self.value = value + return self def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A('\u20ac')), b'abc') - self.assertEqual(bytes(A('\u20ac'), 'iso8859-15'), b'\xa4') + return self.value + self.assertEqual(bytes(StrWithBytes(b'abc')), b'abc') + self.assertEqual(bytes(StrWithBytes(b'abc'), 'iso8859-15'), b'\xa4') + self.assertEqual(bytes(StrWithBytes(BytesSubclass(b'abc'))), b'abc') + self.assertEqual(BytesSubclass(StrWithBytes(b'abc')), BytesSubclass(b'abc')) + self.assertEqual(BytesSubclass(StrWithBytes(b'abc'), 'iso8859-15'), + BytesSubclass(b'\xa4')) + self.assertEqual(BytesSubclass(StrWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertEqual(BytesSubclass(StrWithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) # Issue #24731 - class A: + self.assertTypedEqual(bytes(WithBytes(BytesSubclass(b'abc'))), BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(WithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(WithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + + class BytesWithBytes(bytes): + def __new__(cls, value): + self = bytes.__new__(cls, b'\xa4') + self.value = value + return self def __bytes__(self): - return OtherBytesSubclass(b'abc') - self.assertEqual(bytes(A()), b'abc') - self.assertIs(type(bytes(A())), OtherBytesSubclass) - self.assertEqual(BytesSubclass(A()), b'abc') - self.assertIs(type(BytesSubclass(A())), BytesSubclass) + return self.value + self.assertTypedEqual(bytes(BytesWithBytes(b'abc')), b'abc') + self.assertTypedEqual(BytesSubclass(BytesWithBytes(b'abc')), + BytesSubclass(b'abc')) + self.assertTypedEqual(bytes(BytesWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(BytesWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(BytesWithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) # Test PyBytes_FromFormat() def test_from_format(self): @@ -1233,6 +1296,8 @@ class SubBytes(bytes): class ByteArrayTest(BaseBytesTest, unittest.TestCase): type2test = bytearray + _testlimitedcapi = import_helper.import_module('_testlimitedcapi') + def test_getitem_error(self): b = bytearray(b'python') msg = "bytearray indices must be integers or slices" @@ -1325,47 +1390,73 @@ def by(s): self.assertEqual(re.findall(br"\w+", b), [by("Hello"), by("world")]) def test_setitem(self): - b = bytearray([1, 2, 3]) - b[1] = 100 - self.assertEqual(b, bytearray([1, 100, 3])) - b[-1] = 200 - self.assertEqual(b, bytearray([1, 100, 200])) - b[0] = Indexable(10) - self.assertEqual(b, bytearray([10, 100, 200])) - try: - b[3] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[-10] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[0] = 256 - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = Indexable(-1) - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = None - self.fail("Didn't raise TypeError") - except TypeError: - pass + def setitem_as_mapping(b, i, val): + b[i] = val + + def setitem_as_sequence(b, i, val): + self._testlimitedcapi.sequence_setitem(b, i, val) + + def do_tests(setitem): + b = bytearray([1, 2, 3]) + setitem(b, 1, 100) + self.assertEqual(b, bytearray([1, 100, 3])) + setitem(b, -1, 200) + self.assertEqual(b, bytearray([1, 100, 200])) + setitem(b, 0, Indexable(10)) + self.assertEqual(b, bytearray([10, 100, 200])) + try: + setitem(b, 3, 0) + self.fail("Didn't raise IndexError") + except IndexError: + pass + try: + setitem(b, -10, 0) + self.fail("Didn't raise IndexError") + except IndexError: + pass + try: + setitem(b, 0, 256) + self.fail("Didn't raise ValueError") + except ValueError: + pass + try: + setitem(b, 0, Indexable(-1)) + self.fail("Didn't raise ValueError") + except ValueError: + pass + try: + setitem(b, 0, object()) + self.fail("Didn't raise TypeError") + except TypeError: + pass + + with self.subTest("tp_as_mapping"): + do_tests(setitem_as_mapping) + + with self.subTest("tp_as_sequence"): + do_tests(setitem_as_sequence) def test_delitem(self): - b = bytearray(range(10)) - del b[0] - self.assertEqual(b, bytearray(range(1, 10))) - del b[-1] - self.assertEqual(b, bytearray(range(1, 9))) - del b[4] - self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) + def del_as_mapping(b, i): + del b[i] + + def del_as_sequence(b, i): + self._testlimitedcapi.sequence_delitem(b, i) + + def do_tests(delete): + b = bytearray(range(10)) + delete(b, 0) + self.assertEqual(b, bytearray(range(1, 10))) + delete(b, -1) + self.assertEqual(b, bytearray(range(1, 9))) + delete(b, 4) + self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) + + with self.subTest("tp_as_mapping"): + do_tests(del_as_mapping) + + with self.subTest("tp_as_sequence"): + do_tests(del_as_sequence) def test_setslice(self): b = bytearray(range(10)) @@ -1558,6 +1649,13 @@ def test_extend(self): a = bytearray(b'') a.extend([Indexable(ord('a'))]) self.assertEqual(a, b'a') + a = bytearray(b'abc') + self.assertRaisesRegex(TypeError, # Override for string. + "expected iterable of integers; got: 'str'", + a.extend, 'def') + self.assertRaisesRegex(TypeError, # But not for others. + "can't extend bytearray with float", + a.extend, 1.0) def test_remove(self): b = bytearray(b'hello') @@ -1747,6 +1845,8 @@ def test_repeat_after_setslice(self): self.assertEqual(b3, b'xcxcxc') def test_mutating_index(self): + # See gh-91153 + class Boom: def __index__(self): b.clear() @@ -1758,10 +1858,9 @@ def __index__(self): b[0] = Boom() with self.subTest("tp_as_sequence"): - _testcapi = import_helper.import_module('_testcapi') b = bytearray(b'Now you see me...') with self.assertRaises(IndexError): - _testcapi.sequence_setitem(b, 0, Boom()) + self._testlimitedcapi.sequence_setitem(b, 0, Boom()) class AssortedBytesTest(unittest.TestCase): @@ -2060,6 +2159,12 @@ class BytesSubclass(bytes): class OtherBytesSubclass(bytes): pass +class WithBytes: + def __init__(self, value): + self.value = value + def __bytes__(self): + return self.value + class ByteArraySubclassTest(SubclassTest, unittest.TestCase): basetype = bytearray type2test = ByteArraySubclass diff --git a/Lib/test/test_unicode.py b/Lib/test/test_str.py similarity index 94% rename from Lib/test/test_unicode.py rename to Lib/test/test_str.py index 1a8a8f7ee9..ef2d211a61 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_str.py @@ -7,6 +7,7 @@ """ import _string import codecs +import datetime import itertools import operator import pickle @@ -55,8 +56,22 @@ def duplicate_string(text): class StrSubclass(str): pass -class UnicodeTest(string_tests.CommonTest, - string_tests.MixinStrUnicodeUserStringTest, +class OtherStrSubclass(str): + pass + +class WithStr: + def __init__(self, value): + self.value = value + def __str__(self): + return self.value + +class WithRepr: + def __init__(self, value): + self.value = value + def __repr__(self): + return self.value + +class StrTest(string_tests.StringLikeTest, string_tests.MixinStrUnicodeTest, unittest.TestCase): @@ -84,6 +99,10 @@ def __repr__(self): self.assertEqual(realresult, result) self.assertTrue(object is not realresult) + def assertTypedEqual(self, actual, expected): + self.assertIs(type(actual), type(expected)) + self.assertEqual(actual, expected) + def test_literals(self): self.assertEqual('\xff', '\u00ff') self.assertEqual('\uffff', '\U0000ffff') @@ -93,6 +112,8 @@ def test_literals(self): # raw strings should not have unicode escapes self.assertNotEqual(r"\u0020", " ") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ascii(self): self.assertEqual(ascii('abc'), "'abc'") self.assertEqual(ascii('ab\\c'), "'ab\\\\c'") @@ -128,10 +149,13 @@ def test_ascii(self): self.assertEqual(ascii("\U00010000" * 39 + "\uffff" * 4096), ascii("\U00010000" * 39 + "\uffff" * 4096)) - class WrongRepr: - def __repr__(self): - return b'byte-repr' - self.assertRaises(TypeError, ascii, WrongRepr()) + self.assertTypedEqual(ascii('\U0001f40d'), r"'\U0001f40d'") + self.assertTypedEqual(ascii(StrSubclass('abc')), "'abc'") + self.assertTypedEqual(ascii(WithRepr('')), '') + self.assertTypedEqual(ascii(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(ascii(WithRepr('<\U0001f40d>')), r'<\U0001f40d>') + self.assertTypedEqual(ascii(WithRepr(StrSubclass('<\U0001f40d>'))), r'<\U0001f40d>') + self.assertRaises(TypeError, ascii, WithRepr(b'byte-repr')) def test_repr(self): # Test basic sanity of repr() @@ -169,10 +193,13 @@ def test_repr(self): self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096), repr("\U00010000" * 39 + "\uffff" * 4096)) - class WrongRepr: - def __repr__(self): - return b'byte-repr' - self.assertRaises(TypeError, repr, WrongRepr()) + self.assertTypedEqual(repr('\U0001f40d'), "'\U0001f40d'") + self.assertTypedEqual(repr(StrSubclass('abc')), "'abc'") + self.assertTypedEqual(repr(WithRepr('')), '') + self.assertTypedEqual(repr(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(repr(WithRepr('<\U0001f40d>')), '<\U0001f40d>') + self.assertTypedEqual(repr(WithRepr(StrSubclass('<\U0001f40d>'))), StrSubclass('<\U0001f40d>')) + self.assertRaises(TypeError, repr, WithRepr(b'byte-repr')) def test_iterators(self): # Make sure unicode objects have an __iter__ method @@ -213,7 +240,7 @@ def test_pickle_iterator(self): self.assertEqual(case, pickled) def test_count(self): - string_tests.CommonTest.test_count(self) + string_tests.StringLikeTest.test_count(self) # check mixed argument types self.checkequalnofix(3, 'aaa', 'count', 'a') self.checkequalnofix(0, 'aaa', 'count', 'b') @@ -243,7 +270,7 @@ class MyStr(str): self.checkequal(3, MyStr('aaa'), 'count', 'a') def test_find(self): - string_tests.CommonTest.test_find(self) + string_tests.StringLikeTest.test_find(self) # test implementation details of the memchr fast path self.checkequal(100, 'a' * 100 + '\u0102', 'find', '\u0102') self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0201') @@ -288,7 +315,7 @@ def test_find(self): self.checkequal(-1, '\u0102' * 100, 'find', '\u0102\U00100304') def test_rfind(self): - string_tests.CommonTest.test_rfind(self) + string_tests.StringLikeTest.test_rfind(self) # test implementation details of the memrchr fast path self.checkequal(0, '\u0102' + 'a' * 100 , 'rfind', '\u0102') self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0201') @@ -329,7 +356,7 @@ def test_rfind(self): self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102') def test_index(self): - string_tests.CommonTest.test_index(self) + string_tests.StringLikeTest.test_index(self) self.checkequalnofix(0, 'abcdefghiabc', 'index', '') self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def') self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc') @@ -353,7 +380,7 @@ def test_index(self): self.assertRaises(ValueError, ('\u0102' * 100).index, '\u0102\U00100304') def test_rindex(self): - string_tests.CommonTest.test_rindex(self) + string_tests.StringLikeTest.test_rindex(self) self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '') self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def') self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc') @@ -449,7 +476,7 @@ def test_maketrans_translate(self): self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz') def test_split(self): - string_tests.CommonTest.test_split(self) + string_tests.StringLikeTest.test_split(self) # test mixed kinds for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -466,7 +493,7 @@ def test_split(self): left + delim * 2 + right, 'split', delim *2) def test_rsplit(self): - string_tests.CommonTest.test_rsplit(self) + string_tests.StringLikeTest.test_rsplit(self) # test mixed kinds for left, right in ('ba', 'юё', '\u0101\u0100', '\U00010301\U00010300'): left *= 9 @@ -486,7 +513,7 @@ def test_rsplit(self): left + right, 'rsplit', None) def test_partition(self): - string_tests.MixinStrUnicodeUserStringTest.test_partition(self) + string_tests.StringLikeTest.test_partition(self) # test mixed kinds self.checkequal(('ABCDEFGH', '', ''), 'ABCDEFGH', 'partition', '\u4200') for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -503,7 +530,7 @@ def test_partition(self): left + delim * 2 + right, 'partition', delim * 2) def test_rpartition(self): - string_tests.MixinStrUnicodeUserStringTest.test_rpartition(self) + string_tests.StringLikeTest.test_rpartition(self) # test mixed kinds self.checkequal(('', '', 'ABCDEFGH'), 'ABCDEFGH', 'rpartition', '\u4200') for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -520,7 +547,7 @@ def test_rpartition(self): left + delim * 2 + right, 'rpartition', delim * 2) def test_join(self): - string_tests.MixinStrUnicodeUserStringTest.test_join(self) + string_tests.StringLikeTest.test_join(self) class MyWrapper: def __init__(self, sval): self.sval = sval @@ -548,7 +575,7 @@ def test_join_overflow(self): self.assertRaises(OverflowError, ''.join, seq) def test_replace(self): - string_tests.CommonTest.test_replace(self) + string_tests.StringLikeTest.test_replace(self) # method call forwarded from str implementation because of unicode argument self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1) @@ -831,6 +858,15 @@ def test_isprintable(self): self.assertTrue('\U0001F46F'.isprintable()) self.assertFalse('\U000E0020'.isprintable()) + @support.requires_resource('cpu') + def test_isprintable_invariant(self): + for codepoint in range(sys.maxunicode + 1): + char = chr(codepoint) + category = unicodedata.category(char) + self.assertEqual(char.isprintable(), + category[0] not in ('C', 'Z') + or char == ' ') + def test_surrogates(self): for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'): @@ -859,7 +895,7 @@ def test_surrogates(self): def test_lower(self): - string_tests.CommonTest.test_lower(self) + string_tests.StringLikeTest.test_lower(self) self.assertEqual('\U00010427'.lower(), '\U0001044F') self.assertEqual('\U00010427\U00010427'.lower(), '\U0001044F\U0001044F') @@ -890,7 +926,7 @@ def test_casefold(self): self.assertEqual('\u00b5'.casefold(), '\u03bc') def test_upper(self): - string_tests.CommonTest.test_upper(self) + string_tests.StringLikeTest.test_upper(self) self.assertEqual('\U0001044F'.upper(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.upper(), '\U00010427\U00010427') @@ -909,7 +945,7 @@ def test_upper(self): # TODO: RUSTPYTHON @unittest.expectedFailure def test_capitalize(self): - string_tests.CommonTest.test_capitalize(self) + string_tests.StringLikeTest.test_capitalize(self) self.assertEqual('\U0001044F'.capitalize(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.capitalize(), '\U00010427\U0001044F') @@ -947,7 +983,7 @@ def test_title(self): # TODO: RUSTPYTHON @unittest.expectedFailure def test_swapcase(self): - string_tests.CommonTest.test_swapcase(self) + string_tests.StringLikeTest.test_swapcase(self) self.assertEqual('\U0001044F'.swapcase(), '\U00010427') self.assertEqual('\U00010427'.swapcase(), '\U0001044F') self.assertEqual('\U0001044F\U0001044F'.swapcase(), @@ -973,7 +1009,7 @@ def test_swapcase(self): self.assertEqual('\u1fd2'.swapcase(), '\u0399\u0308\u0300') def test_center(self): - string_tests.CommonTest.test_center(self) + string_tests.StringLikeTest.test_center(self) self.assertEqual('x'.center(2, '\U0010FFFF'), 'x\U0010FFFF') self.assertEqual('x'.center(3, '\U0010FFFF'), @@ -1483,7 +1519,7 @@ def __format__(self, spec): # TODO: RUSTPYTHON @unittest.expectedFailure def test_formatting(self): - string_tests.MixinStrUnicodeUserStringTest.test_formatting(self) + string_tests.StringLikeTest.test_formatting(self) # Testing Unicode formatting strings... self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc') self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000, 3.00') @@ -1659,7 +1695,7 @@ def test_startswith_endswith_errors(self): self.assertIn('str', exc) self.assertIn('tuple', exc) - @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR') + @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR', '') def test_format_float(self): # should not format with a comma, but always with C locale self.assertEqual('1.0', '%.1f' % 1.0) @@ -1730,8 +1766,6 @@ def __str__(self): 'character buffers are decoded to unicode' ) - self.assertRaises(TypeError, str, 42, 42, 42) - # TODO: RUSTPYTHON @unittest.expectedFailure def test_constructor_keyword_args(self): @@ -1910,6 +1944,12 @@ def test_utf8_decode_invalid_sequences(self): self.assertRaises(UnicodeDecodeError, (b'\xF4'+cb+b'\xBF\xBF').decode, 'utf-8') + def test_issue127903(self): + # gh-127903: ``_copy_characters`` crashes on DEBUG builds when + # there is nothing to copy. + d = datetime.datetime(2013, 11, 10, 14, 20, 59) + self.assertEqual(d.strftime('%z'), '') + def test_issue8271(self): # Issue #8271: during the decoding of an invalid UTF-8 byte sequence, # only the start byte and the continuation byte(s) are now considered @@ -2396,28 +2436,37 @@ def test_ucs4(self): @unittest.expectedFailure def test_conversion(self): # Make sure __str__() works properly - class ObjectToStr: - def __str__(self): - return "foo" - - class StrSubclassToStr(str): - def __str__(self): - return "foo" - - class StrSubclassToStrSubclass(str): - def __new__(cls, content=""): - return str.__new__(cls, 2*content) - def __str__(self): + class StrWithStr(str): + def __new__(cls, value): + self = str.__new__(cls, "") + self.value = value return self + def __str__(self): + return self.value - self.assertEqual(str(ObjectToStr()), "foo") - self.assertEqual(str(StrSubclassToStr("bar")), "foo") - s = str(StrSubclassToStrSubclass("foo")) - self.assertEqual(s, "foofoo") - self.assertIs(type(s), StrSubclassToStrSubclass) - s = StrSubclass(StrSubclassToStrSubclass("foo")) - self.assertEqual(s, "foofoo") - self.assertIs(type(s), StrSubclass) + self.assertTypedEqual(str(WithStr('abc')), 'abc') + self.assertTypedEqual(str(WithStr(StrSubclass('abc'))), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr('abc')), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr(StrSubclass('abc'))), + StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr(OtherStrSubclass('abc'))), + StrSubclass('abc')) + + self.assertTypedEqual(str(StrWithStr('abc')), 'abc') + self.assertTypedEqual(str(StrWithStr(StrSubclass('abc'))), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr('abc')), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr(StrSubclass('abc'))), + StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr(OtherStrSubclass('abc'))), + StrSubclass('abc')) + + self.assertTypedEqual(str(WithRepr('')), '') + self.assertTypedEqual(str(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr('')), StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr(StrSubclass(''))), + StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr(OtherStrSubclass(''))), + StrSubclass('')) def test_unicode_repr(self): class s1: @@ -2652,6 +2701,49 @@ def test_check_encoding_errors(self): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_str_invalid_call(self): + # too many args + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str("too", "many", "argu", "ments") + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str(1, "", "", 1) + + # no such kw arg + with self.assertRaisesRegex(TypeError, r"str\(\) got an unexpected keyword argument 'test'"): + str(test=1) + + # 'encoding' must be str + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, encoding=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", b"ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", encoding=b"ascii") + + # 'errors' must be str + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, "", errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", b"strict") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", errors=b"strict") + + # both positional and kwarg + with self.assertRaisesRegex(TypeError, r"argument for str\(\) given by name \('encoding'\) and position \(2\)"): + str(b"x", "utf-8", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "ignore", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "strict", errors="ignore") + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py index 80c22c6cdd..fe25bfe9f8 100644 --- a/Lib/test/test_unicode_file.py +++ b/Lib/test/test_unicode_file.py @@ -110,7 +110,7 @@ def _test_single(self, filename): os.unlink(filename) self.assertTrue(not os.path.exists(filename)) # and again with os.open. - f = os.open(filename, os.O_CREAT) + f = os.open(filename, os.O_CREAT | os.O_WRONLY) os.close(f) try: self._do_single(filename) diff --git a/Lib/test/test_unicode_file_functions.py b/Lib/test/test_unicode_file_functions.py index 47619c8807..25c16e3a0b 100644 --- a/Lib/test/test_unicode_file_functions.py +++ b/Lib/test/test_unicode_file_functions.py @@ -5,7 +5,7 @@ import unittest import warnings from unicodedata import normalize -from test.support import os_helper +from test.support import is_apple, os_helper from test import support @@ -23,13 +23,13 @@ '10_\u1fee\u1ffd', ] -# Mac OS X decomposes Unicode names, using Normal Form D. +# Apple platforms decompose Unicode names, using Normal Form D. # http://developer.apple.com/mac/library/qa/qa2001/qa1173.html # "However, most volume formats do not follow the exact specification for # these normal forms. For example, HFS Plus uses a variant of Normal Form D # in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through # U+2FAFF are not decomposed." -if sys.platform != 'darwin': +if not is_apple: filenames.extend([ # Specific code points: NFC(fn), NFD(fn), NFKC(fn) and NFKD(fn) all different '11_\u0385\u03d3\u03d4', @@ -119,11 +119,11 @@ def test_open(self): os.stat(name) self._apply_failure(os.listdir, name, self._listdir_failure) - # Skip the test on darwin, because darwin does normalize the filename to + # Skip the test on Apple platforms, because they don't normalize the filename to # NFD (a variant of Unicode NFD form). Normalize the filename to NFC, NFKC, # NFKD in Python is useless, because darwin will normalize it later and so # open(), os.stat(), etc. don't raise any exception. - @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X') + @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms') @unittest.skipIf( support.is_emscripten or support.is_wasi, "test fails on Emscripten/WASI when host platform is macOS." @@ -142,10 +142,10 @@ def test_normalize(self): self._apply_failure(os.remove, name) self._apply_failure(os.listdir, name) - # Skip the test on darwin, because darwin uses a normalization different + # Skip the test on Apple platforms, because they use a normalization different # than Python NFD normalization: filenames are different even if we use # Python NFD normalization. - @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X') + @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms') def test_listdir(self): sf0 = set(self.files) with warnings.catch_warnings(): diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py index d7a0ece253..60cfdaabe8 100644 --- a/Lib/test/test_unicode_identifiers.py +++ b/Lib/test/test_unicode_identifiers.py @@ -21,7 +21,7 @@ def test_non_bmp_normalized(self): @unittest.expectedFailure def test_invalid(self): try: - from test import badsyntax_3131 + from test.tokenizedata import badsyntax_3131 except SyntaxError as err: self.assertEqual(str(err), "invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)") diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 29da4a25a3..7f49c1690f 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -11,15 +11,20 @@ import sys import unicodedata import unittest -from test.support import (open_urlresource, requires_resource, script_helper, - cpython_only, check_disallow_instantiation, - ResourceDenied) +from test.support import ( + open_urlresource, + requires_resource, + script_helper, + cpython_only, + check_disallow_instantiation, + force_not_colorized, +) class UnicodeMethodsTest(unittest.TestCase): # update this, if the database changes - expectedchecksum = '4739770dd4d0e5f1b1677accfc3552ed3c8ef326' + expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e' # TODO: RUSTPYTHON @unittest.expectedFailure @@ -74,7 +79,8 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): # Update this if the database changes. Make sure to do a full rebuild # (e.g. 'make distclean && make') to get the correct checksum. - expectedchecksum = '98d602e1f69d5c5bb8a5910c40bbbad4e18e8370' + expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba' + # TODO: RUSTPYTHON @unittest.expectedFailure @requires_resource('cpu') @@ -94,6 +100,8 @@ def test_function_checksum(self): self.db.decomposition(char), str(self.db.mirrored(char)), str(self.db.combining(char)), + unicodedata.east_asian_width(char), + self.db.name(char, ""), ] h.update(''.join(data).encode("ascii")) result = h.hexdigest() @@ -106,6 +114,28 @@ def test_name_inverse_lookup(self): if looked_name := self.db.name(char, None): self.assertEqual(self.db.lookup(looked_name), char) + def test_no_names_in_pua(self): + puas = [*range(0xe000, 0xf8ff), + *range(0xf0000, 0xfffff), + *range(0x100000, 0x10ffff)] + for i in puas: + char = chr(i) + self.assertRaises(ValueError, self.db.name, char) + + # TODO: RUSTPYTHON; LookupError: undefined character name 'LATIN SMLL LETR A' + @unittest.expectedFailure + def test_lookup_nonexistant(self): + # just make sure that lookup can fail + for nonexistant in [ + "LATIN SMLL LETR A", + "OPEN HANDS SIGHS", + "DREGS", + "HANDBUG", + "MODIFIER LETTER CYRILLIC SMALL QUESTION MARK", + "???", + ]: + self.assertRaises(KeyError, self.db.lookup, nonexistant) + # TODO: RUSTPYTHON @unittest.expectedFailure def test_digit(self): @@ -245,6 +275,25 @@ def test_east_asian_width(self): self.assertEqual(eaw('\u2010'), 'A') self.assertEqual(eaw('\U00020000'), 'W') + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_east_asian_width_unassigned(self): + eaw = self.db.east_asian_width + # unassigned + for char in '\u0530\u0ecf\u10c6\u20fc\uaaca\U000107bd\U000115f2': + self.assertEqual(eaw(char), 'N') + self.assertIs(self.db.name(char, None), None) + + # unassigned but reserved for CJK + for char in '\uFA6E\uFADA\U0002A6E0\U0002FA20\U0003134B\U0003FFFD': + self.assertEqual(eaw(char), 'W') + self.assertIs(self.db.name(char, None), None) + + # private use areas + for char in '\uE000\uF800\U000F0000\U000FFFEE\U00100000\U0010FFF0': + self.assertEqual(eaw(char), 'A') + self.assertIs(self.db.name(char, None), None) + # TODO: RUSTPYTHON @unittest.expectedFailure def test_east_asian_width_9_0_changes(self): @@ -260,6 +309,7 @@ def test_disallow_instantiation(self): # TODO: RUSTPYTHON @unittest.expectedFailure + @force_not_colorized def test_failed_import_during_compiling(self): # Issue 4367 # Decoding \N escapes requires the unicodedata module. If it can't be @@ -322,6 +372,7 @@ def test_ucd_510(self): self.assertTrue("\u1d79".upper()=='\ua77d') self.assertTrue(".".upper()=='.') + @requires_resource('cpu') def test_bug_5828(self): self.assertEqual("\u1d79".lower(), "\u1d79") # Only U+0000 should have U+0000 as its upper/lower/titlecase variant @@ -364,6 +415,7 @@ def unistr(data): return "".join([chr(x) for x in data]) @requires_resource('network') + @requires_resource('cpu') def test_normalization(self): TESTDATAFILE = "NormalizationTest.txt" TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}" diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py index 51b4f6041e..74df52f541 100644 --- a/Lib/test/test_userstring.py +++ b/Lib/test/test_userstring.py @@ -7,8 +7,7 @@ from collections import UserString class UserStringTest( - string_tests.CommonTest, - string_tests.MixinStrUnicodeUserStringTest, + string_tests.StringLikeTest, unittest.TestCase ): pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy