Skip to content

Commit 029835d

Browse files
gh-91404: Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (GH-32283)" (GH-93882)
Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (GH-32283)" This reverts commit 6e3eee5. Manual fixups to increase the MAGIC number and to handle conflicts with a couple of changes that landed after that. Thanks for reviews by Ma Lin and Serhiy Storchaka. (cherry picked from commit 4beee0c) Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent 5ee86d4 commit 029835d

File tree

9 files changed

+74
-146
lines changed

9 files changed

+74
-146
lines changed

Lib/re/_compiler.py

Lines changed: 21 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,14 @@
2828
POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
2929
}
3030

31-
class _CompileData:
32-
__slots__ = ('code', 'repeat_count')
33-
def __init__(self):
34-
self.code = []
35-
self.repeat_count = 0
36-
3731
def _combine_flags(flags, add_flags, del_flags,
3832
TYPE_FLAGS=_parser.TYPE_FLAGS):
3933
if add_flags & TYPE_FLAGS:
4034
flags &= ~TYPE_FLAGS
4135
return (flags | add_flags) & ~del_flags
4236

43-
def _compile(data, pattern, flags):
37+
def _compile(code, pattern, flags):
4438
# internal: compile a (sub)pattern
45-
code = data.code
4639
emit = code.append
4740
_len = len
4841
LITERAL_CODES = _LITERAL_CODES
@@ -115,19 +108,15 @@ def _compile(data, pattern, flags):
115108
skip = _len(code); emit(0)
116109
emit(av[0])
117110
emit(av[1])
118-
_compile(data, av[2], flags)
111+
_compile(code, av[2], flags)
119112
emit(SUCCESS)
120113
code[skip] = _len(code) - skip
121114
else:
122115
emit(REPEATING_CODES[op][0])
123116
skip = _len(code); emit(0)
124117
emit(av[0])
125118
emit(av[1])
126-
# now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
127-
if op != POSSESSIVE_REPEAT:
128-
emit(data.repeat_count)
129-
data.repeat_count += 1
130-
_compile(data, av[2], flags)
119+
_compile(code, av[2], flags)
131120
code[skip] = _len(code) - skip
132121
emit(REPEATING_CODES[op][1])
133122
elif op is SUBPATTERN:
@@ -136,7 +125,7 @@ def _compile(data, pattern, flags):
136125
emit(MARK)
137126
emit((group-1)*2)
138127
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
139-
_compile(data, p, _combine_flags(flags, add_flags, del_flags))
128+
_compile(code, p, _combine_flags(flags, add_flags, del_flags))
140129
if group:
141130
emit(MARK)
142131
emit((group-1)*2+1)
@@ -148,7 +137,7 @@ def _compile(data, pattern, flags):
148137
# pop their stack if they reach it
149138
emit(ATOMIC_GROUP)
150139
skip = _len(code); emit(0)
151-
_compile(data, av, flags)
140+
_compile(code, av, flags)
152141
emit(SUCCESS)
153142
code[skip] = _len(code) - skip
154143
elif op in SUCCESS_CODES:
@@ -163,7 +152,7 @@ def _compile(data, pattern, flags):
163152
if lo != hi:
164153
raise error("look-behind requires fixed-width pattern")
165154
emit(lo) # look behind
166-
_compile(data, av[1], flags)
155+
_compile(code, av[1], flags)
167156
emit(SUCCESS)
168157
code[skip] = _len(code) - skip
169158
elif op is AT:
@@ -182,7 +171,7 @@ def _compile(data, pattern, flags):
182171
for av in av[1]:
183172
skip = _len(code); emit(0)
184173
# _compile_info(code, av, flags)
185-
_compile(data, av, flags)
174+
_compile(code, av, flags)
186175
emit(JUMP)
187176
tailappend(_len(code)); emit(0)
188177
code[skip] = _len(code) - skip
@@ -210,12 +199,12 @@ def _compile(data, pattern, flags):
210199
emit(op)
211200
emit(av[0]-1)
212201
skipyes = _len(code); emit(0)
213-
_compile(data, av[1], flags)
202+
_compile(code, av[1], flags)
214203
if av[2]:
215204
emit(JUMP)
216205
skipno = _len(code); emit(0)
217206
code[skipyes] = _len(code) - skipyes + 1
218-
_compile(data, av[2], flags)
207+
_compile(code, av[2], flags)
219208
code[skipno] = _len(code) - skipno
220209
else:
221210
code[skipyes] = _len(code) - skipyes + 1
@@ -582,17 +571,17 @@ def isstring(obj):
582571
def _code(p, flags):
583572

584573
flags = p.state.flags | flags
585-
data = _CompileData()
574+
code = []
586575

587576
# compile info block
588-
_compile_info(data.code, p, flags)
577+
_compile_info(code, p, flags)
589578

590579
# compile the pattern
591-
_compile(data, p.data, flags)
580+
_compile(code, p.data, flags)
592581

593-
data.code.append(SUCCESS)
582+
code.append(SUCCESS)
594583

595-
return data
584+
return code
596585

597586
def _hex_code(code):
598587
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
@@ -693,21 +682,14 @@ def print_2(*args):
693682
else:
694683
print_(FAILURE)
695684
i += 1
696-
elif op in (REPEAT_ONE, MIN_REPEAT_ONE,
685+
elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
697686
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
698687
skip, min, max = code[i: i+3]
699688
if max == MAXREPEAT:
700689
max = 'MAXREPEAT'
701690
print_(op, skip, min, max, to=i+skip)
702691
dis_(i+3, i+skip)
703692
i += skip
704-
elif op is REPEAT:
705-
skip, min, max, repeat_index = code[i: i+4]
706-
if max == MAXREPEAT:
707-
max = 'MAXREPEAT'
708-
print_(op, skip, min, max, repeat_index, to=i+skip)
709-
dis_(i+4, i+skip)
710-
i += skip
711693
elif op is GROUPREF_EXISTS:
712694
arg, skip = code[i: i+2]
713695
print_(op, arg, skip, to=i+skip)
@@ -762,11 +744,11 @@ def compile(p, flags=0):
762744
else:
763745
pattern = None
764746

765-
data = _code(p, flags)
747+
code = _code(p, flags)
766748

767749
if flags & SRE_FLAG_DEBUG:
768750
print()
769-
dis(data.code)
751+
dis(code)
770752

771753
# map in either direction
772754
groupindex = p.state.groupdict
@@ -775,6 +757,7 @@ def compile(p, flags=0):
775757
indexgroup[i] = k
776758

777759
return _sre.compile(
778-
pattern, flags | p.state.flags, data.code,
779-
p.state.groups-1, groupindex, tuple(indexgroup),
780-
data.repeat_count)
760+
pattern, flags | p.state.flags, code,
761+
p.state.groups-1,
762+
groupindex, tuple(indexgroup)
763+
)

Lib/re/_constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
# update when constants are added or removed
1515

16-
MAGIC = 20220423
16+
MAGIC = 20220615
1717

1818
from _sre import MAXREPEAT, MAXGROUPS
1919

Lib/test/test_re.py

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1796,12 +1796,9 @@ def test_dealloc(self):
17961796
long_overflow = 2**128
17971797
self.assertRaises(TypeError, re.finditer, "a", {})
17981798
with self.assertRaises(OverflowError):
1799-
_sre.compile("abc", 0, [long_overflow], 0, {}, (), 0)
1799+
_sre.compile("abc", 0, [long_overflow], 0, {}, ())
18001800
with self.assertRaises(TypeError):
1801-
_sre.compile({}, 0, [], 0, [], [], 0)
1802-
with self.assertRaises(RuntimeError):
1803-
# invalid repeat_count -1
1804-
_sre.compile("abc", 0, [1], 0, {}, (), -1)
1801+
_sre.compile({}, 0, [], 0, [], [])
18051802

18061803
def test_search_dot_unicode(self):
18071804
self.assertTrue(re.search("123.*-", '123abc-'))
@@ -2540,27 +2537,6 @@ def test_possesive_repeat(self):
25402537
14. SUCCESS
25412538
''')
25422539

2543-
def test_repeat_index(self):
2544-
self.assertEqual(get_debug_out(r'(?:ab)*?(?:cd)*'), '''\
2545-
MIN_REPEAT 0 MAXREPEAT
2546-
LITERAL 97
2547-
LITERAL 98
2548-
MAX_REPEAT 0 MAXREPEAT
2549-
LITERAL 99
2550-
LITERAL 100
2551-
2552-
0. INFO 4 0b0 0 MAXREPEAT (to 5)
2553-
5: REPEAT 8 0 MAXREPEAT 0 (to 14)
2554-
10. LITERAL 0x61 ('a')
2555-
12. LITERAL 0x62 ('b')
2556-
14: MIN_UNTIL
2557-
15. REPEAT 8 0 MAXREPEAT 1 (to 24)
2558-
20. LITERAL 0x63 ('c')
2559-
22. LITERAL 0x64 ('d')
2560-
24: MAX_UNTIL
2561-
25. SUCCESS
2562-
''')
2563-
25642540

25652541
class PatternReprTests(unittest.TestCase):
25662542
def check(self, pattern, expected):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Revert the fix for the :mod:`re` memory leak when a match is terminated by a
2+
signal or memory allocation failure, as that fix caused a major performance
3+
regression.

Modules/_sre/clinic/sre.c.h

Lines changed: 7 additions & 20 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy