Content-Length: 15256 | pFad | http://github.com/python/cpython/pull/132574.patch
thub.com
From f0d20b254e2444632cdaea2552078ef5399ed0ad Mon Sep 17 00:00:00 2001
From: Duane Griffin
Date: Tue, 15 Apr 2025 20:56:59 +1200
Subject: [PATCH 1/7] gh-127971: do not read past the end of a string
Fix off-by-one read beyond the end of a string.
---
Objects/stringlib/fastsearch.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 05e700b06258f0..520688c71b8d8b 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -595,7 +595,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue;
}
/* miss: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
+ if (i < w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
i = i + m;
}
else {
@@ -604,7 +604,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
}
else {
/* skip: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
+ if (i < w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
i = i + m;
}
}
From 462dddf4f89b611242fa5eafaa9098383c1ff074 Mon Sep 17 00:00:00 2001
From: Duane Griffin
Date: Wed, 16 Apr 2025 12:11:50 +1200
Subject: [PATCH 2/7] Add test case
---
Lib/test/string_tests.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 4b82d51b4508ac..afa75411f982fd 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -767,6 +767,13 @@ def test_replace(self):
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
+ # gh-127971
+ any_3_nonblank_codepoints = '!!!'
+ seven_codepoints = any_3_nonblank_codepoints + ' ' + any_3_nonblank_codepoints
+ a = (' ' * 243) + seven_codepoints + (' ' * 7)
+ b = ' ' * 6 + chr(256)
+ a.replace(seven_codepoints, b)
+
def test_replace_uses_two_way_maxcount(self):
# Test that maxcount works in _two_way_count in fastsearch.h
A, B = "A"*1000, "B"*1000
From 1d0210a8e252cdb5102ecd326003301c30da5f1d Mon Sep 17 00:00:00 2001
From: Duane Griffin
Date: Wed, 16 Apr 2025 12:01:16 +1200
Subject: [PATCH 3/7] Add blurb
---
.../2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst
new file mode 100644
index 00000000000000..a47268671e60ac
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst
@@ -0,0 +1 @@
+Fix off-by-one read beyond the end of a string in string search
From 212452d7efca29140bd2066b7d522aaf26f43b7f Mon Sep 17 00:00:00 2001
From: Duane Griffin
Date: Sat, 21 Jun 2025 12:23:36 +1200
Subject: [PATCH 4/7] Add test case for adaptive_find and comments better
explaining the reasons for and limitations of the tests.
---
Lib/test/string_tests.py | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index afa75411f982fd..6e6dcacceac1c4 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -767,13 +767,40 @@ def test_replace(self):
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
- # gh-127971
+ def test_replacement_on_buffer_boundary(self):
+
+ # gh-127971: Check we don't read past the end of the buffer when a
+ # potential match misses on the last character. Note this will likely
+ # not cause a failure unless ASAN is enabled, and even that may be
+ # dependent on implementation details subject to change.
any_3_nonblank_codepoints = '!!!'
seven_codepoints = any_3_nonblank_codepoints + ' ' + any_3_nonblank_codepoints
a = (' ' * 243) + seven_codepoints + (' ' * 7)
b = ' ' * 6 + chr(256)
a.replace(seven_codepoints, b)
+ def test_adaptive_find_on_buffer_boundary(self):
+
+ # gh-127971: This exercises the adaptive search algorithm to trigger a
+ # corner-case where it might examine the character *after* the last
+ # position that could be the start of the pattern.
+ #
+ # Unfortunately there is nothing to *test* to confirm whether the
+ # character is read or not, nor in fact does it matter for correctness
+ # with the implementation at time of writing: the adaptive algorithm is
+ # only triggered if the input is over a certain size and with a pattern
+ # with more than one character, so with the current implementation even
+ # though the final character read is not necessary or significant, it
+ # won't cause a fault.
+ #
+ # This test at least intentionally exercises this path, and might
+ # possibly catch a regression if the implementation changes and breaks
+ # those assumptions.
+ prefix = ' ' * (1024 * 4)
+ haystack = prefix + 'x'
+ needle = prefix + 'y'
+ self.assertEqual(haystack.find(needle), -1)
+
def test_replace_uses_two_way_maxcount(self):
# Test that maxcount works in _two_way_count in fastsearch.h
A, B = "A"*1000, "B"*1000
From c07c23eb921bdb737cbf9fccd60bbdde32426f7d Mon Sep 17 00:00:00 2001
From: Duane Griffin
Date: Sun, 22 Jun 2025 11:49:30 +1200
Subject: [PATCH 5/7] Tweak conditional phrasing to match loop terminating
criteria, add comment explaining why a guard is not necessary in
adaptive_find.
---
Objects/stringlib/fastsearch.h | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 520688c71b8d8b..369034d52e8619 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -595,7 +595,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue;
}
/* miss: check if next character is part of pattern */
- if (i < w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
+ if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
i = i + m;
}
else {
@@ -604,7 +604,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
}
else {
/* skip: check if next character is part of pattern */
- if (i < w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
+ if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
i = i + m;
}
}
@@ -667,7 +667,16 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
return res + count;
}
}
- /* miss: check if next character is part of pattern */
+
+ /* Miss: check if next character is part of pattern.
+ Note that in contrast to default_find and default_rfind we do
+ *not* need to prevent the algorithm from reading one character
+ beyond the last character in the input that the pattern could
+ start in. I.e. if i == w it is safe to read ss[i + 1] since the
+ input and pattern length requirements on when this variant
+ algorithm will be called ensure it will always be a valid part
+ of the input. In that case it doesn't matter what the character
+ read is since the loop will terminate regardless. */
if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
i = i + m;
}
@@ -676,7 +685,9 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
}
}
else {
- /* skip: check if next character is part of pattern */
+ /* Skip: check if next character is part of pattern.
+ See comment above re safety of accessing ss[i+1] when i == w.
+ */
if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
i = i + m;
}
From 338f32d9908e080084a5c0e99dbb99f3c4e1ace6 Mon Sep 17 00:00:00 2001
From: Duane Griffin
Date: Sun, 13 Jul 2025 11:33:04 +1200
Subject: [PATCH 6/7] Address more reviewer feedback: add conditional to
adaptive find, tweak comments, drop extremely marginal "test"
---
Lib/test/string_tests.py | 29 ++-----------------
...-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst | 2 +-
Objects/stringlib/fastsearch.h | 19 +++---------
3 files changed, 8 insertions(+), 42 deletions(-)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 6e6dcacceac1c4..5cd3d7a62274bf 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -4,6 +4,7 @@
import unittest, string, sys, struct
from test import support
+from test.support import check_sanitizer
from test.support import import_helper
from collections import UserList
import random
@@ -767,40 +768,16 @@ def test_replace(self):
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
+ @unittest.skipUnless(check_sanitizer(address=True), "AddressSanitizer required")
def test_replacement_on_buffer_boundary(self):
-
# gh-127971: Check we don't read past the end of the buffer when a
- # potential match misses on the last character. Note this will likely
- # not cause a failure unless ASAN is enabled, and even that may be
- # dependent on implementation details subject to change.
+ # potential match misses on the last character.
any_3_nonblank_codepoints = '!!!'
seven_codepoints = any_3_nonblank_codepoints + ' ' + any_3_nonblank_codepoints
a = (' ' * 243) + seven_codepoints + (' ' * 7)
b = ' ' * 6 + chr(256)
a.replace(seven_codepoints, b)
- def test_adaptive_find_on_buffer_boundary(self):
-
- # gh-127971: This exercises the adaptive search algorithm to trigger a
- # corner-case where it might examine the character *after* the last
- # position that could be the start of the pattern.
- #
- # Unfortunately there is nothing to *test* to confirm whether the
- # character is read or not, nor in fact does it matter for correctness
- # with the implementation at time of writing: the adaptive algorithm is
- # only triggered if the input is over a certain size and with a pattern
- # with more than one character, so with the current implementation even
- # though the final character read is not necessary or significant, it
- # won't cause a fault.
- #
- # This test at least intentionally exercises this path, and might
- # possibly catch a regression if the implementation changes and breaks
- # those assumptions.
- prefix = ' ' * (1024 * 4)
- haystack = prefix + 'x'
- needle = prefix + 'y'
- self.assertEqual(haystack.find(needle), -1)
-
def test_replace_uses_two_way_maxcount(self):
# Test that maxcount works in _two_way_count in fastsearch.h
A, B = "A"*1000, "B"*1000
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst
index a47268671e60ac..ced7a9c9fd3e63 100644
--- a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-01-13.gh-issue-127971.pMDOQ0.rst
@@ -1 +1 @@
-Fix off-by-one read beyond the end of a string in string search
+Fix off-by-one read beyond the end of a string in string search.
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 369034d52e8619..b447865c986bef 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -667,17 +667,8 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
return res + count;
}
}
-
- /* Miss: check if next character is part of pattern.
- Note that in contrast to default_find and default_rfind we do
- *not* need to prevent the algorithm from reading one character
- beyond the last character in the input that the pattern could
- start in. I.e. if i == w it is safe to read ss[i + 1] since the
- input and pattern length requirements on when this variant
- algorithm will be called ensure it will always be a valid part
- of the input. In that case it doesn't matter what the character
- read is since the loop will terminate regardless. */
- if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
+ /* miss: check if next character is part of pattern */
+ if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
i = i + m;
}
else {
@@ -685,10 +676,8 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
}
}
else {
- /* Skip: check if next character is part of pattern.
- See comment above re safety of accessing ss[i+1] when i == w.
- */
- if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
+ /* skip: check if next character is part of pattern */
+ if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
i = i + m;
}
}
From 9884ec1af43684b1127e416b2980f3074214b7d6 Mon Sep 17 00:00:00 2001
From: Duane Griffin
Date: Mon, 14 Jul 2025 00:17:53 +1200
Subject: [PATCH 7/7] Always run test, not just under ASAN
---
Lib/test/string_tests.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 5cd3d7a62274bf..1814a55b74ea0c 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -4,7 +4,6 @@
import unittest, string, sys, struct
from test import support
-from test.support import check_sanitizer
from test.support import import_helper
from collections import UserList
import random
@@ -768,7 +767,6 @@ def test_replace(self):
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
- @unittest.skipUnless(check_sanitizer(address=True), "AddressSanitizer required")
def test_replacement_on_buffer_boundary(self):
# gh-127971: Check we don't read past the end of the buffer when a
# potential match misses on the last character.
--- a PPN by Garber Painting Akron. With Image Size Reduction included!Fetched URL: http://github.com/python/cpython/pull/132574.patch
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy