Skip to content

Commit a09d390

Browse files
authored
[3.11] gh-96670: Raise SyntaxError when parsing NULL bytes (GH-97594) (#104195)
1 parent c5dafea commit a09d390

File tree

9 files changed: 77 additions and 22 deletions.

Include/cpython/fileobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#endif
44

55
PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *);
6+
PyAPI_FUNC(char *) _Py_UniversalNewlineFgetsWithSize(char *, int, FILE*, PyObject *, size_t*);
67

78
/* The std printer acts as a preliminary sys.stderr until the new io
89
infrastructure is in place. */

Lib/test/test_ast.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,10 @@ def check_limit(prefix, repeated):
857857
check_limit("a", "[0]")
858858
check_limit("a", "*a")
859859

860+
def test_null_bytes(self):
861+
with self.assertRaises(SyntaxError,
862+
msg="source code string cannot contain null bytes"):
863+
ast.parse("a\0b")
860864

861865
class ASTHelpers_Test(unittest.TestCase):
862866
maxDiff = None

Lib/test/test_builtin.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,11 +334,10 @@ def test_compile(self):
334334
self.assertRaises(TypeError, compile)
335335
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'badmode')
336336
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'single', 0xff)
337-
self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
338337
self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
339338
mode='eval', source='0', filename='tmp')
340339
compile('print("\xe5")\n', '', 'exec')
341-
self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
340+
self.assertRaises(SyntaxError, compile, chr(0), 'f', 'exec')
342341
self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')
343342

344343
# test the optimize argument

Lib/test/test_cmd_line_script.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,31 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
657657
],
658658
)
659659

660+
def test_syntaxerror_null_bytes(self):
661+
script = "x = '\0' nothing to see here\n';import os;os.system('echo pwnd')\n"
662+
with os_helper.temp_dir() as script_dir:
663+
script_name = _make_test_script(script_dir, 'script', script)
664+
exitcode, stdout, stderr = assert_python_failure(script_name)
665+
self.assertEqual(
666+
stderr.splitlines()[-2:],
667+
[ b" x = '",
668+
b'SyntaxError: source code cannot contain null bytes'
669+
],
670+
)
671+
672+
def test_syntaxerror_null_bytes_in_multiline_string(self):
673+
scripts = ["\n'''\nmultilinestring\0\n'''", "\nf'''\nmultilinestring\0\n'''"] # Both normal and f-strings
674+
with os_helper.temp_dir() as script_dir:
675+
for script in scripts:
676+
script_name = _make_test_script(script_dir, 'script', script)
677+
_, _, stderr = assert_python_failure(script_name)
678+
self.assertEqual(
679+
stderr.splitlines()[-2:],
680+
[ b" multilinestring",
681+
b'SyntaxError: source code cannot contain null bytes'
682+
]
683+
)
684+
660685
def test_consistent_sys_path_for_direct_execution(self):
661686
# This test case ensures that the following all give the same
662687
# sys.path configuration:

Lib/test/test_compile.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ def test_particularly_evil_undecodable(self):
542542
with open(fn, "wb") as fp:
543543
fp.write(src)
544544
res = script_helper.run_python_until_end(fn)[0]
545-
self.assertIn(b"Non-UTF-8", res.err)
545+
self.assertIn(b"source code cannot contain null bytes", res.err)
546546

547547
def test_yet_more_evil_still_undecodable(self):
548548
# Issue #25388
@@ -552,7 +552,7 @@ def test_yet_more_evil_still_undecodable(self):
552552
with open(fn, "wb") as fp:
553553
fp.write(src)
554554
res = script_helper.run_python_until_end(fn)[0]
555-
self.assertIn(b"Non-UTF-8", res.err)
555+
self.assertIn(b"source code cannot contain null bytes", res.err)
556556

557557
@support.cpython_only
558558
def test_compiler_recursion_limit(self):
@@ -588,9 +588,9 @@ def check_limit(prefix, repeated, mode="single"):
588588
def test_null_terminated(self):
589589
# The source code is null-terminated internally, but bytes-like
590590
# objects are accepted, which might not be null-terminated.
591-
with self.assertRaisesRegex(ValueError, "cannot contain null"):
591+
with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
592592
compile("123\x00", "<dummy>", "eval")
593-
with self.assertRaisesRegex(ValueError, "cannot contain null"):
593+
with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
594594
compile(memoryview(b"123\x00"), "<dummy>", "eval")
595595
code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
596596
self.assertEqual(eval(code), 23)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The parser now raises :exc:`SyntaxError` when parsing source code containing
2+
null bytes. Backported from ``aab01e3``. Patch by Pablo Galindo.

Objects/fileobject.c

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -230,16 +230,8 @@ _PyLong_FileDescriptor_Converter(PyObject *o, void *ptr)
230230
return 1;
231231
}
232232

233-
/*
234-
** Py_UniversalNewlineFgets is an fgets variation that understands
235-
** all of \r, \n and \r\n conventions.
236-
** The stream should be opened in binary mode.
237-
** The fobj parameter exists solely for legacy reasons and must be NULL.
238-
** Note that we need no error handling: fgets() treats error and eof
239-
** identically.
240-
*/
241233
char *
242-
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
234+
_Py_UniversalNewlineFgetsWithSize(char *buf, int n, FILE *stream, PyObject *fobj, size_t* size)
243235
{
244236
char *p = buf;
245237
int c;
@@ -265,11 +257,28 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
265257
}
266258
FUNLOCKFILE(stream);
267259
*p = '\0';
268-
if (p == buf)
260+
if (p == buf) {
269261
return NULL;
262+
}
263+
*size = p - buf;
270264
return buf;
271265
}
272266

267+
/*
268+
** Py_UniversalNewlineFgets is an fgets variation that understands
269+
** all of \r, \n and \r\n conventions.
270+
** The stream should be opened in binary mode.
271+
** The fobj parameter exists solely for legacy reasons and must be NULL.
272+
** Note that we need no error handling: fgets() treats error and eof
273+
** identically.
274+
*/
275+
276+
char *
277+
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) {
278+
size_t size;
279+
return _Py_UniversalNewlineFgetsWithSize(buf, n, stream, fobj, &size);
280+
}
281+
273282
/* **************************** std printer ****************************
274283
* The stdprinter is used during the boot strapping phase as a preliminary
275284
* file like object for sys.stderr.

Parser/tokenizer.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,11 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
376376
return 1;
377377
}
378378

379+
static inline int
380+
contains_null_bytes(const char* str, size_t size) {
381+
return memchr(str, 0, size) != NULL;
382+
}
383+
379384
static int
380385
tok_readline_recode(struct tok_state *tok) {
381386
PyObject *line;
@@ -831,17 +836,17 @@ tok_readline_raw(struct tok_state *tok)
831836
if (!tok_reserve_buf(tok, BUFSIZ)) {
832837
return 0;
833838
}
834-
char *line = Py_UniversalNewlineFgets(tok->inp,
835-
(int)(tok->end - tok->inp),
836-
tok->fp, NULL);
839+
int n_chars = (int)(tok->end - tok->inp);
840+
size_t line_size = 0;
841+
char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size);
837842
if (line == NULL) {
838843
return 1;
839844
}
840845
if (tok->fp_interactive &&
841846
tok_concatenate_interactive_new_line(tok, line) == -1) {
842847
return 0;
843848
}
844-
tok->inp = strchr(tok->inp, '\0');
849+
tok->inp += line_size;
845850
if (tok->inp == tok->buf) {
846851
return 0;
847852
}
@@ -1078,6 +1083,12 @@ tok_nextc(struct tok_state *tok)
10781083
return EOF;
10791084
}
10801085
tok->line_start = tok->cur;
1086+
1087+
if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
1088+
syntaxerror(tok, "source code cannot contain null bytes");
1089+
tok->cur = tok->inp;
1090+
return EOF;
1091+
}
10811092
}
10821093
Py_UNREACHABLE();
10831094
}
@@ -1987,8 +1998,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
19871998
/* Get rest of string */
19881999
while (end_quote_size != quote_size) {
19892000
c = tok_nextc(tok);
1990-
if (tok->done == E_DECODE)
2001+
if (tok->done == E_ERROR) {
2002+
return ERRORTOKEN;
2003+
}
2004+
if (tok->done == E_DECODE) {
19912005
break;
2006+
}
19922007
if (c == EOF || (quote_size == 1 && c == '\n')) {
19932008
assert(tok->multi_line_start != NULL);
19942009
// shift the tok_state's location into

Python/pythonrun.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1859,7 +1859,7 @@ _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyComp
18591859
}
18601860

18611861
if (strlen(str) != (size_t)size) {
1862-
PyErr_SetString(PyExc_ValueError,
1862+
PyErr_SetString(PyExc_SyntaxError,
18631863
"source code string cannot contain null bytes");
18641864
Py_CLEAR(*cmd_copy);
18651865
return NULL;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy