cpython: ea0c4b811eae (original) (raw)
Mercurial > cpython
changeset 99137:ea0c4b811eae
Issue #25388: Fixed tokenizer crash when processing undecodable source code with a null byte. [#25388]
author | Serhiy Storchaka <storchaka@gmail.com> |
---|---|
date | Sat, 14 Nov 2015 15:12:04 +0200 |
parents | 25a7ceed79d1 (current diff), e4a69eb34ad7 (diff) |
children | 1412be96faf0 |
files | Misc/NEWS Parser/tokenizer.c |
diffstat | 3 files changed, 19 insertions(+), 8 deletions(-): Lib/test/test_compile.py (10), Misc/NEWS (3), Parser/tokenizer.c (14) |
line wrap: on
line diff
--- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -516,6 +516,16 @@ if 1: res = script_helper.run_python_until_end(fn)[0] self.assertIn(b"Non-UTF-8", res.err)
+    def test_yet_more_evil_still_undecodable(self):
+        # Issue #25388
+        src = b"#\x00\n#\xfd\n"
+        with tempfile.TemporaryDirectory() as tmpd:
+            fn = os.path.join(tmpd, "bad.py")
+            with open(fn, "wb") as fp:
+                fp.write(src)
+            res = script_helper.run_python_until_end(fn)[0]
+        self.assertIn(b"Non-UTF-8", res.err)
+ @support.cpython_only def test_compiler_recursion_limit(self): # Expected limit is sys.getrecursionlimit() * the scaling factor
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ Release date: XXXX-XX-XX Core and Builtins ----------------- +- Issue #25388: Fixed tokenizer crash when processing undecodable source code with a null byte.
- Issue #25462: The hash of the key now is calculated only once in most operations in C implementation of OrderedDict.
--- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -196,7 +196,8 @@ error_ret(struct tok_state *tok) /* XXX */ tok->decoding_erred = 1; if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ PyMem_FREE(tok->buf);
- tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
- tok->done = E_DECODE; return NULL; /* as if it were EOF */ }
@@ -952,11 +953,6 @@ tok_nextc(struct tok_state *tok) } buflen = PyBytes_GET_SIZE(u); buf = PyBytes_AS_STRING(u);
if (!buf) {[](#l3.17)
Py_DECREF(u);[](#l3.18)
tok->done = E_DECODE;[](#l3.19)
return EOF;[](#l3.20)
}[](#l3.21) newtok = PyMem_MALLOC(buflen+1);[](#l3.22) strcpy(newtok, buf);[](#l3.23) Py_DECREF(u);[](#l3.24)
@@ -998,7 +994,6 @@ tok_nextc(struct tok_state *tok) if (tok->buf != NULL) PyMem_FREE(tok->buf); tok->buf = newtok;
tok->line_start = tok->buf;[](#l3.29) tok->cur = tok->buf;[](#l3.30) tok->line_start = tok->buf;[](#l3.31) tok->inp = strchr(tok->buf, '\0');[](#l3.32)
@@ -1021,7 +1016,8 @@ tok_nextc(struct tok_state *tok) } if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), tok) == NULL) {
tok->done = E_EOF;[](#l3.37)
if (!tok->decoding_erred)[](#l3.38)
tok->done = E_EOF;[](#l3.39) done = 1;[](#l3.40) }[](#l3.41) else {[](#l3.42)
@@ -1055,6 +1051,8 @@ tok_nextc(struct tok_state *tok) return EOF; } tok->buf = newbuf;
tok->cur = tok->buf + cur;[](#l3.47)
tok->line_start = tok->cur;[](#l3.48) tok->inp = tok->buf + curvalid;[](#l3.49) tok->end = tok->buf + newsize;[](#l3.50) tok->start = curstart < 0 ? NULL :[](#l3.51)