(original) (raw)

changeset: 99137:ea0c4b811eae parent: 99134:25a7ceed79d1 parent: 99136:e4a69eb34ad7 user: Serhiy Storchaka <storchaka@gmail.com> date: Sat Nov 14 15:12:04 2015 +0200 files: Misc/NEWS Parser/tokenizer.c description: Issue #25388: Fixed tokenizer crash when processing undecodable source code with a null byte. diff -r 25a7ceed79d1 -r ea0c4b811eae Lib/test/test_compile.py --- a/Lib/test/test_compile.py Sat Nov 14 12:52:08 2015 +0000 +++ b/Lib/test/test_compile.py Sat Nov 14 15:12:04 2015 +0200 @@ -516,6 +516,16 @@ res = script_helper.run_python_until_end(fn)[0] self.assertIn(b"Non-UTF-8", res.err) + def test_yet_more_evil_still_undecodable(self): + # Issue #25388 + src = b"#\x00\n#\xfd\n" + with tempfile.TemporaryDirectory() as tmpd: + fn = os.path.join(tmpd, "bad.py") + with open(fn, "wb") as fp: + fp.write(src) + res = script_helper.run_python_until_end(fn)[0] + self.assertIn(b"Non-UTF-8", res.err) + @support.cpython_only def test_compiler_recursion_limit(self): # Expected limit is sys.getrecursionlimit() * the scaling factor diff -r 25a7ceed79d1 -r ea0c4b811eae Misc/NEWS --- a/Misc/NEWS Sat Nov 14 12:52:08 2015 +0000 +++ b/Misc/NEWS Sat Nov 14 15:12:04 2015 +0200 @@ -10,6 +10,9 @@ Core and Builtins ----------------- +- Issue #25388: Fixed tokenizer crash when processing undecodable source code + with a null byte. + - Issue #25462: The hash of the key now is calculated only once in most operations in C implementation of OrderedDict. 
diff -r 25a7ceed79d1 -r ea0c4b811eae Parser/tokenizer.c --- a/Parser/tokenizer.c Sat Nov 14 12:52:08 2015 +0000 +++ b/Parser/tokenizer.c Sat Nov 14 15:12:04 2015 +0200 @@ -196,7 +196,8 @@ tok->decoding_erred = 1; if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ PyMem_FREE(tok->buf); - tok->buf = NULL; + tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; + tok->done = E_DECODE; return NULL; /* as if it were EOF */ } @@ -952,11 +953,6 @@ } buflen = PyBytes_GET_SIZE(u); buf = PyBytes_AS_STRING(u); - if (!buf) { - Py_DECREF(u); - tok->done = E_DECODE; - return EOF; - } newtok = PyMem_MALLOC(buflen+1); strcpy(newtok, buf); Py_DECREF(u); @@ -998,7 +994,6 @@ if (tok->buf != NULL) PyMem_FREE(tok->buf); tok->buf = newtok; - tok->line_start = tok->buf; tok->cur = tok->buf; tok->line_start = tok->buf; tok->inp = strchr(tok->buf, '\0'); @@ -1021,7 +1016,8 @@ } if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), tok) == NULL) { - tok->done = E_EOF; + if (!tok->decoding_erred) + tok->done = E_EOF; done = 1; } else { @@ -1055,6 +1051,8 @@ return EOF; } tok->buf = newbuf; + tok->cur = tok->buf + cur; + tok->line_start = tok->cur; tok->inp = tok->buf + curvalid; tok->end = tok->buf + newsize; tok->start = curstart < 0 ? NULL :