[Python-checkins] cpython (merge 3.5 -> default): Issue #25388: Fixed tokenizer crash when processing undecodable source code
serhiy.storchaka
python-checkins at python.org
Sat Nov 14 08:14:59 EST 2015
https://hg.python.org/cpython/rev/ea0c4b811eae
changeset: 99137:ea0c4b811eae
parent: 99134:25a7ceed79d1
parent: 99136:e4a69eb34ad7
user: Serhiy Storchaka <storchaka at gmail.com>
date: Sat Nov 14 15:12:04 2015 +0200
summary:
Issue #25388: Fixed tokenizer crash when processing undecodable source code
with a null byte.
files:
Lib/test/test_compile.py | 10 ++++++++++
Misc/NEWS | 3 +++
Parser/tokenizer.c | 14 ++++++--------
3 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -516,6 +516,16 @@
res = script_helper.run_python_until_end(fn)[0]
self.assertIn(b"Non-UTF-8", res.err)
+ def test_yet_more_evil_still_undecodable(self):
+ # Issue #25388
+ src = b"#\x00\n#\xfd\n"
+ with tempfile.TemporaryDirectory() as tmpd:
+ fn = os.path.join(tmpd, "bad.py")
+ with open(fn, "wb") as fp:
+ fp.write(src)
+ res = script_helper.run_python_until_end(fn)[0]
+ self.assertIn(b"Non-UTF-8", res.err)
+
@support.cpython_only
def test_compiler_recursion_limit(self):
# Expected limit is sys.getrecursionlimit() * the scaling factor
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
Core and Builtins
-----------------
+- Issue #25388: Fixed tokenizer crash when processing undecodable source code
+ with a null byte.
+
- Issue #25462: The hash of the key now is calculated only once in most
operations in C implementation of OrderedDict.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -196,7 +196,8 @@
tok->decoding_erred = 1;
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
PyMem_FREE(tok->buf);
- tok->buf = NULL;
+ tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+ tok->done = E_DECODE;
return NULL; /* as if it were EOF */
}
@@ -952,11 +953,6 @@
}
buflen = PyBytes_GET_SIZE(u);
buf = PyBytes_AS_STRING(u);
- if (!buf) {
- Py_DECREF(u);
- tok->done = E_DECODE;
- return EOF;
- }
newtok = PyMem_MALLOC(buflen+1);
strcpy(newtok, buf);
Py_DECREF(u);
@@ -998,7 +994,6 @@
if (tok->buf != NULL)
PyMem_FREE(tok->buf);
tok->buf = newtok;
- tok->line_start = tok->buf;
tok->cur = tok->buf;
tok->line_start = tok->buf;
tok->inp = strchr(tok->buf, '\0');
@@ -1021,7 +1016,8 @@
}
if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
tok) == NULL) {
- tok->done = E_EOF;
+ if (!tok->decoding_erred)
+ tok->done = E_EOF;
done = 1;
}
else {
@@ -1055,6 +1051,8 @@
return EOF;
}
tok->buf = newbuf;
+ tok->cur = tok->buf + cur;
+ tok->line_start = tok->cur;
tok->inp = tok->buf + curvalid;
tok->end = tok->buf + newsize;
tok->start = curstart < 0 ? NULL :
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins mailing list