cpython: 259745f9a1e4 (original) (raw)
Mercurial > cpython
changeset 104848:259745f9a1e4
Issue 28128: Print out better error/warning messages for invalid string escapes.
author | Eric V. Smith <eric@trueblade.com> |
---|---|
date | Mon, 31 Oct 2016 09:22:08 -0400 |
parents | 66f255754ce9 |
children | fb672afd0151 |
files | Include/bytesobject.h Include/unicodeobject.h Lib/test/test_string_literals.py Lib/test/test_unicode.py Misc/NEWS Objects/bytesobject.c Objects/unicodeobject.c Python/ast.c |
diffstat | 8 files changed, 173 insertions(+), 22 deletions(-): Include/bytesobject.h 5, Include/unicodeobject.h 11, Lib/test/test_string_literals.py 27, Lib/test/test_unicode.py 7, Misc/NEWS 4, Objects/bytesobject.c 37, Objects/unicodeobject.c 38, Python/ast.c 66 |
line wrap: on
line diff
--- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -74,6 +74,11 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex( PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, const char *); +/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */ +PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t,[](#l1.9)
const char *,[](#l1.10)
const char **);[](#l1.11)
/* Macro, trading safety for speed */ #ifndef Py_LIMITED_API
--- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -1486,6 +1486,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUn const char *errors /* error handling */ ); +/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
- chars. */ +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
const char *string, /* Unicode-Escape encoded string */[](#l2.10)
Py_ssize_t length, /* size of string */[](#l2.11)
const char *errors, /* error handling */[](#l2.12)
const char **first_invalid_escape /* on return, points to first[](#l2.13)
invalid escaped char in[](#l2.14)
string. */[](#l2.15)
+); + PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( PyObject *unicode /* Unicode object */ );
--- a/Lib/test/test_string_literals.py +++ b/Lib/test/test_string_literals.py @@ -31,6 +31,7 @@ import os import sys import shutil import tempfile +import warnings import unittest @@ -104,6 +105,19 @@ class TestLiterals(unittest.TestCase): self.assertRaises(SyntaxError, eval, r""" '\U000000' """) self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
- def test_eval_str_invalid_escape(self):
for b in range(1, 128):[](#l3.16)
if b in b"""\n\r"'01234567NU\\abfnrtuvx""":[](#l3.17)
continue[](#l3.18)
with self.assertWarns(DeprecationWarning):[](#l3.19)
self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))[](#l3.20)
with warnings.catch_warnings(record=True) as w:[](#l3.21)
warnings.simplefilter('always', category=DeprecationWarning)[](#l3.22)
eval("'''\n\\z'''")[](#l3.23)
self.assertEqual(len(w), 1)[](#l3.24)
self.assertEqual(w[0].filename, '<string>')[](#l3.25)
self.assertEqual(w[0].lineno, 2)[](#l3.26)
+ def test_eval_str_raw(self): self.assertEqual(eval(""" r'x' """), 'x') self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') @@ -130,6 +144,19 @@ class TestLiterals(unittest.TestCase): self.assertRaises(SyntaxError, eval, r""" b'\x' """) self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
- def test_eval_bytes_invalid_escape(self):
for b in range(1, 128):[](#l3.36)
if b in b"""\n\r"'01234567\\abfnrtvx""":[](#l3.37)
continue[](#l3.38)
with self.assertWarns(DeprecationWarning):[](#l3.39)
self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))[](#l3.40)
with warnings.catch_warnings(record=True) as w:[](#l3.41)
warnings.simplefilter('always', category=DeprecationWarning)[](#l3.42)
eval("b'''\n\\z'''")[](#l3.43)
self.assertEqual(len(w), 1)[](#l3.44)
self.assertEqual(w[0].filename, '<string>')[](#l3.45)
self.assertEqual(w[0].lineno, 2)[](#l3.46)
+ def test_eval_bytes_raw(self): self.assertEqual(eval(""" br'x' """), b'x') self.assertEqual(eval(""" rb'x' """), b'x')
--- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2413,13 +2413,6 @@ class UnicodeTest(string_tests.CommonTes support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str)
- def test_invalid_sequences(self):
for letter in string.ascii_letters + "89": # 0-7 are octal escapes[](#l4.8)
if letter in "abfnrtuvxNU":[](#l4.9)
continue[](#l4.10)
with self.assertWarns(DeprecationWarning):[](#l4.11)
eval(r"'\%s'" % letter)[](#l4.12)
- class CAPITest(unittest.TestCase):
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.7.0 alpha 1 Core and Builtins ----------------- +- Issue #28128: Deprecation warning for invalid str and byte escape
- sequences now prints better information about where the error
- occurs. Patch by Serhiy Storchaka and Eric Smith. +
- Issue #28509: dict.update() no longer allocate unnecessary large memory.
- Issue #28426: Fixed potential crash in PyUnicode_AsDecodedObject() in debug
--- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1105,11 +1105,12 @@ static char * return p; } -PyObject *PyBytes_DecodeEscape(const char *s, +PyObject *_PyBytes_DecodeEscape(const char *s, Py_ssize_t len, const char *errors, Py_ssize_t unicode,
const char *recode_encoding)[](#l6.12)
const char *recode_encoding,[](#l6.13)
const char **first_invalid_escape)[](#l6.14)
{ int c; char *p; @@ -1123,6 +1124,8 @@ PyObject *PyBytes_DecodeEscape(const cha return NULL; writer.overallocate = 1;
+ *first_invalid_escape = NULL; + end = s + len; while (s < end) { if (*s != '\\') { @@ -1207,9 +1210,12 @@ PyObject *PyBytes_DecodeEscape(const cha break; default:
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)[](#l6.31)
goto failed;[](#l6.32)
if (*first_invalid_escape == NULL) {[](#l6.33)
*first_invalid_escape = s-1; /* Back up one char, since we've[](#l6.34)
already incremented s. */[](#l6.35)
}[](#l6.36) *p++ = '\\';[](#l6.37)
s--;[](#l6.38) goto non_esc; /* an arbitrary number of unescaped[](#l6.39) UTF-8 bytes may follow. */[](#l6.40) }[](#l6.41)
@@ -1222,6 +1228,29 @@ PyObject *PyBytes_DecodeEscape(const cha return NULL; } +PyObject *PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,[](#l6.47)
const char *errors,[](#l6.48)
Py_ssize_t unicode,[](#l6.49)
const char *recode_encoding)[](#l6.50)
- const char* first_invalid_escape;
- PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
recode_encoding,[](#l6.54)
&first_invalid_escape);[](#l6.55)
- if (result == NULL)
return NULL;[](#l6.57)
- if (first_invalid_escape != NULL) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,[](#l6.59)
"invalid escape sequence '\\%c'",[](#l6.60)
*first_invalid_escape) < 0) {[](#l6.61)
Py_DECREF(result);[](#l6.62)
return NULL;[](#l6.63)
}[](#l6.64)
- }
- return result;
+ +} /* -------------------------------------------------------------------- */ /* object api */
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5877,9 +5877,10 @@ PyUnicode_AsUTF16String(PyObject *unicod static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; PyObject * -PyUnicode_DecodeUnicodeEscape(const char *s,
Py_ssize_t size,[](#l7.8)
const char *errors)[](#l7.9)
+_PyUnicode_DecodeUnicodeEscape(const char *s,
Py_ssize_t size,[](#l7.11)
const char *errors,[](#l7.12)
const char **first_invalid_escape)[](#l7.13)
{ const char *starts = s; _PyUnicodeWriter writer; @@ -5887,6 +5888,9 @@ PyUnicode_DecodeUnicodeEscape(const char PyObject *errorHandler = NULL; PyObject *exc = NULL;
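+ // so we can remember if we've seen an invalid escape char or not + *first_invalid_escape = NULL; + if (size == 0) { _Py_RETURN_UNICODE_EMPTY(); } @@ -6061,9 +6065,10 @@ PyUnicode_DecodeUnicodeEscape(const char goto error; default: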
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,[](#l7.31)
"invalid escape sequence '\\%c'", c) < 0)[](#l7.32)
goto onError;[](#l7.33)
if (*first_invalid_escape == NULL) {[](#l7.34)
*first_invalid_escape = s-1; /* Back up one char, since we've[](#l7.35)
already incremented s. */[](#l7.36)
}[](#l7.37) WRITE_ASCII_CHAR('\\');[](#l7.38) WRITE_CHAR(c);[](#l7.39) continue;[](#l7.40)
@@ -6098,6 +6103,27 @@ PyUnicode_DecodeUnicodeEscape(const char return NULL; } +PyObject * +PyUnicode_DecodeUnicodeEscape(const char *s,
Py_ssize_t size,[](#l7.47)
const char *errors)[](#l7.48)
- const char *first_invalid_escape;
- PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
&first_invalid_escape);[](#l7.52)
- if (result == NULL)
return NULL;[](#l7.54)
- if (first_invalid_escape != NULL) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,[](#l7.56)
"invalid escape sequence '\\%c'",[](#l7.57)
*first_invalid_escape) < 0) {[](#l7.58)
Py_DECREF(result);[](#l7.59)
return NULL;[](#l7.60)
}[](#l7.61)
- }
- return result;
+} + /* Return a Unicode-Escape string version of the Unicode object. If quotes is true, the string is enclosed in u"" or u'' quotes as
--- a/Python/ast.c +++ b/Python/ast.c @@ -4113,8 +4113,34 @@ decode_utf8(struct compiling *c, const c return PyUnicode_DecodeUTF8(t, s - t, NULL); } +static int +warn_invalid_escape_sequence(struct compiling *c, const node *n,
char first_invalid_escape_char)[](#l8.9)
- PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
first_invalid_escape_char);[](#l8.12)
- if (msg == NULL) {
return -1;[](#l8.14)
- }
- if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
c->c_filename, LINENO(n),[](#l8.17)
NULL, NULL) < 0 &&[](#l8.18)
PyErr_ExceptionMatches(PyExc_DeprecationWarning))[](#l8.19)
- {
const char *s = PyUnicode_AsUTF8(msg);[](#l8.21)
if (s != NULL) {[](#l8.22)
ast_error(c, n, s);[](#l8.23)
}[](#l8.24)
Py_DECREF(msg);[](#l8.25)
return -1;[](#l8.26)
- }
- Py_DECREF(msg);
- return 0;
+} + static PyObject * -decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) +decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
size_t len)[](#l8.35)
{ PyObject *v, *u; char *buf; @@ -4167,11 +4193,41 @@ decode_unicode_with_escapes(struct compi len = p - buf; s = buf;
- const char *first_invalid_escape;
- v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
- if (v != NULL && first_invalid_escape != NULL) {
if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {[](#l8.48)
/* We have not decref u before because first_invalid_escape points[](#l8.49)
inside u. */[](#l8.50)
Py_XDECREF(u);[](#l8.51)
Py_DECREF(v);[](#l8.52)
return NULL;[](#l8.53)
}[](#l8.54)
- } Py_XDECREF(u); return v; }
+static PyObject * +decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
size_t len)[](#l8.62)
- const char *first_invalid_escape;
- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
&first_invalid_escape);[](#l8.66)
- if (result == NULL)
return NULL;[](#l8.68)
- if (first_invalid_escape != NULL) {
if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {[](#l8.71)
Py_DECREF(result);[](#l8.72)
return NULL;[](#l8.73)
}[](#l8.74)
- }
- return result;
+} + /* Compile this expression in to an expr_ty. Add parens around the expression, in order to allow leading spaces in the expression. */ static expr_ty @@ -4310,7 +4366,7 @@ done: literal_end-literal_start, NULL, NULL); else
*literal = decode_unicode_with_escapes(c, literal_start,[](#l8.86)
*literal = decode_unicode_with_escapes(c, n, literal_start,[](#l8.87) literal_end-literal_start);[](#l8.88) if (!*literal)[](#l8.89) return -1;[](#l8.90)
@@ -5048,12 +5104,12 @@ parsestr(struct compiling *c, const node if (*rawmode) *result = PyBytes_FromStringAndSize(s, len); else
*result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL);[](#l8.95)
*result = decode_bytes_with_escapes(c, n, s, len);[](#l8.96) } else { if (*rawmode) *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); else
*result = decode_unicode_with_escapes(c, s, len);[](#l8.101)