cpython: 259745f9a1e4 (original) (raw)

Mercurial > cpython

changeset 104848:259745f9a1e4

Issue 28128: Print out better error/warning messages for invalid string escapes.

author Eric V. Smith <eric@trueblade.com>
date Mon, 31 Oct 2016 09:22:08 -0400
parents 66f255754ce9
children fb672afd0151
files Include/bytesobject.h Include/unicodeobject.h Lib/test/test_string_literals.py Lib/test/test_unicode.py Misc/NEWS Objects/bytesobject.c Objects/unicodeobject.c Python/ast.c
diffstat 8 files changed, 173 insertions(+), 22 deletions(-): Include/bytesobject.h 5, Include/unicodeobject.h 11, Lib/test/test_string_literals.py 27, Lib/test/test_unicode.py 7, Misc/NEWS 4, Objects/bytesobject.c 37, Objects/unicodeobject.c 38, Python/ast.c 66

line wrap: on

line diff

--- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -74,6 +74,11 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex( PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, const char *); +/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */ +PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,

/* Macro, trading safety for speed */ #ifndef Py_LIMITED_API

--- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -1486,6 +1486,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUn const char *errors /* error handling */ ); +/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape

+); + PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( PyObject *unicode /* Unicode object */ );

--- a/Lib/test/test_string_literals.py +++ b/Lib/test/test_string_literals.py @@ -31,6 +31,7 @@ import os import sys import shutil import tempfile +import warnings import unittest @@ -104,6 +105,19 @@ class TestLiterals(unittest.TestCase): self.assertRaises(SyntaxError, eval, r""" '\U000000' """) self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

+ def test_eval_str_raw(self): self.assertEqual(eval(""" r'x' """), 'x') self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') @@ -130,6 +144,19 @@ class TestLiterals(unittest.TestCase): self.assertRaises(SyntaxError, eval, r""" b'\x' """) self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

+ def test_eval_bytes_raw(self): self.assertEqual(eval(""" br'x' """), b'x') self.assertEqual(eval(""" rb'x' """), b'x')

--- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2413,13 +2413,6 @@ class UnicodeTest(string_tests.CommonTes support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str)

- class CAPITest(unittest.TestCase):

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.7.0 alpha 1 Core and Builtins ----------------- +- Issue #28128: Deprecation warning for invalid str and byte escape

--- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1105,11 +1105,12 @@ static char * return p; } -PyObject *PyBytes_DecodeEscape(const char *s, +PyObject *_PyBytes_DecodeEscape(const char *s, Py_ssize_t len, const char *errors, Py_ssize_t unicode,

{ int c; char *p; @@ -1123,6 +1124,8 @@ PyObject *PyBytes_DecodeEscape(const cha return NULL; writer.overallocate = 1;

+ end = s + len; while (s < end) { if (*s != '\\') { @@ -1207,9 +1210,12 @@ PyObject *PyBytes_DecodeEscape(const cha break; default:

@@ -1222,6 +1228,29 @@ PyObject *PyBytes_DecodeEscape(const cha return NULL; } +PyObject *PyBytes_DecodeEscape(const char *s,

+{

+ +} /* -------------------------------------------------------------------- */ /* object api */

--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5877,9 +5877,10 @@ PyUnicode_AsUTF16String(PyObject *unicod static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; PyObject * -PyUnicode_DecodeUnicodeEscape(const char *s,

+_PyUnicode_DecodeUnicodeEscape(const char *s,

{ const char *starts = s; _PyUnicodeWriter writer; @@ -5887,6 +5888,9 @@ PyUnicode_DecodeUnicodeEscape(const char PyObject *errorHandler = NULL; PyObject *exc = NULL;

+ if (size == 0) { _Py_RETURN_UNICODE_EMPTY(); } @@ -6061,9 +6065,10 @@ PyUnicode_DecodeUnicodeEscape(const char goto error; default:

@@ -6098,6 +6103,27 @@ PyUnicode_DecodeUnicodeEscape(const char return NULL; } +PyObject * +PyUnicode_DecodeUnicodeEscape(const char *s,

+{

+} + /* Return a Unicode-Escape string version of the Unicode object. If quotes is true, the string is enclosed in u"" or u'' quotes as

--- a/Python/ast.c +++ b/Python/ast.c @@ -4113,8 +4113,34 @@ decode_utf8(struct compiling *c, const c return PyUnicode_DecodeUTF8(t, s - t, NULL); } +static int +warn_invalid_escape_sequence(struct compiling *c, const node *n,

+{

+} + static PyObject * -decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) +decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,

{ PyObject *v, *u; char *buf; @@ -4167,11 +4193,41 @@ decode_unicode_with_escapes(struct compi len = p - buf; s = buf;

+

+static PyObject * +decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,

+{

+

+} + /* Compile this expression in to an expr_ty. Add parens around the expression, in order to allow leading spaces in the expression. */ static expr_ty @@ -4310,7 +4366,7 @@ done: literal_end-literal_start, NULL, NULL); else

@@ -5048,12 +5104,12 @@ parsestr(struct compiling *c, const node if (*rawmode) *result = PyBytes_FromStringAndSize(s, len); else