cpython: b07488490001 (original) (raw)

Mercurial > cpython

changeset 76425:b07488490001 3.2

Issue #14629: Raise SyntaxError in tokenize.detect_encoding if the first two lines have non-UTF-8 characters without an encoding declaration. [#14629]

author Martin v. Löwis <martin@v.loewis.de>
date Fri, 20 Apr 2012 14:36:47 +0200
parents 41c64c700e1e
children 98a6a57c5876 cdcc6b489862
files Lib/test/test_tokenize.py Lib/tokenize.py Misc/NEWS
diffstat 3 files changed, 18 insertions(+), 2 deletions(-): Lib/test/test_tokenize.py (10), Lib/tokenize.py (7), Misc/NEWS (3)

line wrap: on

line diff

--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -825,6 +825,16 @@ class TestDetectEncoding(TestCase):
         found, consumed_lines = detect_encoding(rl)
         self.assertEqual(found, "iso-8859-1")

+ + def test_utf8_normalization(self): # See get_normal_name() in tokenizer.c. encodings = ("utf-8", "utf-8-mac", "utf-8-unix")

--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -292,9 +292,12 @@ def detect_encoding(readline):
     def find_cookie(line):
         try:

matches = cookie_re.findall(line_string) if not matches:

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -47,6 +47,9 @@ Core and Builtins
 Library
 -------
+- Issue #14629: Raise SyntaxError in tokenizer.detect_encoding if the