cpython: 9ce5c1f371f7
changeset   96712:9ce5c1f371f7
branch      3.4
description Issue #20387: Merge
author      Jason R. Coombs <jaraco@jaraco.com>
date        Sun, 28 Jun 2015 11:10:29 -0400
parents     fd17e168b59f (current diff), 330e28b28334 (diff)
children    98380a6e037c, c95d7ffa492e
files       Misc/NEWS
diffstat    3 files changed, 40 insertions(+), 1 deletions(-)

            Lib/test/test_tokenize.py |  21 ++++++++++++++++++++-
            Lib/tokenize.py           |  17 +++++++++++++++++
            Misc/NEWS                 |   3 +++
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -5,6 +5,8 @@ The tests can be really simple. Given a
 code, print out a table with tokens. The ENDMARKER is omitted for
 brevity.
 
+    >>> import glob
+
     >>> dump_tokens("1 + 1")
     ENCODING   'utf-8'     (0, 0) (0, 0)
     NUMBER     '1'         (1, 0) (1, 1)
@@ -647,7 +649,7 @@ from tokenize import (tokenize, _tokeniz
                       open as tokenize_open, Untokenizer)
 from io import BytesIO
 from unittest import TestCase, mock
-import os, sys, glob
+import os
 import token
 
 def dump_tokens(s):
@@ -1227,6 +1229,22 @@ class UntokenizeTest(TestCase):
 
         self.assertEqual(untokenize(iter(tokens)), b'Hello ')
 
+class TestRoundtrip(TestCase):
+
+    def roundtrip(self, code):
+        if isinstance(code, str):
+            code = code.encode('utf-8')
+        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
+
+    def test_indentation_semantics_retained(self):
+        """
+        Ensure that although whitespace might be mutated in a roundtrip,
+        the semantic meaning of the indentation remains consistent.
+        """
+        code = "if False:\n\tx=3\n\tx=3\n"
+        codelines = self.roundtrip(code).split('\n')
+        self.assertEqual(codelines[1], codelines[2])
+
 
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
 
@@ -1237,6 +1255,7 @@ def test_main():
     support.run_unittest(TestDetectEncoding)
     support.run_unittest(TestTokenize)
     support.run_unittest(UntokenizeTest)
+    support.run_unittest(TestRoundtrip)
 
 if __name__ == "__main__":
     test_main()
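
Note: the new TestRoundtrip case can be reproduced by hand with nothing
but the public tokenize API. A minimal sketch follows (the roundtrip
helper mirrors the test method above; it is not a library function):

    # Sketch of what TestRoundtrip exercises (Python 3.4+ with this fix).
    from io import BytesIO
    from tokenize import tokenize, untokenize

    def roundtrip(code):
        # Tokenize source bytes, then regenerate source text from the tokens.
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    # Two identically tab-indented statements inside one block.
    code = "if False:\n\tx=3\n\tx=3\n"
    lines = roundtrip(code).split('\n')

    # Whitespace inside a line may be normalized, but both body lines must
    # keep the same indentation prefix so the block structure is preserved.
    assert lines[1] == lines[2]
    compile(roundtrip(code), '<roundtrip>', 'exec')  # must not raise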
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -244,6 +244,8 @@ class Untokenizer:
 
     def untokenize(self, iterable):
         it = iter(iterable)
+        indents = []
+        startline = False
         for t in it:
             if len(t) == 2:
                 self.compat(t, it)
@@ -254,6 +256,21 @@ class Untokenizer:
                 continue
             if tok_type == ENDMARKER:
                 break
+            if tok_type == INDENT:
+                indents.append(token)
+                continue
+            elif tok_type == DEDENT:
+                indents.pop()
+                self.prev_row, self.prev_col = end
+                continue
+            elif tok_type in (NEWLINE, NL):
+                startline = True
+            elif startline and indents:
+                indent = indents[-1]
+                if start[1] >= len(indent):
+                    self.tokens.append(indent)
+                    self.prev_col = len(indent)
+                startline = False
             self.add_whitespace(start)
             self.tokens.append(token)
             self.prev_row, self.prev_col = end
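
Note: the patch works because tokenize emits the full leading-whitespace
string as the INDENT token's text, while DEDENT carries an empty string.
untokenize can therefore keep a stack of those strings and replay the
innermost one at the start of each following logical line, instead of
synthesizing indentation as spaces in add_whitespace. A small sketch of
that bookkeeping, using only documented tokenize names (the sample source
and the expected output in comments are illustrative):

    # Sketch: the token-level facts the new `indents` stack relies on.
    from io import BytesIO
    from tokenize import tokenize, untokenize, tok_name, INDENT, DEDENT

    source = b"if True:\n\twhile True:\n\t\tbreak\n\tpass\n"

    # INDENT tokens carry the whole indentation string; DEDENT carries ''.
    for tok in tokenize(BytesIO(source).readline):
        if tok.type in (INDENT, DEDENT):
            print(tok_name[tok.type], repr(tok.string))
    # INDENT '\t'
    # INDENT '\t\t'
    # DEDENT ''
    # DEDENT ''

    # With the fix, every regenerated body line reuses its original tab prefix.
    regen = untokenize(tokenize(BytesIO(source).readline)).decode('utf-8')
    assert all(line.startswith('\t') for line in regen.split('\n')[1:4])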
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -60,6 +60,9 @@ Core and Builtins
 
 Library
 -------
 
+- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize
+  for tab-indented blocks.
+
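
Note: for contrast, the failure mode this NEWS entry records. Before the
patch, untokenize emitted the INDENT token's tab once, but indentation on
subsequent lines at the same level came from add_whitespace as spaces, so
a tab-indented block came back mixing tabs and spaces and no longer
compiled. A hypothetical session against an unpatched 3.4 tokenize (not
reproducible on interpreters that include this fix):

    # Hypothetical pre-fix behavior (unpatched tokenize; illustration only).
    from io import BytesIO
    from tokenize import tokenize, untokenize

    code = b"if False:\n\tx=3\n\tx=3\n"
    regen = untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
    lines = regen.split('\n')
    # Unpatched: lines[1] begins with '\t' (the INDENT token text) while
    # lines[2] begins with a single space from add_whitespace, so compiling
    # the regenerated source fails with an indentation/tab error.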