bpo-33899: Revert tokenize module adding an implicit final NEWLINE (G… · python/cpython@a1f45ec
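This commit reverts the bpo-33899 change that made `tokenize` emit an implicit final NEWLINE token when the input source does not end in a newline. The diff below (apparently Lib/test/test_tokenize.py on the Python 2.7 branch, judging by `StringIO` and `print x`) removes the helper and the test that the reverted change introduced. A minimal sketch of the behavior being restored, assuming a stock Python 2.7 `tokenize` without the reverted change:

```python
# Sketch only: token stream for source lacking a trailing newline,
# assuming Python 2.7's tokenize after this revert.
from StringIO import StringIO
from tokenize import generate_tokens, tok_name

tokens = list(generate_tokens(StringIO("x").readline))
print [tok_name[tok[0]] for tok in tokens]
# After the revert:  ['NAME', 'ENDMARKER']
# Before the revert: ['NAME', 'NEWLINE', 'ENDMARKER']  (implicit final NEWLINE)
```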
```diff
@@ -1,54 +1,32 @@
 from test import test_support
-from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
+from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                       STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
 from StringIO import StringIO
 import os
 from unittest import TestCase
 
 
-# Converts a source string into a list of textual representation
-# of the tokens such as:
-# `    NAME       'if'          (1, 0) (1, 2)`
-# to make writing tests easier.
-def stringify_tokens_from_source(token_generator, source_string):
-    result = []
-    num_lines = len(source_string.splitlines())
-    missing_trailing_nl = source_string[-1] not in '\r\n'
-
-    for type, token, start, end, line in token_generator:
-        if type == ENDMARKER:
-            break
-        # Ignore the new line on the last line if the input lacks one
-        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
-            continue
-        type = tok_name[type]
-        result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
-                      locals())
-
-    return result
-
 class TokenizeTest(TestCase):
     # Tests for the tokenize module.
 
     # The tests can be really simple. Given a small fragment of source
-    # code, print out a table with tokens. The ENDMARKER, ENCODING and
-    # final NEWLINE are omitted for brevity.
+    # code, print out a table with tokens. The ENDMARKER is omitted for
+    # brevity.
 
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
+        # The ENDMARKER is omitted.
+        result = []
         f = StringIO(s)
-        result = stringify_tokens_from_source(generate_tokens(f.readline), s)
-
+        for type, token, start, end, line in generate_tokens(f.readline):
+            if type == ENDMARKER:
+                break
+            type = tok_name[type]
+            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
+                          locals())
         self.assertEqual(result,
                          expected.rstrip().splitlines())
 
-    def test_implicit_newline(self):
-        # Make sure that the tokenizer puts in an implicit NEWLINE
-        # when the input lacks a trailing new line.
-        f = StringIO("x")
-        tokens = list(generate_tokens(f.readline))
-        self.assertEqual(tokens[-2][0], NEWLINE)
-        self.assertEqual(tokens[-1][0], ENDMARKER)
 
     def test_basic(self):
         self.check_tokenize("1 + 1", """\
```
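For illustration only (not part of the commit): with the revert in place, `check_tokenize` again builds its token table inline, and source without a trailing newline simply produces no NEWLINE row, so the filtering that `stringify_tokens_from_source` performed is unnecessary. A hedged sketch of the same loop outside the test class:

```python
# Illustrative sketch, assuming Python 2.7 after the revert: reproduce the
# token table that check_tokenize builds, for source with no trailing newline.
from StringIO import StringIO
from tokenize import generate_tokens, tok_name, ENDMARKER

for type, token, start, end, line in generate_tokens(StringIO("1 + 1").readline):
    if type == ENDMARKER:
        break
    print "    %-10.10s %-13.13r %s %s" % (tok_name[type], token, start, end)
# Prints NUMBER, OP, NUMBER rows only -- there is no implicit NEWLINE row.
```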
```diff
@@ -638,7 +616,7 @@ def test_roundtrip(self):
         self.check_roundtrip("if x == 1:\n"
                              "    print x\n")
         self.check_roundtrip("# This is a comment\n"
-                             "# This also\n")
+                             "# This also")
 
         # Some people use different formatting conventions, which makes
         # untokenize a little trickier. Note that this test involves trailing
```
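The second hunk restores the roundtrip test's original input, which deliberately ends without a newline: with the implicit NEWLINE gone, `untokenize` must again cope with such sources on its own. A hedged sketch of the invariant `check_roundtrip` exercises, using the same input as the test:

```python
# Sketch of the roundtrip property, assuming Python 2.7 after the revert:
# re-tokenizing untokenize()'s output yields the same (type, string) pairs,
# even when the source lacks a trailing newline.
from StringIO import StringIO
from tokenize import generate_tokens, untokenize

source = "# This is a comment\n# This also"
tokens = [tok[:2] for tok in generate_tokens(StringIO(source).readline)]
output = untokenize(tokens)  # 2-tuples select untokenize's compatibility mode
again = [tok[:2] for tok in generate_tokens(StringIO(output).readline)]
assert again == tokens
```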