[3.6] bpo-33899: Make tokenize module mirror end-of-file is end-of-li… · python/cpython@11c36a3
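As a quick illustration of the behavior this backport pins down, here is a minimal sketch (not part of the patch, and assuming a Python build that already includes the change): tokenizing a source fragment that lacks a trailing newline now yields an implicit NEWLINE token immediately before ENDMARKER, so end-of-file behaves like end-of-line.

from io import BytesIO
from tokenize import tokenize, tok_name

# Token stream for a one-line source without a trailing newline.
tokens = list(tokenize(BytesIO(b"x = 1").readline))
print([tok_name[tok.type] for tok in tokens])
# With the change applied, this prints:
# ['ENCODING', 'NAME', 'OP', 'NUMBER', 'NEWLINE', 'ENDMARKER']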
@@ -1,7 +1,8 @@
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                     open as tokenize_open, Untokenizer)
+                     open as tokenize_open, Untokenizer, generate_tokens,
+                     NEWLINE)
 from io import BytesIO
 import unittest
 from unittest import TestCase, mock
@@ -11,27 +12,51 @@
 import token


+# Converts a source string into a list of textual representation
+# of the tokens such as:
+# `    NAME      'if'          (1, 0) (1, 2)`
+# to make writing tests easier.
+def stringify_tokens_from_source(token_generator, source_string):
+    result = []
+    num_lines = len(source_string.splitlines())
+    missing_trailing_nl = source_string[-1] not in '\r\n'
+
+    for type, token, start, end, line in token_generator:
+        if type == ENDMARKER:
+            break
+        # Ignore the new line on the last line if the input lacks one
+        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
+            continue
+        type = tok_name[type]
+        result.append(f"    {type:10} {token!r:13} {start} {end}")
+
+    return result
+
 class TokenizeTest(TestCase):
     # Tests for the tokenize module.

     # The tests can be really simple. Given a small fragment of source
-    # code, print out a table with tokens. The ENDMARKER is omitted for
-    # brevity.
+    # code, print out a table with tokens. The ENDMARKER, ENCODING and
+    # final NEWLINE are omitted for brevity.

     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
-        # The ENDMARKER is omitted.
-        result = []
+        # The ENDMARKER and final NEWLINE are omitted.
         f = BytesIO(s.encode('utf-8'))
-        for type, token, start, end, line in tokenize(f.readline):
-            if type == ENDMARKER:
-                break
-            type = tok_name[type]
-            result.append(f"    {type:10} {token!r:13} {start} {end}")
+        result = stringify_tokens_from_source(tokenize(f.readline), s)
+
         self.assertEqual(result,
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())

+    def test_implicit_newline(self):
+        # Make sure that the tokenizer puts in an implicit NEWLINE
+        # when the input lacks a trailing new line.
+        f = BytesIO("x".encode('utf-8'))
+        tokens = list(tokenize(f.readline))
+        self.assertEqual(tokens[-2].type, NEWLINE)
+        self.assertEqual(tokens[-1].type, ENDMARKER)
+
     def test_basic(self):
         self.check_tokenize("1 + 1", """\
     NUMBER     '1'           (1, 0) (1, 1)
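The stringify_tokens_from_source helper above drops that implicit NEWLINE on the last line so the existing expected tables in check_tokenize keep passing unchanged. A standalone sketch of the equivalence it relies on (not part of the patch, assuming a build with the change):

from io import BytesIO
from tokenize import tokenize, tok_name

def token_types(source_bytes):
    # Token names only; string and position details are ignored here.
    return [tok_name[tok.type] for tok in tokenize(BytesIO(source_bytes).readline)]

# A fragment without a trailing newline now produces the same token types
# as the same fragment with one; only the NEWLINE's string/position differ.
assert token_types(b"1 + 1") == token_types(b"1 + 1\n")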
@@ -993,8 +1018,8 @@ def readline():
             else:
                 return b''

-        # skip the initial encoding token and the end token
-        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
+        # skip the initial encoding token and the end tokens
+        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
@@ -1010,8 +1035,8 @@ def readline():
             else:
                 return b''

-        # skip the end token
-        tokens = list(_tokenize(readline, encoding=None))[:-1]
+        # skip the end tokens
+        tokens = list(_tokenize(readline, encoding=None))[:-2]
         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "string not tokenized when encoding is None")
@@ -1322,18 +1347,21 @@ def test_oneline_defs(self):

         # Test that 500 consequent, one-line defs is OK
         toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
-        self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
+        self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
+                                                # [-2] is always NEWLINE

     def assertExactTypeEqual(self, opstr, *optypes):
         tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
         num_optypes = len(optypes)
-        self.assertEqual(len(tokens), 2 + num_optypes)
+        self.assertEqual(len(tokens), 3 + num_optypes)
         self.assertEqual(token.tok_name[tokens[0].exact_type],
                          token.tok_name[ENCODING])
         for i in range(num_optypes):
             self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
                              token.tok_name[optypes[i]])
         self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
+                         token.tok_name[token.NEWLINE])
+        self.assertEqual(token.tok_name[tokens[2 + num_optypes].exact_type],
                          token.tok_name[token.ENDMARKER])

     def test_exact_type(self):
@@ -1484,7 +1512,7 @@ def test_roundtrip(self):
         self.check_roundtrip("if x == 1:\n"
                              "    print(x)\n")
         self.check_roundtrip("# This is a comment\n"
-                             "# This also")
+                             "# This also\n")

         # Some people use different formatting conventions, which makes
         # untokenize a little trickier. Note that this test involves trailing