[3.7] bpo-33899: Make tokenize module mirror end-of-file is end-of-line behavior · python/cpython@ab75d9e

@@ -1,7 +1,8 @@
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                     open as tokenize_open, Untokenizer)
+                     open as tokenize_open, Untokenizer, generate_tokens,
+                     NEWLINE)
 from io import BytesIO
 import unittest
 from unittest import TestCase, mock

@@ -11,27 +12,51 @@
 import token
 
 
+# Converts a source string into a list of textual representation
+# of the tokens such as:
+# `    NAME       'if'          (1, 0) (1, 2)`
+# to make writing tests easier.
+def stringify_tokens_from_source(token_generator, source_string):
+    result = []
+    num_lines = len(source_string.splitlines())
+    missing_trailing_nl = source_string[-1] not in '\r\n'
+
+    for type, token, start, end, line in token_generator:
+        if type == ENDMARKER:
+            break
+        # Ignore the new line on the last line if the input lacks one
+        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
+            continue
+        type = tok_name[type]
+        result.append(f"    {type:10} {token!r:13} {start} {end}")
+
+    return result
+
 class TokenizeTest(TestCase):
     # Tests for the tokenize module.
 
     # The tests can be really simple. Given a small fragment of source
-    # code, print out a table with tokens. The ENDMARKER is omitted for
-    # brevity.
+    # code, print out a table with tokens. The ENDMARKER, ENCODING and
+    # final NEWLINE are omitted for brevity.
 
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
-        # The ENDMARKER is omitted.
-        result = []
+        # The ENDMARKER and final NEWLINE are omitted.
         f = BytesIO(s.encode('utf-8'))
-        for type, token, start, end, line in tokenize(f.readline):
-            if type == ENDMARKER:
-                break
-            type = tok_name[type]
-            result.append(f"    {type:10} {token!r:13} {start} {end}")
+        result = stringify_tokens_from_source(tokenize(f.readline), s)
+
         self.assertEqual(result,
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())
 
+    def test_implicit_newline(self):
+        # Make sure that the tokenizer puts in an implicit NEWLINE
+        # when the input lacks a trailing new line.
+        f = BytesIO("x".encode('utf-8'))
+        tokens = list(tokenize(f.readline))
+        self.assertEqual(tokens[-2].type, NEWLINE)
+        self.assertEqual(tokens[-1].type, ENDMARKER)
+
     def test_basic(self):
         self.check_tokenize("1 + 1", """\
     NUMBER     '1'           (1, 0) (1, 1)
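
Note: the snippet below is not part of the patch. It is a minimal sketch of the behavior that the new `test_implicit_newline` asserts on a 3.7 build with this change applied: even when the source lacks a trailing newline, the token stream still ends with a NEWLINE token followed by ENDMARKER. The input `b"x = 1"` is my own example; the API calls (`tokenize`, `tok_name`) are the ones already imported above.

```python
from io import BytesIO
from tokenize import tokenize, tok_name, NEWLINE, ENDMARKER

# Source with no trailing newline.
toks = list(tokenize(BytesIO(b"x = 1").readline))
for tok in toks:
    print(tok_name[tok.type], repr(tok.string))
# Expected shape on 3.7+ with this change:
#   ENCODING 'utf-8', NAME 'x', OP '=', NUMBER '1', NEWLINE '', ENDMARKER ''

# The same two assertions the new test makes:
assert toks[-2].type == NEWLINE
assert toks[-1].type == ENDMARKER
```
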

@@ -1009,8 +1034,8 @@ def readline():
             else:
                 return b''
 
-        # skip the initial encoding token and the end token
-        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
+        # skip the initial encoding token and the end tokens
+        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")

@@ -1026,8 +1051,8 @@ def readline():
             else:
                 return b''
 
-        # skip the end token
-        tokens = list(_tokenize(readline, encoding=None))[:-1]
+        # skip the end tokens
+        tokens = list(_tokenize(readline, encoding=None))[:-2]
         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "string not tokenized when encoding is None")
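
Illustrative only, not from the patch: why the slices in the two hunks above change from `[1:-1]`/`[:-1]` to `[1:-2]`/`[:-2]`. The sketch uses the public `tokenize` API rather than the private `_tokenize` and assumes a 3.7 build with this change; the stream for a one-line source now ends with both NEWLINE and ENDMARKER, so two trailing tokens have to be dropped to leave only the content tokens.

```python
from io import BytesIO
from tokenize import tokenize

source = '"ЉЊЈЁЂ"'
toks = list(tokenize(BytesIO(source.encode('utf-8')).readline))
# toks is now ENCODING, STRING, NEWLINE, ENDMARKER, so dropping the
# encoding token and the two trailing tokens leaves just the STRING.
content = toks[1:-2]
assert len(content) == 1
assert content[0].string == source
```
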

@@ -1338,18 +1363,21 @@ def test_oneline_defs(self):
 
         # Test that 500 consequent, one-line defs is OK
         toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
-        self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
+        self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
+                                                # [-2] is always NEWLINE
 
     def assertExactTypeEqual(self, opstr, *optypes):
         tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
         num_optypes = len(optypes)
-        self.assertEqual(len(tokens), 2 + num_optypes)
+        self.assertEqual(len(tokens), 3 + num_optypes)
         self.assertEqual(tok_name[tokens[0].exact_type],
                          tok_name[ENCODING])
         for i in range(num_optypes):
             self.assertEqual(tok_name[tokens[i + 1].exact_type],
                              tok_name[optypes[i]])
         self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
+                         tok_name[token.NEWLINE])
+        self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
                          tok_name[token.ENDMARKER])
 
     def test_exact_type(self):
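
Rough illustration, not part of the test suite: the new `3 + num_optypes` expectation in `assertExactTypeEqual` accounts for ENCODING at the front plus NEWLINE and ENDMARKER at the end bracketing the operator tokens. The operator `**=` is my own example input; any operator with an exact type would do.

```python
from io import BytesIO
from tokenize import tokenize, tok_name

toks = list(tokenize(BytesIO(b"**=").readline))
print([tok_name[t.exact_type] for t in toks])
# ['ENCODING', 'DOUBLESTAREQUAL', 'NEWLINE', 'ENDMARKER']
assert len(toks) == 3 + 1   # one operator token
```
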

@@ -1502,7 +1530,7 @@ def test_roundtrip(self):
         self.check_roundtrip("if x == 1:\n"
                              "    print(x)\n")
         self.check_roundtrip("# This is a comment\n"
-                             "# This also")
+                             "# This also\n")
 
         # Some people use different formatting conventions, which makes
         # untokenize a little trickier. Note that this test involves trailing
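
Hedged sketch of the round-trip property the last hunk exercises (`check_roundtrip` is defined elsewhere in this test file and compares (type, string) pairs rather than raw bytes). With the trailing newline added to the comment-only input, tokenize → untokenize → tokenize should reproduce the same pairs on a 3.7 build with this change; the code below is my own condensed version of that check, not the test's exact implementation.

```python
from io import BytesIO
from tokenize import tokenize, untokenize

code = b"# This is a comment\n# This also\n"
tokens = [tok[:2] for tok in tokenize(BytesIO(code).readline)]
regenerated = untokenize(tokens)              # bytes, encoded per ENCODING token
retokenized = [tok[:2] for tok in tokenize(BytesIO(regenerated).readline)]
assert tokens == retokenized
```
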