bpo-33899: Revert tokenize module adding an implicit final NEWLINE (G… · python/cpython@a1f45ec
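This commit reverts, on the 2.7 branch, the bpo-33899 change that made the tokenize module emit an implicit NEWLINE token when the source lacks a trailing newline. Judging from the imports and the `TokenizeTest` class, the file being changed is the tokenize test suite: the diff removes the `stringify_tokens_from_source` helper and the `test_implicit_newline` test, and restores the inline token-formatting loop in `check_tokenize`. A minimal sketch of the behavioral difference (Python 2, matching the `StringIO` import and `print` statements used in the tests):

```python
from StringIO import StringIO
from tokenize import generate_tokens, tok_name

# Tokenize a one-line source string with no trailing newline.
for tok in generate_tokens(StringIO("x").readline):
    print tok_name[tok[0]], repr(tok[1])

# With this revert (historical 2.7 behavior):
#     NAME 'x'
#     ENDMARKER ''
# With the reverted change applied, an implicit NEWLINE token appeared
# before ENDMARKER -- exactly what the removed test_implicit_newline
# asserted with tokens[-2][0] == NEWLINE.
```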

```diff
@@ -1,54 +1,32 @@
 from test import test_support
-from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
+from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                       STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
 from StringIO import StringIO
 import os
 from unittest import TestCase
 
 
-# Converts a source string into a list of textual representation
-# of the tokens such as:
-# `    NAME       'if'          (1, 0) (1, 2)`
-# to make writing tests easier.
-def stringify_tokens_from_source(token_generator, source_string):
-    result = []
-    num_lines = len(source_string.splitlines())
-    missing_trailing_nl = source_string[-1] not in '\r\n'
-
-    for type, token, start, end, line in token_generator:
-        if type == ENDMARKER:
-            break
-        # Ignore the new line on the last line if the input lacks one
-        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
-            continue
-        type = tok_name[type]
-        result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
-                      locals())
-
-    return result
-
 class TokenizeTest(TestCase):
     # Tests for the tokenize module.
 
     # The tests can be really simple. Given a small fragment of source
-    # code, print out a table with tokens. The ENDMARKER, ENCODING and
-    # final NEWLINE are omitted for brevity.
+    # code, print out a table with tokens. The ENDMARKER is omitted for
+    # brevity.
 
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
+        # The ENDMARKER is omitted.
+        result = []
         f = StringIO(s)
-        result = stringify_tokens_from_source(generate_tokens(f.readline), s)
-
+        for type, token, start, end, line in generate_tokens(f.readline):
+            if type == ENDMARKER:
+                break
+            type = tok_name[type]
+            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
+                          locals())
         self.assertEqual(result,
                          expected.rstrip().splitlines())
 
-    def test_implicit_newline(self):
-        # Make sure that the tokenizer puts in an implicit NEWLINE
-        # when the input lacks a trailing new line.
-        f = StringIO("x")
-        tokens = list(generate_tokens(f.readline))
-        self.assertEqual(tokens[-2][0], NEWLINE)
-        self.assertEqual(tokens[-1][0], ENDMARKER)
 
     def test_basic(self):
         self.check_tokenize("1 + 1", """\
```
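For reference, a quick sketch (not part of the diff) of the table that `check_tokenize` builds with the restored loop, printed here for the `1 + 1` fragment used by `test_basic`; the four-space indent and field widths come from the format string above:

```python
from StringIO import StringIO
from tokenize import generate_tokens, tok_name, ENDMARKER

f = StringIO("1 + 1")
for type, token, start, end, line in generate_tokens(f.readline):
    if type == ENDMARKER:
        break
    print "    %-10.10s %-13.13r %s %s" % (tok_name[type], token, start, end)

# Prints (note: no trailing NEWLINE row once the revert is in place):
#     NUMBER     '1'           (1, 0) (1, 1)
#     OP         '+'           (1, 2) (1, 3)
#     NUMBER     '1'           (1, 4) (1, 5)
```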

```diff
@@ -638,7 +616,7 @@ def test_roundtrip(self):
         self.check_roundtrip("if x == 1:\n"
                              "    print x\n")
         self.check_roundtrip("# This is a comment\n"
-                             "# This also\n")
+                             "# This also")
 
         # Some people use different formatting conventions, which makes
         # untokenize a little trickier. Note that this test involves trailing
```
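A sketch of the round-trip property the restored test input relies on, assuming Python 2.7 with this revert applied; `untokenize`, given full five-tuples, rebuilds the source from the recorded token positions, so a comment-only last line without a newline comes back unchanged:

```python
from StringIO import StringIO
from tokenize import generate_tokens, untokenize

# Source whose last line has no trailing newline -- the case the
# restored test input ("# This also") exercises.
source = "# This is a comment\n# This also"
tokens = list(generate_tokens(StringIO(source).readline))

# With full 5-tuples, untokenize reconstructs the text exactly;
# no newline is invented for the final comment line.
assert untokenize(tokens) == source
```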