[3.7] bpo-33899: Make tokenize module mirror end-of-file is end-of-line behavior · python/cpython@ab75d9e

@@ -1,7 +1,8 @@
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                     open as tokenize_open, Untokenizer)
+                     open as tokenize_open, Untokenizer, generate_tokens,
+                     NEWLINE)
 from io import BytesIO
 import unittest
 from unittest import TestCase, mock

@@ -11,27 +12,51 @@
 import token
 
 
+# Converts a source string into a list of textual representation
+# of the tokens such as:
+# `    NAME       'if'          (1, 0) (1, 2)`
+# to make writing tests easier.
+def stringify_tokens_from_source(token_generator, source_string):
+    result = []
+    num_lines = len(source_string.splitlines())
+    missing_trailing_nl = source_string[-1] not in '\r\n'
+
+    for type, token, start, end, line in token_generator:
+        if type == ENDMARKER:
+            break
+        # Ignore the new line on the last line if the input lacks one
+        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
+            continue
+        type = tok_name[type]
+        result.append(f"    {type:10} {token!r:13} {start} {end}")
+
+    return result
+
 class TokenizeTest(TestCase):
     # Tests for the tokenize module.
 
     # The tests can be really simple. Given a small fragment of source
-    # code, print out a table with tokens. The ENDMARKER is omitted for
-    # brevity.
+    # code, print out a table with tokens. The ENDMARKER, ENCODING and
+    # final NEWLINE are omitted for brevity.
 
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
-        # The ENDMARKER is omitted.
-        result = []
+        # The ENDMARKER and final NEWLINE are omitted.
         f = BytesIO(s.encode('utf-8'))
-        for type, token, start, end, line in tokenize(f.readline):
-            if type == ENDMARKER:
-                break
-            type = tok_name[type]
-            result.append(f"    {type:10} {token!r:13} {start} {end}")
+        result = stringify_tokens_from_source(tokenize(f.readline), s)
+
         self.assertEqual(result,
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())
 
+    def test_implicit_newline(self):
+        # Make sure that the tokenizer puts in an implicit NEWLINE
+        # when the input lacks a trailing new line.
+        f = BytesIO("x".encode('utf-8'))
+        tokens = list(tokenize(f.readline))
+        self.assertEqual(tokens[-2].type, NEWLINE)
+        self.assertEqual(tokens[-1].type, ENDMARKER)
+
     def test_basic(self):
         self.check_tokenize("1 + 1", """\
     NUMBER     '1'           (1, 0) (1, 1)
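
Note: the snippet below is not part of the patch. It is a minimal sketch of the behavior that the new `test_implicit_newline` asserts on a 3.7 build with this change applied: even when the source lacks a trailing newline, the token stream still ends with a NEWLINE token followed by ENDMARKER. The input `b"x = 1"` is my own example; the API calls (`tokenize`, `tok_name`) are the ones already imported above.

```python
from io import BytesIO
from tokenize import tokenize, tok_name, NEWLINE, ENDMARKER

# Source with no trailing newline.
toks = list(tokenize(BytesIO(b"x = 1").readline))
for tok in toks:
    print(tok_name[tok.type], repr(tok.string))
# Expected shape on 3.7+ with this change:
#   ENCODING 'utf-8', NAME 'x', OP '=', NUMBER '1', NEWLINE '', ENDMARKER ''

# The same two assertions the new test makes:
assert toks[-2].type == NEWLINE
assert toks[-1].type == ENDMARKER
```
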

@@ -1009,8 +1034,8 @@ def readline():
             else:
                 return b''
 
-        # skip the initial encoding token and the end token
-        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
+        # skip the initial encoding token and the end tokens
+        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")

@@ -1026,8 +1051,8 @@ def readline():
             else:
                 return b''
 
-        # skip the end token
-        tokens = list(_tokenize(readline, encoding=None))[:-1]
+        # skip the end tokens
+        tokens = list(_tokenize(readline, encoding=None))[:-2]
         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "string not tokenized when encoding is None")
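
Illustrative only, not from the patch: why the slices in the two hunks above change from `[1:-1]`/`[:-1]` to `[1:-2]`/`[:-2]`. The sketch uses the public `tokenize` API rather than the private `_tokenize` and assumes a 3.7 build with this change; the stream for a one-line source now ends with both NEWLINE and ENDMARKER, so two trailing tokens have to be dropped to leave only the content tokens.

```python
from io import BytesIO
from tokenize import tokenize

source = '"ЉЊЈЁЂ"'
toks = list(tokenize(BytesIO(source.encode('utf-8')).readline))
# toks is now ENCODING, STRING, NEWLINE, ENDMARKER, so dropping the
# encoding token and the two trailing tokens leaves just the STRING.
content = toks[1:-2]
assert len(content) == 1
assert content[0].string == source
```
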

@@ -1338,18 +1363,21 @@ def test_oneline_defs(self):
 
         # Test that 500 consequent, one-line defs is OK
         toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
-        self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
+        self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
+                                                # [-2] is always NEWLINE
 
     def assertExactTypeEqual(self, opstr, *optypes):
         tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
         num_optypes = len(optypes)
-        self.assertEqual(len(tokens), 2 + num_optypes)
+        self.assertEqual(len(tokens), 3 + num_optypes)
         self.assertEqual(tok_name[tokens[0].exact_type],
                          tok_name[ENCODING])
         for i in range(num_optypes):
             self.assertEqual(tok_name[tokens[i + 1].exact_type],
                              tok_name[optypes[i]])
         self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
+                         tok_name[token.NEWLINE])
+        self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
                          tok_name[token.ENDMARKER])
 
     def test_exact_type(self):
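
Rough illustration, not part of the test suite: the new `3 + num_optypes` expectation in `assertExactTypeEqual` accounts for ENCODING at the front plus NEWLINE and ENDMARKER at the end bracketing the operator tokens. The operator `**=` is my own example input; any operator with an exact type would do.

```python
from io import BytesIO
from tokenize import tokenize, tok_name

toks = list(tokenize(BytesIO(b"**=").readline))
print([tok_name[t.exact_type] for t in toks])
# ['ENCODING', 'DOUBLESTAREQUAL', 'NEWLINE', 'ENDMARKER']
assert len(toks) == 3 + 1   # one operator token
```
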

@@ -1502,7 +1530,7 @@ def test_roundtrip(self):
         self.check_roundtrip("if x == 1:\n"
                              "    print(x)\n")
         self.check_roundtrip("# This is a comment\n"
-                             "# This also")
+                             "# This also\n")
 
         # Some people use different formatting conventions, which makes
         # untokenize a little trickier. Note that this test involves trailing
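
Hedged sketch of the round-trip property the last hunk exercises (`check_roundtrip` is defined elsewhere in this test file and compares (type, string) pairs rather than raw bytes). With the trailing newline added to the comment-only input, tokenize → untokenize → tokenize should reproduce the same pairs on a 3.7 build with this change; the code below is my own condensed version of that check, not the test's exact implementation.

```python
from io import BytesIO
from tokenize import tokenize, untokenize

code = b"# This is a comment\n# This also\n"
tokens = [tok[:2] for tok in tokenize(BytesIO(code).readline)]
regenerated = untokenize(tokens)              # bytes, encoded per ENCODING token
retokenized = [tok[:2] for tok in tokenize(BytesIO(regenerated).readline)]
assert tokens == retokenized
```
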