[3.6] bpo-33899: Make tokenize module mirror end-of-file is end-of-li… · python/cpython@11c36a3
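The diff below (from `Lib/test/test_tokenize.py`) updates the test suite for the change this commit backports: `tokenize` now emits an implicit NEWLINE token when the source does not end with a newline, so ENDMARKER is always preceded by a logical end-of-line. The sketch that follows mirrors the new `test_implicit_newline` test rather than reproducing it; the printed token names are the expected result under this change, not output captured from the diff.

```python
from io import BytesIO
from tokenize import tokenize, tok_name

# "x" has no trailing newline; with this change the tokenizer still
# emits a NEWLINE token immediately before ENDMARKER.
tokens = list(tokenize(BytesIO(b"x").readline))
print([tok_name[t.type] for t in tokens])
# Expected with this change: ['ENCODING', 'NAME', 'NEWLINE', 'ENDMARKER']
```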

```diff
@@ -1,7 +1,8 @@
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                     open as tokenize_open, Untokenizer)
+                     open as tokenize_open, Untokenizer, generate_tokens,
+                     NEWLINE)
 from io import BytesIO
 import unittest
 from unittest import TestCase, mock
```
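The import list now also pulls in `generate_tokens` and `NEWLINE`. As a point of reference (its use is not shown in this excerpt), `generate_tokens()` is the str-based counterpart of `tokenize()`: it takes a readline callable returning str and emits no ENCODING token. A small illustrative sketch, with the expected tail stated as an assumption under this change:

```python
import io
from tokenize import generate_tokens, tok_name

# Str-based tokenization; "x = 1" has no trailing newline, yet the
# stream is still expected to end with NEWLINE, ENDMARKER.
toks = list(generate_tokens(io.StringIO("x = 1").readline))
print([tok_name[t.type] for t in toks])
# Expected with this change: ['NAME', 'OP', 'NUMBER', 'NEWLINE', 'ENDMARKER']
```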

```diff
@@ -11,27 +12,51 @@
 import token
 
 
+# Converts a source string into a list of textual representation
+# of the tokens such as:
+# `    NAME       'if'          (1, 0) (1, 2)`
+# to make writing tests easier.
+def stringify_tokens_from_source(token_generator, source_string):
+    result = []
+    num_lines = len(source_string.splitlines())
+    missing_trailing_nl = source_string[-1] not in '\r\n'
+
+    for type, token, start, end, line in token_generator:
+        if type == ENDMARKER:
+            break
+        # Ignore the new line on the last line if the input lacks one
+        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
+            continue
+        type = tok_name[type]
+        result.append(f"    {type:10} {token!r:13} {start} {end}")
+
+    return result
+
 class TokenizeTest(TestCase):
     # Tests for the tokenize module.
 
     # The tests can be really simple. Given a small fragment of source
-    # code, print out a table with tokens. The ENDMARKER is omitted for
-    # brevity.
+    # code, print out a table with tokens. The ENDMARKER, ENCODING and
+    # final NEWLINE are omitted for brevity.
 
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
-        # The ENDMARKER is omitted.
-        result = []
+        # The ENDMARKER and final NEWLINE are omitted.
         f = BytesIO(s.encode('utf-8'))
-        for type, token, start, end, line in tokenize(f.readline):
-            if type == ENDMARKER:
-                break
-            type = tok_name[type]
-            result.append(f"    {type:10} {token!r:13} {start} {end}")
+        result = stringify_tokens_from_source(tokenize(f.readline), s)
+
         self.assertEqual(result,
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())
 
+    def test_implicit_newline(self):
+        # Make sure that the tokenizer puts in an implicit NEWLINE
+        # when the input lacks a trailing new line.
+        f = BytesIO("x".encode('utf-8'))
+        tokens = list(tokenize(f.readline))
+        self.assertEqual(tokens[-2].type, NEWLINE)
+        self.assertEqual(tokens[-1].type, ENDMARKER)
+
     def test_basic(self):
         self.check_tokenize("1 + 1", """\
     NUMBER     '1'           (1, 0) (1, 1)
```
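For context, the `stringify_tokens_from_source` helper added above produces the same table-style lines that `check_tokenize` compares against its expected text. A rough standalone usage sketch; it assumes the helper from the hunk above is in scope, and the commented output is an approximation of its `{type:10} {token!r:13}` formatting:

```python
from io import BytesIO
from tokenize import tokenize

source = "1 + 1"   # no trailing newline, so the helper filters the final NEWLINE
lines = stringify_tokens_from_source(
    tokenize(BytesIO(source.encode('utf-8')).readline), source)
# The ENCODING entry is not filtered by the helper, which is why
# check_tokenize prepends it to the expected output explicitly:
#     ENCODING   'utf-8'       (0, 0) (0, 0)
#     NUMBER     '1'           (1, 0) (1, 1)
#     OP         '+'           (1, 2) (1, 3)
#     NUMBER     '1'           (1, 4) (1, 5)
```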

```diff
@@ -993,8 +1018,8 @@ def readline():
             else:
                 return b''
 
-        # skip the initial encoding token and the end token
-        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
+        # skip the initial encoding token and the end tokens
+        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2]
         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
```

```diff
@@ -1010,8 +1035,8 @@ def readline():
             else:
                 return b''
 
-        # skip the end token
-        tokens = list(_tokenize(readline, encoding=None))[:-1]
+        # skip the end tokens
+        tokens = list(_tokenize(readline, encoding=None))[:-2]
         expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "string not tokenized when encoding is None")
```
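The two hunks above change the trailing slice from one element to two because, with the implicit NEWLINE, the token stream for a source lacking a final newline now ends in NEWLINE followed by ENDMARKER. A sketch of the tail those `[1:-2]` / `[:-2]` slices discard, using the public `tokenize()` rather than the private `_tokenize()` the tests call:

```python
from io import BytesIO
from tokenize import tokenize, tok_name

toks = list(tokenize(BytesIO('"ЉЊЈЁЂ"'.encode('utf-8')).readline))
print([tok_name[t.type] for t in toks])
# Expected with this change: ['ENCODING', 'STRING', 'NEWLINE', 'ENDMARKER'],
# so the tests now drop two trailing tokens instead of one.
```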

```diff
@@ -1322,18 +1347,21 @@ def test_oneline_defs(self):
 
         # Test that 500 consequent, one-line defs is OK
         toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
-        self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
+        self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
+                                                # [-2] is always NEWLINE
 
     def assertExactTypeEqual(self, opstr, *optypes):
         tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
         num_optypes = len(optypes)
-        self.assertEqual(len(tokens), 2 + num_optypes)
+        self.assertEqual(len(tokens), 3 + num_optypes)
         self.assertEqual(token.tok_name[tokens[0].exact_type],
                          token.tok_name[ENCODING])
         for i in range(num_optypes):
             self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
                              token.tok_name[optypes[i]])
         self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
+                         token.tok_name[token.NEWLINE])
+        self.assertEqual(token.tok_name[tokens[2 + num_optypes].exact_type],
                          token.tok_name[token.ENDMARKER])
 
     def test_exact_type(self):
```
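In `assertExactTypeEqual` above, the expected length becomes `3 + num_optypes` because the stream is now ENCODING, the operator tokens, NEWLINE, ENDMARKER, and the two final assertions pin down that NEWLINE/ENDMARKER order. A small sketch of the count for a single operator; it is an assumed interactive check, not part of the diff:

```python
from io import BytesIO
from tokenize import tokenize

tokens = list(tokenize(BytesIO(b"+").readline))
# ENCODING + one OP token + NEWLINE + ENDMARKER
assert len(tokens) == 3 + 1   # previously 2 + 1, before the implicit NEWLINE
```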

```diff
@@ -1484,7 +1512,7 @@ def test_roundtrip(self):
         self.check_roundtrip("if x == 1:\n"
                              "    print(x)\n")
         self.check_roundtrip("# This is a comment\n"
-                             "# This also")
+                             "# This also\n")
 
         # Some people use different formatting conventions, which makes
         # untokenize a little trickier. Note that this test involves trailing
```
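The roundtrip input gains a trailing `\n`, presumably because untokenizing a stream that now ends with an implicit NEWLINE yields source ending in a newline, so only inputs that already end with one roundtrip exactly. A hedged sketch of the roundtrip the updated input preserves; the assertion mirrors what `check_roundtrip` is assumed to verify for string input:

```python
from io import BytesIO
from tokenize import tokenize, untokenize

src = "# This is a comment\n# This also\n"
out = untokenize(tokenize(BytesIO(src.encode('utf-8')).readline)).decode('utf-8')
assert out == src   # exact roundtrip once the input ends with a newline
```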