mirror of https://github.com/python/cpython (synced 2024-07-23 11:16:11 +00:00)
parent 581619941e
commit ab3823a97b
Lib/test/test_tokenize.py

@@ -1,20 +1,16 @@
-from test import support
-from test.support import os_helper
-from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
-                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
-                      open as tokenize_open, Untokenizer, generate_tokens,
-                      NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo,
-                      TokenError)
-from io import BytesIO, StringIO
+import os
+import token
+import tokenize
 import unittest
+from io import BytesIO, StringIO
 from textwrap import dedent
 from unittest import TestCase, mock
+from test import support
+from test.support import os_helper
 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                                INVALID_UNDERSCORE_LITERALS)
 from test.support import os_helper
 from test.support.script_helper import run_test_script, make_script, run_python_until_end
-import os
-import token
 
 # Converts a source string into a list of textual representation
 # of the tokens such as:
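Note: the hunks that follow all apply one mechanical change — the test module now does a plain `import tokenize` and spells names out as `tokenize.tokenize`, `tokenize.NUMBER`, `tokenize.detect_encoding`, and so on, instead of pulling them in with the from-import removed above. A minimal sketch of the two styles (illustrative only, not part of the diff):

    import io
    import tokenize

    # Old style (removed): from tokenize import tokenize, NUMBER
    # New style: qualify everything through the module.
    readline = io.BytesIO(b"1 + 1\n").readline
    toks = list(tokenize.tokenize(readline))
    assert toks[1].type == tokenize.NUMBER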
@@ -26,12 +22,12 @@ def stringify_tokens_from_source(token_generator, source_string):
     missing_trailing_nl = source_string[-1] not in '\r\n'
 
     for type, token, start, end, line in token_generator:
-        if type == ENDMARKER:
+        if type == tokenize.ENDMARKER:
             break
         # Ignore the new line on the last line if the input lacks one
-        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
+        if missing_trailing_nl and type == tokenize.NEWLINE and end[0] == num_lines:
             continue
-        type = tok_name[type]
+        type = tokenize.tok_name[type]
         result.append(f" {type:10} {token!r:13} {start} {end}")
 
     return result
@@ -47,7 +43,7 @@ def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
         # The ENDMARKER and final NEWLINE are omitted.
         f = BytesIO(s.encode('utf-8'))
-        result = stringify_tokens_from_source(tokenize(f.readline), s)
+        result = stringify_tokens_from_source(tokenize.tokenize(f.readline), s)
         self.assertEqual(result,
                          [" ENCODING 'utf-8' (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())
@@ -57,27 +53,27 @@ def gen():
             yield "sdfosdg"
             yield "sdfosdg"
         with self.assertRaises(TypeError):
-            list(tokenize(gen().__next__))
+            list(tokenize.tokenize(gen().__next__))
 
         def gen():
             yield b"sdfosdg"
             yield b"sdfosdg"
         with self.assertRaises(TypeError):
-            list(generate_tokens(gen().__next__))
+            list(tokenize.generate_tokens(gen().__next__))
 
         def gen():
             yield "sdfosdg"
             1/0
         with self.assertRaises(ZeroDivisionError):
-            list(generate_tokens(gen().__next__))
+            list(tokenize.generate_tokens(gen().__next__))
 
     def test_implicit_newline(self):
         # Make sure that the tokenizer puts in an implicit NEWLINE
         # when the input lacks a trailing new line.
         f = BytesIO("x".encode('utf-8'))
-        tokens = list(tokenize(f.readline))
-        self.assertEqual(tokens[-2].type, NEWLINE)
-        self.assertEqual(tokens[-1].type, ENDMARKER)
+        tokens = list(tokenize.tokenize(f.readline))
+        self.assertEqual(tokens[-2].type, tokenize.NEWLINE)
+        self.assertEqual(tokens[-1].type, tokenize.ENDMARKER)
 
     def test_basic(self):
         self.check_tokenize("1 + 1", """\
@@ -139,7 +135,7 @@ def k(x):
         with self.assertRaisesRegex(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level") as e:
-            for tok in tokenize(readline):
+            for tok in tokenize.tokenize(readline):
                 pass
         self.assertEqual(e.exception.lineno, 3)
         self.assertEqual(e.exception.filename, '<string>')
@@ -275,8 +271,8 @@ def test_float(self):
     def test_underscore_literals(self):
         def number_token(s):
             f = BytesIO(s.encode('utf-8'))
-            for toktype, token, start, end, line in tokenize(f.readline):
-                if toktype == NUMBER:
+            for toktype, token, start, end, line in tokenize.tokenize(f.readline):
+                if toktype == tokenize.NUMBER:
                     return token
             return 'invalid token'
         for lit in VALID_UNDERSCORE_LITERALS:
@@ -292,7 +288,7 @@ def number_token(s):
                 continue
             try:
                 number_token(lit)
-            except TokenError:
+            except tokenize.TokenError:
                 continue
             self.assertNotEqual(number_token(lit), lit)
 
@@ -1150,24 +1146,24 @@ def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
         # The ENDMARKER and final NEWLINE are omitted.
         f = StringIO(s)
-        result = stringify_tokens_from_source(generate_tokens(f.readline), s)
+        result = stringify_tokens_from_source(tokenize.generate_tokens(f.readline), s)
         self.assertEqual(result, expected.rstrip().splitlines())
 
 
 def decistmt(s):
     result = []
-    g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
+    g = tokenize.tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
     for toknum, tokval, _, _, _ in g:
-        if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
+        if toknum == tokenize.NUMBER and '.' in tokval: # replace NUMBER tokens
             result.extend([
-                (NAME, 'Decimal'),
-                (OP, '('),
-                (STRING, repr(tokval)),
-                (OP, ')')
+                (tokenize.NAME, 'Decimal'),
+                (tokenize.OP, '('),
+                (tokenize.STRING, repr(tokval)),
+                (tokenize.OP, ')')
             ])
         else:
             result.append((toknum, tokval))
-    return untokenize(result).decode('utf-8').strip()
+    return tokenize.untokenize(result).decode('utf-8').strip()
 
 class TestMisc(TestCase):
 
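Note: `decistmt()` above is the classic tokenize example helper: it re-emits every float literal as a `Decimal('...')` call and round-trips the token stream through `untokenize()`. A usage sketch, assuming the helper is in scope (the expected value comes from `test_decistmt` in the next hunk; exact spacing of the untokenized source depends on `untokenize()`'s compatibility mode):

    from decimal import Decimal

    s = '+21.3e-5*-.1234/81.7'
    print(eval(s))            # float arithmetic: ~-3.21716034272e-07
    print(eval(decistmt(s)))  # Decimal arithmetic: -3.217160342717258261933904529E-7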
@@ -1191,6 +1187,13 @@ def test_decistmt(self):
         self.assertEqual(eval(decistmt(s)),
                          Decimal('-3.217160342717258261933904529E-7'))
 
+    def test___all__(self):
+        expected = token.__all__ + [
+            "TokenInfo", "TokenError", "generate_tokens",
+            "detect_encoding", "untokenize", "open", "tokenize",
+        ]
+        self.assertCountEqual(tokenize.__all__, expected)
+
 
 class TestTokenizerAdheresToPep0263(TestCase):
     """
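Note: the new `test___all__` pins down the module's public surface; together with the `__all__` change to Lib/tokenize.py at the bottom of this commit it means `tokenize.open` and `tokenize.TokenError` are now advertised exports. A quick check on an interpreter that includes this change (a sketch, not part of the diff):

    import tokenize

    assert "open" in tokenize.__all__
    assert "TokenError" in tokenize.__all__
    assert set(tokenize.__all__) >= {"tokenize", "generate_tokens",
                                     "detect_encoding", "untokenize", "TokenInfo"}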
@@ -1245,9 +1248,10 @@ def readline():
             yield b''
 
         # skip the initial encoding token and the end tokens
-        tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
-                                                        extra_tokens=True))[:-2]
-        expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
+        tokens = list(tokenize._generate_tokens_from_c_tokenizer(readline().__next__,
+                                                                 encoding='utf-8',
+                                                                 extra_tokens=True))[:-2]
+        expected_tokens = [tokenize.TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
 
@@ -1271,7 +1275,7 @@ def test_no_bom_no_encoding_cookie(self):
             b'print(something)\n',
             b'do_something(else)\n'
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'utf-8')
         self.assertEqual(consumed_lines, list(lines[:2]))
 
@@ -1281,7 +1285,7 @@ def test_bom_no_cookie(self):
             b'print(something)\n',
             b'do_something(else)\n'
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'utf-8-sig')
         self.assertEqual(consumed_lines,
                          [b'# something\n', b'print(something)\n'])
@@ -1292,7 +1296,7 @@ def test_cookie_first_line_no_bom(self):
             b'print(something)\n',
             b'do_something(else)\n'
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'iso-8859-1')
         self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
 
@@ -1302,7 +1306,7 @@ def test_matched_bom_and_cookie_first_line(self):
             b'print(something)\n',
             b'do_something(else)\n'
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'utf-8-sig')
         self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
 
@@ -1313,7 +1317,7 @@ def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
             b'do_something(else)\n'
         )
         readline = self.get_readline(lines)
-        self.assertRaises(SyntaxError, detect_encoding, readline)
+        self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
 
     def test_cookie_second_line_no_bom(self):
         lines = (
@@ -1322,7 +1326,7 @@ def test_cookie_second_line_no_bom(self):
             b'print(something)\n',
             b'do_something(else)\n'
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'ascii')
         expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
         self.assertEqual(consumed_lines, expected)
@@ -1334,7 +1338,7 @@ def test_matched_bom_and_cookie_second_line(self):
             b'print(something)\n',
             b'do_something(else)\n'
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'utf-8-sig')
         self.assertEqual(consumed_lines,
                          [b'#! something\n', b'f# coding=utf-8\n'])
@@ -1347,7 +1351,7 @@ def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
             b'do_something(else)\n'
         )
         readline = self.get_readline(lines)
-        self.assertRaises(SyntaxError, detect_encoding, readline)
+        self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
 
     def test_cookie_second_line_noncommented_first_line(self):
         lines = (
@@ -1355,7 +1359,7 @@ def test_cookie_second_line_noncommented_first_line(self):
             b'# vim: set fileencoding=iso8859-15 :\n',
             b"print('\xe2\x82\xac')\n"
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'utf-8')
         expected = [b"print('\xc2\xa3')\n"]
         self.assertEqual(consumed_lines, expected)
@@ -1366,7 +1370,7 @@ def test_cookie_second_line_commented_first_line(self):
             b'# vim: set fileencoding=iso8859-15 :\n',
             b"print('\xe2\x82\xac')\n"
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'iso8859-15')
         expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
         self.assertEqual(consumed_lines, expected)
@@ -1377,7 +1381,7 @@ def test_cookie_second_line_empty_first_line(self):
             b'# vim: set fileencoding=iso8859-15 :\n',
             b"print('\xe2\x82\xac')\n"
         )
-        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
         self.assertEqual(encoding, 'iso8859-15')
         expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
         self.assertEqual(consumed_lines, expected)
@@ -1394,7 +1398,7 @@ def test_latin1_normalization(self):
                          b"print(things)\n",
                          b"do_something += 4\n")
                 rl = self.get_readline(lines)
-                found, consumed_lines = detect_encoding(rl)
+                found, consumed_lines = tokenize.detect_encoding(rl)
                 self.assertEqual(found, "iso-8859-1")
 
     def test_syntaxerror_latin1(self):
@@ -1404,7 +1408,7 @@ def test_syntaxerror_latin1(self):
             b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
         )
         readline = self.get_readline(lines)
-        self.assertRaises(SyntaxError, detect_encoding, readline)
+        self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
 
 
     def test_utf8_normalization(self):
@@ -1417,36 +1421,36 @@ def test_utf8_normalization(self):
                          b"# coding: " + enc.encode("ascii") + b"\n",
                          b"1 + 3\n")
                 rl = self.get_readline(lines)
-                found, consumed_lines = detect_encoding(rl)
+                found, consumed_lines = tokenize.detect_encoding(rl)
                 self.assertEqual(found, "utf-8")
 
     def test_short_files(self):
         readline = self.get_readline((b'print(something)\n',))
-        encoding, consumed_lines = detect_encoding(readline)
+        encoding, consumed_lines = tokenize.detect_encoding(readline)
         self.assertEqual(encoding, 'utf-8')
         self.assertEqual(consumed_lines, [b'print(something)\n'])
 
-        encoding, consumed_lines = detect_encoding(self.get_readline(()))
+        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(()))
         self.assertEqual(encoding, 'utf-8')
         self.assertEqual(consumed_lines, [])
 
         readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
-        encoding, consumed_lines = detect_encoding(readline)
+        encoding, consumed_lines = tokenize.detect_encoding(readline)
         self.assertEqual(encoding, 'utf-8-sig')
         self.assertEqual(consumed_lines, [b'print(something)\n'])
 
         readline = self.get_readline((b'\xef\xbb\xbf',))
-        encoding, consumed_lines = detect_encoding(readline)
+        encoding, consumed_lines = tokenize.detect_encoding(readline)
         self.assertEqual(encoding, 'utf-8-sig')
         self.assertEqual(consumed_lines, [])
 
         readline = self.get_readline((b'# coding: bad\n',))
-        self.assertRaises(SyntaxError, detect_encoding, readline)
+        self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
 
     def test_false_encoding(self):
         # Issue 18873: "Encoding" detected in non-comment lines
         readline = self.get_readline((b'print("#coding=fake")',))
-        encoding, consumed_lines = detect_encoding(readline)
+        encoding, consumed_lines = tokenize.detect_encoding(readline)
         self.assertEqual(encoding, 'utf-8')
         self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
 
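Note: the assertions above all go through `tokenize.detect_encoding()`, which reads at most two lines from the given `readline` callable and returns the encoding (from a PEP 263 cookie and/or a BOM, defaulting to UTF-8) plus the raw lines it consumed. A minimal sketch:

    import io
    import tokenize

    buf = io.BytesIO(b"# -*- coding: latin-1 -*-\nprint('hi')\n")
    encoding, consumed = tokenize.detect_encoding(buf.readline)
    print(encoding)  # 'iso-8859-1' (latin-1 is normalized, as test_cookie_first_line_no_bom shows)
    print(consumed)  # [b'# -*- coding: latin-1 -*-\n']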
@@ -1459,14 +1463,14 @@ def test_open(self):
             with open(filename, 'w', encoding=encoding) as fp:
                 print("# coding: %s" % encoding, file=fp)
                 print("print('euro:\u20ac')", file=fp)
-            with tokenize_open(filename) as fp:
+            with tokenize.open(filename) as fp:
                 self.assertEqual(fp.encoding, encoding)
                 self.assertEqual(fp.mode, 'r')
 
         # test BOM (no coding cookie)
         with open(filename, 'w', encoding='utf-8-sig') as fp:
             print("print('euro:\u20ac')", file=fp)
-        with tokenize_open(filename) as fp:
+        with tokenize.open(filename) as fp:
             self.assertEqual(fp.encoding, 'utf-8-sig')
             self.assertEqual(fp.mode, 'r')
 
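Note: `tokenize.open()` (imported as `tokenize_open` before this commit) opens a Python source file read-only in text mode, using the encoding reported by `detect_encoding()`. A small sketch; the file name here is hypothetical:

    import tokenize

    with tokenize.open('example.py') as fp:  # any existing source file
        print(fp.encoding)  # e.g. 'utf-8' or 'utf-8-sig'
        print(fp.mode)      # 'r'
        source = fp.read()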
@@ -1493,16 +1497,16 @@ def readline(self):
         ins = Bunk(lines, path)
         # Make sure lacking a name isn't an issue.
         del ins.name
-        detect_encoding(ins.readline)
+        tokenize.detect_encoding(ins.readline)
         with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
             ins = Bunk(lines, path)
-            detect_encoding(ins.readline)
+            tokenize.detect_encoding(ins.readline)
 
     def test_open_error(self):
         # Issue #23840: open() must close the binary file on error
         m = BytesIO(b'#coding:xxx')
         with mock.patch('tokenize._builtin_open', return_value=m):
-            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
+            self.assertRaises(SyntaxError, tokenize.open, 'foobar')
         self.assertTrue(m.closed)
 
 
@@ -1542,7 +1546,7 @@ def mock_readline():
         tokenize_module.detect_encoding = mock_detect_encoding
         tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
         try:
-            results = tokenize(mock_readline)
+            results = tokenize.tokenize(mock_readline)
             self.assertEqual(list(results)[1:],
                              [b'first', b'second', b'1', b'2', b'3', b'4'])
         finally:
@@ -1559,23 +1563,23 @@ def test_oneline_defs(self):
         buf = '\n'.join(buf)
 
         # Test that 500 consequent, one-line defs is OK
-        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
+        toks = list(tokenize.tokenize(BytesIO(buf.encode('utf-8')).readline))
         self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
                                                 # [-2] is always NEWLINE
 
     def assertExactTypeEqual(self, opstr, *optypes):
-        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
+        tokens = list(tokenize.tokenize(BytesIO(opstr.encode('utf-8')).readline))
         num_optypes = len(optypes)
         self.assertEqual(len(tokens), 3 + num_optypes)
-        self.assertEqual(tok_name[tokens[0].exact_type],
-                         tok_name[ENCODING])
+        self.assertEqual(tokenize.tok_name[tokens[0].exact_type],
+                         tokenize.tok_name[tokenize.ENCODING])
         for i in range(num_optypes):
-            self.assertEqual(tok_name[tokens[i + 1].exact_type],
-                             tok_name[optypes[i]])
-        self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
-                         tok_name[token.NEWLINE])
-        self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
-                         tok_name[token.ENDMARKER])
+            self.assertEqual(tokenize.tok_name[tokens[i + 1].exact_type],
+                             tokenize.tok_name[optypes[i]])
+        self.assertEqual(tokenize.tok_name[tokens[1 + num_optypes].exact_type],
+                         tokenize.tok_name[token.NEWLINE])
+        self.assertEqual(tokenize.tok_name[tokens[2 + num_optypes].exact_type],
+                         tokenize.tok_name[token.ENDMARKER])
 
     def test_exact_type(self):
         self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
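Note: `assertExactTypeEqual` leans on the difference between a token's broad `type` and its `exact_type`: operators are reported as `OP`, while `exact_type` resolves to the specific constant such as `LPAR` or `RPAR`. A short sketch:

    import io
    import token
    import tokenize

    toks = list(tokenize.tokenize(io.BytesIO(b"()").readline))
    lparen = toks[1]  # toks[0] is the ENCODING token
    print(tokenize.tok_name[lparen.type])        # 'OP'
    print(tokenize.tok_name[lparen.exact_type])  # 'LPAR'
    assert lparen.exact_type == token.LPAR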
@@ -1625,11 +1629,11 @@ def test_exact_type(self):
         self.assertExactTypeEqual('@=', token.ATEQUAL)
 
         self.assertExactTypeEqual('a**2+b**2==c**2',
-                                  NAME, token.DOUBLESTAR, NUMBER,
+                                  tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER,
                                   token.PLUS,
-                                  NAME, token.DOUBLESTAR, NUMBER,
+                                  tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER,
                                   token.EQEQUAL,
-                                  NAME, token.DOUBLESTAR, NUMBER)
+                                  tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER)
         self.assertExactTypeEqual('{1, 2, 3}',
                                   token.LBRACE,
                                   token.NUMBER, token.COMMA,
@@ -1650,32 +1654,32 @@ def test_comment_at_the_end_of_the_source_without_newline(self):
         # See http://bugs.python.org/issue44667
         source = 'b = 1\n\n#test'
         expected_tokens = [
-            TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
-            TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
-            TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
-            TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
-            TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
-            TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
-            TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
-            TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
-            TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
+            tokenize.TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
+            tokenize.TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
+            tokenize.TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
+            tokenize.TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
+            tokenize.TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
+            tokenize.TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
+            tokenize.TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
+            tokenize.TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
+            tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
         ]
 
-        tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
+        tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline))
         self.assertEqual(tokens, expected_tokens)
 
     def test_newline_and_space_at_the_end_of_the_source_without_newline(self):
         # See https://github.com/python/cpython/issues/105435
         source = 'a\n '
         expected_tokens = [
-            TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
-            TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
-            TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
-            TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
-            TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
+            tokenize.TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
+            tokenize.TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
+            tokenize.TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
+            tokenize.TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
+            tokenize.TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
         ]
 
-        tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
+        tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline))
         self.assertEqual(tokens, expected_tokens)
 
     def test_invalid_character_in_fstring_middle(self):
@@ -1695,7 +1699,7 @@ class UntokenizeTest(TestCase):
 
     def test_bad_input_order(self):
         # raise if previous row
-        u = Untokenizer()
+        u = tokenize.Untokenizer()
         u.prev_row = 2
         u.prev_col = 2
         with self.assertRaises(ValueError) as cm:
@@ -1707,7 +1711,7 @@ def test_bad_input_order(self):
 
     def test_backslash_continuation(self):
         # The problem is that <whitespace>\<newline> leaves no token
-        u = Untokenizer()
+        u = tokenize.Untokenizer()
         u.prev_row = 1
         u.prev_col = 1
         u.tokens = []
@@ -1719,17 +1723,17 @@ def test_backslash_continuation(self):
         TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
 
     def test_iter_compat(self):
-        u = Untokenizer()
-        token = (NAME, 'Hello')
-        tokens = [(ENCODING, 'utf-8'), token]
+        u = tokenize.Untokenizer()
+        token = (tokenize.NAME, 'Hello')
+        tokens = [(tokenize.ENCODING, 'utf-8'), token]
         u.compat(token, iter([]))
         self.assertEqual(u.tokens, ["Hello "])
-        u = Untokenizer()
+        u = tokenize.Untokenizer()
         self.assertEqual(u.untokenize(iter([token])), 'Hello ')
-        u = Untokenizer()
+        u = tokenize.Untokenizer()
         self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
         self.assertEqual(u.encoding, 'utf-8')
-        self.assertEqual(untokenize(iter(tokens)), b'Hello ')
+        self.assertEqual(tokenize.untokenize(iter(tokens)), b'Hello ')
 
 
 class TestRoundtrip(TestCase):
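Note: `test_iter_compat` covers the two input shapes `untokenize()` accepts: full 5-tuples let it honour token positions, while bare `(type, string)` pairs fall back to a space-separated compatibility mode (hence the trailing space in `b'Hello '`). A rough sketch of the same round-trip idea used by `check_roundtrip` below:

    import io
    import tokenize

    source = b"1 + 2\n"
    toks = list(tokenize.tokenize(io.BytesIO(source).readline))

    # Full TokenInfo tuples: positions are respected, so a simple source
    # like this one comes back unchanged.
    print(tokenize.untokenize(toks))  # b'1 + 2\n'

    # 2-tuples: compatibility mode; spacing may change, but re-tokenizing
    # the output yields the same (type, string) pairs.
    pairs = [tok[:2] for tok in toks]
    round_tripped = tokenize.untokenize(pairs)
    retoks = [tok[:2] for tok in tokenize.tokenize(io.BytesIO(round_tripped).readline)]
    assert retoks == pairs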
@@ -1752,17 +1756,17 @@ def check_roundtrip(self, f):
         else:
             code = f.read()
         readline = iter(code.splitlines(keepends=True)).__next__
-        tokens5 = list(tokenize(readline))
+        tokens5 = list(tokenize.tokenize(readline))
         tokens2 = [tok[:2] for tok in tokens5]
         # Reproduce tokens2 from pairs
-        bytes_from2 = untokenize(tokens2)
+        bytes_from2 = tokenize.untokenize(tokens2)
         readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
-        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
+        tokens2_from2 = [tok[:2] for tok in tokenize.tokenize(readline2)]
         self.assertEqual(tokens2_from2, tokens2)
         # Reproduce tokens2 from 5-tuples
-        bytes_from5 = untokenize(tokens5)
+        bytes_from5 = tokenize.untokenize(tokens5)
         readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
-        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
+        tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)]
         self.assertEqual(tokens2_from5, tokens2)
 
     def check_line_extraction(self, f):
@@ -1771,8 +1775,8 @@ def check_line_extraction(self, f):
         else:
             code = f.read()
         readline = iter(code.splitlines(keepends=True)).__next__
-        for tok in tokenize(readline):
-            if tok.type in {ENCODING, ENDMARKER}:
+        for tok in tokenize.tokenize(readline):
+            if tok.type in {tokenize.ENCODING, tokenize.ENDMARKER}:
                 continue
             self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]])
 
@@ -1878,7 +1882,7 @@ def test_random_files(self):
     def roundtrip(self, code):
         if isinstance(code, str):
             code = code.encode('utf-8')
-        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
+        return tokenize.untokenize(tokenize.tokenize(BytesIO(code).readline)).decode('utf-8')
 
     def test_indentation_semantics_retained(self):
         """
@@ -1896,27 +1900,27 @@ def test_number_followed_by_name(self):
         # See issue #gh-105549
         source = "2sin(x)"
         expected_tokens = [
-            TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
-            TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
-            TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
-            TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
-            TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
-            TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
-            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+            tokenize.TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
+            tokenize.TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
+            tokenize.TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
+            tokenize.TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
+            tokenize.TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
+            tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
+            tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
         ]
 
-        tokens = list(generate_tokens(StringIO(source).readline))
+        tokens = list(tokenize.generate_tokens(StringIO(source).readline))
         self.assertEqual(tokens, expected_tokens)
 
     def test_number_starting_with_zero(self):
         source = "01234"
         expected_tokens = [
-            TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
-            TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
-            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+            tokenize.TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
+            tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
+            tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
         ]
 
-        tokens = list(generate_tokens(StringIO(source).readline))
+        tokens = list(tokenize.generate_tokens(StringIO(source).readline))
         self.assertEqual(tokens, expected_tokens)
 
 class CTokenizeTest(TestCase):
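Note: these last two tests use the string-based entry point. `tokenize.generate_tokens()` takes a `readline` returning `str` and emits no ENCODING token, whereas `tokenize.tokenize()` expects bytes. A minimal sketch mirroring the gh-105549 case above:

    import io
    import tokenize

    for tok in tokenize.generate_tokens(io.StringIO("2sin(x)\n").readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
    # NUMBER '2', NAME 'sin', OP '(', NAME 'x', OP ')', NEWLINE '\n', ENDMARKER ''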
@@ -1926,7 +1930,7 @@ def check_tokenize(self, s, expected):
         f = StringIO(s)
         with self.subTest(source=s):
             result = stringify_tokens_from_source(
-                _generate_tokens_from_c_tokenizer(f.readline), s
+                tokenize._generate_tokens_from_c_tokenizer(f.readline), s
             )
             self.assertEqual(result, expected.rstrip().splitlines())
 
@@ -1935,15 +1939,15 @@ def readline(encoding):
             yield "1+1".encode(encoding)
 
         expected = [
-            TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
-            TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
-            TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
-            TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
-            TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+            tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
+            tokenize.TokenInfo(type=tokenize.OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
+            tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
+            tokenize.TokenInfo(type=tokenize.NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
+            tokenize.TokenInfo(type=tokenize.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
         ]
         for encoding in ["utf-8", "latin-1", "utf-16"]:
             with self.subTest(encoding=encoding):
-                tokens = list(_generate_tokens_from_c_tokenizer(
+                tokens = list(tokenize._generate_tokens_from_c_tokenizer(
                     readline(encoding).__next__,
                     extra_tokens=True,
                     encoding=encoding,
@@ -2796,7 +2800,7 @@ def test_unicode(self):
     def test_invalid_syntax(self):
         def get_tokens(string):
             the_string = StringIO(string)
-            return list(_generate_tokens_from_c_tokenizer(the_string.readline))
+            return list(tokenize._generate_tokens_from_c_tokenizer(the_string.readline))
 
         for case in [
             "(1+2]",
@@ -2832,7 +2836,7 @@ def get_tokens(string):
             "]",
         ]:
             with self.subTest(case=case):
-                self.assertRaises(TokenError, get_tokens, case)
+                self.assertRaises(tokenize.TokenError, get_tokens, case)
 
     def test_max_indent(self):
         MAXINDENT = 100
@@ -2844,14 +2848,14 @@ def generate_source(indents):
 
         valid = generate_source(MAXINDENT - 1)
         the_input = StringIO(valid)
-        tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
-        self.assertEqual(tokens[-2].type, DEDENT)
-        self.assertEqual(tokens[-1].type, ENDMARKER)
+        tokens = list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline))
+        self.assertEqual(tokens[-2].type, tokenize.DEDENT)
+        self.assertEqual(tokens[-1].type, tokenize.ENDMARKER)
         compile(valid, "<string>", "exec")
 
         invalid = generate_source(MAXINDENT)
         the_input = StringIO(invalid)
-        self.assertRaises(IndentationError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
+        self.assertRaises(IndentationError, lambda: list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline)))
         self.assertRaises(
             IndentationError, compile, invalid, "<string>", "exec"
         )
@@ -2860,7 +2864,7 @@ def test_continuation_lines_indentation(self):
         def get_tokens(string):
             the_string = StringIO(string)
             return [(kind, string) for (kind, string, *_)
-                    in _generate_tokens_from_c_tokenizer(the_string.readline)]
+                    in tokenize._generate_tokens_from_c_tokenizer(the_string.readline)]
 
         code = dedent("""
             def fib(n):
Lib/tokenize.py

@@ -41,7 +41,7 @@
 
 import token
 __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
-                           "untokenize", "TokenInfo"]
+                           "untokenize", "TokenInfo", "open", "TokenError"]
 del token
 
 class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
@@ -162,8 +162,6 @@ def _compile(expr):
 class TokenError(Exception): pass
 
 
-class StopTokenizing(Exception): pass
-
 class Untokenizer:
 
     def __init__(self):