gh-71299: Fix __all__ in tokenize (#105907)

Authored by Lysandros Nikolaou on 2023-06-19 13:31:57 +02:00; committed by GitHub
Co-authored-by: Unit03
parent 581619941e
commit ab3823a97b
2 changed files with 137 additions and 135 deletions
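The substance of the change: tokenize.__all__ gains "open" and "TokenError", and a new test___all__ asserts that the module exports exactly token.__all__ plus the tokenize-specific names. A minimal sketch of that check outside the test suite, assuming an interpreter that already includes this commit:

import token
import tokenize

# Expected exports mirror the new test___all__ in the diff below.
expected = set(token.__all__) | {
    "TokenInfo", "TokenError", "generate_tokens",
    "detect_encoding", "untokenize", "open", "tokenize",
}
assert set(tokenize.__all__) == expected

# Star imports now pick up tokenize.open and tokenize.TokenError as well.
namespace = {}
exec("from tokenize import *", namespace)
assert "open" in namespace and "TokenError" in namespace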

Lib/test/test_tokenize.py

@@ -1,20 +1,16 @@
from test import support
from test.support import os_helper
from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
open as tokenize_open, Untokenizer, generate_tokens,
NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo,
TokenError)
from io import BytesIO, StringIO
import os
import token
import tokenize
import unittest
from io import BytesIO, StringIO
from textwrap import dedent
from unittest import TestCase, mock
from test import support
from test.support import os_helper
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
INVALID_UNDERSCORE_LITERALS)
from test.support import os_helper
from test.support.script_helper import run_test_script, make_script, run_python_until_end
import os
import token
# Converts a source string into a list of textual representation
# of the tokens such as:
@@ -26,12 +22,12 @@ def stringify_tokens_from_source(token_generator, source_string):
missing_trailing_nl = source_string[-1] not in '\r\n'
for type, token, start, end, line in token_generator:
if type == ENDMARKER:
if type == tokenize.ENDMARKER:
break
# Ignore the new line on the last line if the input lacks one
if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
if missing_trailing_nl and type == tokenize.NEWLINE and end[0] == num_lines:
continue
type = tok_name[type]
type = tokenize.tok_name[type]
result.append(f" {type:10} {token!r:13} {start} {end}")
return result
@@ -47,7 +43,7 @@ def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
# The ENDMARKER and final NEWLINE are omitted.
f = BytesIO(s.encode('utf-8'))
result = stringify_tokens_from_source(tokenize(f.readline), s)
result = stringify_tokens_from_source(tokenize.tokenize(f.readline), s)
self.assertEqual(result,
[" ENCODING 'utf-8' (0, 0) (0, 0)"] +
expected.rstrip().splitlines())
@@ -57,27 +53,27 @@ def gen():
yield "sdfosdg"
yield "sdfosdg"
with self.assertRaises(TypeError):
list(tokenize(gen().__next__))
list(tokenize.tokenize(gen().__next__))
def gen():
yield b"sdfosdg"
yield b"sdfosdg"
with self.assertRaises(TypeError):
list(generate_tokens(gen().__next__))
list(tokenize.generate_tokens(gen().__next__))
def gen():
yield "sdfosdg"
1/0
with self.assertRaises(ZeroDivisionError):
list(generate_tokens(gen().__next__))
list(tokenize.generate_tokens(gen().__next__))
def test_implicit_newline(self):
# Make sure that the tokenizer puts in an implicit NEWLINE
# when the input lacks a trailing new line.
f = BytesIO("x".encode('utf-8'))
tokens = list(tokenize(f.readline))
self.assertEqual(tokens[-2].type, NEWLINE)
self.assertEqual(tokens[-1].type, ENDMARKER)
tokens = list(tokenize.tokenize(f.readline))
self.assertEqual(tokens[-2].type, tokenize.NEWLINE)
self.assertEqual(tokens[-1].type, tokenize.ENDMARKER)
def test_basic(self):
self.check_tokenize("1 + 1", """\
@@ -139,7 +135,7 @@ def k(x):
with self.assertRaisesRegex(IndentationError,
"unindent does not match any "
"outer indentation level") as e:
for tok in tokenize(readline):
for tok in tokenize.tokenize(readline):
pass
self.assertEqual(e.exception.lineno, 3)
self.assertEqual(e.exception.filename, '<string>')
@@ -275,8 +271,8 @@ def test_float(self):
def test_underscore_literals(self):
def number_token(s):
f = BytesIO(s.encode('utf-8'))
for toktype, token, start, end, line in tokenize(f.readline):
if toktype == NUMBER:
for toktype, token, start, end, line in tokenize.tokenize(f.readline):
if toktype == tokenize.NUMBER:
return token
return 'invalid token'
for lit in VALID_UNDERSCORE_LITERALS:
@@ -292,7 +288,7 @@ def number_token(s):
continue
try:
number_token(lit)
except TokenError:
except tokenize.TokenError:
continue
self.assertNotEqual(number_token(lit), lit)
@@ -1150,24 +1146,24 @@ def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
# The ENDMARKER and final NEWLINE are omitted.
f = StringIO(s)
result = stringify_tokens_from_source(generate_tokens(f.readline), s)
result = stringify_tokens_from_source(tokenize.generate_tokens(f.readline), s)
self.assertEqual(result, expected.rstrip().splitlines())
def decistmt(s):
result = []
g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
g = tokenize.tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
for toknum, tokval, _, _, _ in g:
if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
if toknum == tokenize.NUMBER and '.' in tokval: # replace NUMBER tokens
result.extend([
(NAME, 'Decimal'),
(OP, '('),
(STRING, repr(tokval)),
(OP, ')')
(tokenize.NAME, 'Decimal'),
(tokenize.OP, '('),
(tokenize.STRING, repr(tokval)),
(tokenize.OP, ')')
])
else:
result.append((toknum, tokval))
return untokenize(result).decode('utf-8').strip()
return tokenize.untokenize(result).decode('utf-8').strip()
class TestMisc(TestCase):
@@ -1191,6 +1187,13 @@ def test_decistmt(self):
self.assertEqual(eval(decistmt(s)),
Decimal('-3.217160342717258261933904529E-7'))
def test___all__(self):
expected = token.__all__ + [
"TokenInfo", "TokenError", "generate_tokens",
"detect_encoding", "untokenize", "open", "tokenize",
]
self.assertCountEqual(tokenize.__all__, expected)
class TestTokenizerAdheresToPep0263(TestCase):
"""
@@ -1245,9 +1248,10 @@ def readline():
yield b''
# skip the initial encoding token and the end tokens
tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
extra_tokens=True))[:-2]
expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
tokens = list(tokenize._generate_tokens_from_c_tokenizer(readline().__next__,
encoding='utf-8',
extra_tokens=True))[:-2]
expected_tokens = [tokenize.TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
self.assertEqual(tokens, expected_tokens,
"bytes not decoded with encoding")
@@ -1271,7 +1275,7 @@ def test_no_bom_no_encoding_cookie(self):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8')
self.assertEqual(consumed_lines, list(lines[:2]))
@@ -1281,7 +1285,7 @@ def test_bom_no_cookie(self):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines,
[b'# something\n', b'print(something)\n'])
@@ -1292,7 +1296,7 @@ def test_cookie_first_line_no_bom(self):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'iso-8859-1')
self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
@@ -1302,7 +1306,7 @@ def test_matched_bom_and_cookie_first_line(self):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
@@ -1313,7 +1317,7 @@ def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
b'do_something(else)\n'
)
readline = self.get_readline(lines)
self.assertRaises(SyntaxError, detect_encoding, readline)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
def test_cookie_second_line_no_bom(self):
lines = (
@@ -1322,7 +1326,7 @@ def test_cookie_second_line_no_bom(self):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'ascii')
expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
self.assertEqual(consumed_lines, expected)
@@ -1334,7 +1338,7 @@ def test_matched_bom_and_cookie_second_line(self):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines,
[b'#! something\n', b'f# coding=utf-8\n'])
@@ -1347,7 +1351,7 @@ def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
b'do_something(else)\n'
)
readline = self.get_readline(lines)
self.assertRaises(SyntaxError, detect_encoding, readline)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
def test_cookie_second_line_noncommented_first_line(self):
lines = (
@@ -1355,7 +1359,7 @@ def test_cookie_second_line_noncommented_first_line(self):
b'# vim: set fileencoding=iso8859-15 :\n',
b"print('\xe2\x82\xac')\n"
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8')
expected = [b"print('\xc2\xa3')\n"]
self.assertEqual(consumed_lines, expected)
@@ -1366,7 +1370,7 @@ def test_cookie_second_line_commented_first_line(self):
b'# vim: set fileencoding=iso8859-15 :\n',
b"print('\xe2\x82\xac')\n"
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'iso8859-15')
expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
self.assertEqual(consumed_lines, expected)
@@ -1377,7 +1381,7 @@ def test_cookie_second_line_empty_first_line(self):
b'# vim: set fileencoding=iso8859-15 :\n',
b"print('\xe2\x82\xac')\n"
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'iso8859-15')
expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
self.assertEqual(consumed_lines, expected)
@@ -1394,7 +1398,7 @@ def test_latin1_normalization(self):
b"print(things)\n",
b"do_something += 4\n")
rl = self.get_readline(lines)
found, consumed_lines = detect_encoding(rl)
found, consumed_lines = tokenize.detect_encoding(rl)
self.assertEqual(found, "iso-8859-1")
def test_syntaxerror_latin1(self):
@@ -1404,7 +1408,7 @@ def test_syntaxerror_latin1(self):
b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
)
readline = self.get_readline(lines)
self.assertRaises(SyntaxError, detect_encoding, readline)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
def test_utf8_normalization(self):
@@ -1417,36 +1421,36 @@ def test_utf8_normalization(self):
b"# coding: " + enc.encode("ascii") + b"\n",
b"1 + 3\n")
rl = self.get_readline(lines)
found, consumed_lines = detect_encoding(rl)
found, consumed_lines = tokenize.detect_encoding(rl)
self.assertEqual(found, "utf-8")
def test_short_files(self):
readline = self.get_readline((b'print(something)\n',))
encoding, consumed_lines = detect_encoding(readline)
encoding, consumed_lines = tokenize.detect_encoding(readline)
self.assertEqual(encoding, 'utf-8')
self.assertEqual(consumed_lines, [b'print(something)\n'])
encoding, consumed_lines = detect_encoding(self.get_readline(()))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(()))
self.assertEqual(encoding, 'utf-8')
self.assertEqual(consumed_lines, [])
readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
encoding, consumed_lines = detect_encoding(readline)
encoding, consumed_lines = tokenize.detect_encoding(readline)
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines, [b'print(something)\n'])
readline = self.get_readline((b'\xef\xbb\xbf',))
encoding, consumed_lines = detect_encoding(readline)
encoding, consumed_lines = tokenize.detect_encoding(readline)
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines, [])
readline = self.get_readline((b'# coding: bad\n',))
self.assertRaises(SyntaxError, detect_encoding, readline)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
def test_false_encoding(self):
# Issue 18873: "Encoding" detected in non-comment lines
readline = self.get_readline((b'print("#coding=fake")',))
encoding, consumed_lines = detect_encoding(readline)
encoding, consumed_lines = tokenize.detect_encoding(readline)
self.assertEqual(encoding, 'utf-8')
self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
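For reference, a standalone sketch of the detect_encoding() behaviour these tests exercise; the two-line input is made up, and the latin-1 cookie is normalized exactly as test_latin1_normalization checks above:

import tokenize

# detect_encoding() reads at most two lines and returns the encoding plus
# the lines it actually consumed.
lines = iter([b'# -*- coding: latin-1 -*-\n', b'print(something)\n'])
encoding, consumed = tokenize.detect_encoding(lambda: next(lines, b''))
print(encoding)   # 'iso-8859-1' (latin-1 spellings are normalized)
print(consumed)   # [b'# -*- coding: latin-1 -*-\n']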
@@ -1459,14 +1463,14 @@ def test_open(self):
with open(filename, 'w', encoding=encoding) as fp:
print("# coding: %s" % encoding, file=fp)
print("print('euro:\u20ac')", file=fp)
with tokenize_open(filename) as fp:
with tokenize.open(filename) as fp:
self.assertEqual(fp.encoding, encoding)
self.assertEqual(fp.mode, 'r')
# test BOM (no coding cookie)
with open(filename, 'w', encoding='utf-8-sig') as fp:
print("print('euro:\u20ac')", file=fp)
with tokenize_open(filename) as fp:
with tokenize.open(filename) as fp:
self.assertEqual(fp.encoding, 'utf-8-sig')
self.assertEqual(fp.mode, 'r')
@@ -1493,16 +1497,16 @@ def readline(self):
ins = Bunk(lines, path)
# Make sure lacking a name isn't an issue.
del ins.name
detect_encoding(ins.readline)
tokenize.detect_encoding(ins.readline)
with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
ins = Bunk(lines, path)
detect_encoding(ins.readline)
tokenize.detect_encoding(ins.readline)
def test_open_error(self):
# Issue #23840: open() must close the binary file on error
m = BytesIO(b'#coding:xxx')
with mock.patch('tokenize._builtin_open', return_value=m):
self.assertRaises(SyntaxError, tokenize_open, 'foobar')
self.assertRaises(SyntaxError, tokenize.open, 'foobar')
self.assertTrue(m.closed)
@@ -1542,7 +1546,7 @@ def mock_readline():
tokenize_module.detect_encoding = mock_detect_encoding
tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
try:
results = tokenize(mock_readline)
results = tokenize.tokenize(mock_readline)
self.assertEqual(list(results)[1:],
[b'first', b'second', b'1', b'2', b'3', b'4'])
finally:
@@ -1559,23 +1563,23 @@ def test_oneline_defs(self):
buf = '\n'.join(buf)
# Test that 500 consequent, one-line defs is OK
toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
toks = list(tokenize.tokenize(BytesIO(buf.encode('utf-8')).readline))
self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
# [-2] is always NEWLINE
def assertExactTypeEqual(self, opstr, *optypes):
tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
tokens = list(tokenize.tokenize(BytesIO(opstr.encode('utf-8')).readline))
num_optypes = len(optypes)
self.assertEqual(len(tokens), 3 + num_optypes)
self.assertEqual(tok_name[tokens[0].exact_type],
tok_name[ENCODING])
self.assertEqual(tokenize.tok_name[tokens[0].exact_type],
tokenize.tok_name[tokenize.ENCODING])
for i in range(num_optypes):
self.assertEqual(tok_name[tokens[i + 1].exact_type],
tok_name[optypes[i]])
self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
tok_name[token.NEWLINE])
self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
tok_name[token.ENDMARKER])
self.assertEqual(tokenize.tok_name[tokens[i + 1].exact_type],
tokenize.tok_name[optypes[i]])
self.assertEqual(tokenize.tok_name[tokens[1 + num_optypes].exact_type],
tokenize.tok_name[token.NEWLINE])
self.assertEqual(tokenize.tok_name[tokens[2 + num_optypes].exact_type],
tokenize.tok_name[token.ENDMARKER])
def test_exact_type(self):
self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
@@ -1625,11 +1629,11 @@ def test_exact_type(self):
self.assertExactTypeEqual('@=', token.ATEQUAL)
self.assertExactTypeEqual('a**2+b**2==c**2',
NAME, token.DOUBLESTAR, NUMBER,
tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER,
token.PLUS,
NAME, token.DOUBLESTAR, NUMBER,
tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER,
token.EQEQUAL,
NAME, token.DOUBLESTAR, NUMBER)
tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER)
self.assertExactTypeEqual('{1, 2, 3}',
token.LBRACE,
token.NUMBER, token.COMMA,
@@ -1650,32 +1654,32 @@ def test_comment_at_the_end_of_the_source_without_newline(self):
# See http://bugs.python.org/issue44667
source = 'b = 1\n\n#test'
expected_tokens = [
TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
tokenize.TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
tokenize.TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
tokenize.TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
tokenize.TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
tokenize.TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
tokenize.TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
tokenize.TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
tokenize.TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
]
tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline))
self.assertEqual(tokens, expected_tokens)
def test_newline_and_space_at_the_end_of_the_source_without_newline(self):
# See https://github.com/python/cpython/issues/105435
source = 'a\n '
expected_tokens = [
TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
tokenize.TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
tokenize.TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
tokenize.TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
tokenize.TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
tokenize.TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
]
tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline))
self.assertEqual(tokens, expected_tokens)
def test_invalid_character_in_fstring_middle(self):
@@ -1695,7 +1699,7 @@ class UntokenizeTest(TestCase):
def test_bad_input_order(self):
# raise if previous row
u = Untokenizer()
u = tokenize.Untokenizer()
u.prev_row = 2
u.prev_col = 2
with self.assertRaises(ValueError) as cm:
@@ -1707,7 +1711,7 @@ def test_bad_input_order(self):
def test_backslash_continuation(self):
# The problem is that <whitespace>\<newline> leaves no token
u = Untokenizer()
u = tokenize.Untokenizer()
u.prev_row = 1
u.prev_col = 1
u.tokens = []
@@ -1719,17 +1723,17 @@ def test_backslash_continuation(self):
TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
def test_iter_compat(self):
u = Untokenizer()
token = (NAME, 'Hello')
tokens = [(ENCODING, 'utf-8'), token]
u = tokenize.Untokenizer()
token = (tokenize.NAME, 'Hello')
tokens = [(tokenize.ENCODING, 'utf-8'), token]
u.compat(token, iter([]))
self.assertEqual(u.tokens, ["Hello "])
u = Untokenizer()
u = tokenize.Untokenizer()
self.assertEqual(u.untokenize(iter([token])), 'Hello ')
u = Untokenizer()
u = tokenize.Untokenizer()
self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
self.assertEqual(u.encoding, 'utf-8')
self.assertEqual(untokenize(iter(tokens)), b'Hello ')
self.assertEqual(tokenize.untokenize(iter(tokens)), b'Hello ')
class TestRoundtrip(TestCase):
@@ -1752,17 +1756,17 @@ def check_roundtrip(self, f):
else:
code = f.read()
readline = iter(code.splitlines(keepends=True)).__next__
tokens5 = list(tokenize(readline))
tokens5 = list(tokenize.tokenize(readline))
tokens2 = [tok[:2] for tok in tokens5]
# Reproduce tokens2 from pairs
bytes_from2 = untokenize(tokens2)
bytes_from2 = tokenize.untokenize(tokens2)
readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
tokens2_from2 = [tok[:2] for tok in tokenize.tokenize(readline2)]
self.assertEqual(tokens2_from2, tokens2)
# Reproduce tokens2 from 5-tuples
bytes_from5 = untokenize(tokens5)
bytes_from5 = tokenize.untokenize(tokens5)
readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)]
self.assertEqual(tokens2_from5, tokens2)
def check_line_extraction(self, f):
@@ -1771,8 +1775,8 @@ def check_line_extraction(self, f):
else:
code = f.read()
readline = iter(code.splitlines(keepends=True)).__next__
for tok in tokenize(readline):
if tok.type in {ENCODING, ENDMARKER}:
for tok in tokenize.tokenize(readline):
if tok.type in {tokenize.ENCODING, tokenize.ENDMARKER}:
continue
self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]])
@@ -1878,7 +1882,7 @@ def test_random_files(self):
def roundtrip(self, code):
if isinstance(code, str):
code = code.encode('utf-8')
return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
return tokenize.untokenize(tokenize.tokenize(BytesIO(code).readline)).decode('utf-8')
def test_indentation_semantics_retained(self):
"""
@@ -1896,27 +1900,27 @@ def test_number_followed_by_name(self):
# See issue #gh-105549
source = "2sin(x)"
expected_tokens = [
TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
tokenize.TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
tokenize.TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
tokenize.TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
tokenize.TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
tokenize.TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
]
tokens = list(generate_tokens(StringIO(source).readline))
tokens = list(tokenize.generate_tokens(StringIO(source).readline))
self.assertEqual(tokens, expected_tokens)
def test_number_starting_with_zero(self):
source = "01234"
expected_tokens = [
TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
tokenize.TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
]
tokens = list(generate_tokens(StringIO(source).readline))
tokens = list(tokenize.generate_tokens(StringIO(source).readline))
self.assertEqual(tokens, expected_tokens)
class CTokenizeTest(TestCase):
@@ -1926,7 +1930,7 @@ def check_tokenize(self, s, expected):
f = StringIO(s)
with self.subTest(source=s):
result = stringify_tokens_from_source(
_generate_tokens_from_c_tokenizer(f.readline), s
tokenize._generate_tokens_from_c_tokenizer(f.readline), s
)
self.assertEqual(result, expected.rstrip().splitlines())
@@ -1935,15 +1939,15 @@ def readline(encoding):
yield "1+1".encode(encoding)
expected = [
TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
tokenize.TokenInfo(type=tokenize.OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
tokenize.TokenInfo(type=tokenize.NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
tokenize.TokenInfo(type=tokenize.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
]
for encoding in ["utf-8", "latin-1", "utf-16"]:
with self.subTest(encoding=encoding):
tokens = list(_generate_tokens_from_c_tokenizer(
tokens = list(tokenize._generate_tokens_from_c_tokenizer(
readline(encoding).__next__,
extra_tokens=True,
encoding=encoding,
@@ -2796,7 +2800,7 @@ def test_unicode(self):
def test_invalid_syntax(self):
def get_tokens(string):
the_string = StringIO(string)
return list(_generate_tokens_from_c_tokenizer(the_string.readline))
return list(tokenize._generate_tokens_from_c_tokenizer(the_string.readline))
for case in [
"(1+2]",
@@ -2832,7 +2836,7 @@ def get_tokens(string):
"]",
]:
with self.subTest(case=case):
self.assertRaises(TokenError, get_tokens, case)
self.assertRaises(tokenize.TokenError, get_tokens, case)
def test_max_indent(self):
MAXINDENT = 100
@@ -2844,14 +2848,14 @@ def generate_source(indents):
valid = generate_source(MAXINDENT - 1)
the_input = StringIO(valid)
tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
self.assertEqual(tokens[-2].type, DEDENT)
self.assertEqual(tokens[-1].type, ENDMARKER)
tokens = list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline))
self.assertEqual(tokens[-2].type, tokenize.DEDENT)
self.assertEqual(tokens[-1].type, tokenize.ENDMARKER)
compile(valid, "<string>", "exec")
invalid = generate_source(MAXINDENT)
the_input = StringIO(invalid)
self.assertRaises(IndentationError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
self.assertRaises(IndentationError, lambda: list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline)))
self.assertRaises(
IndentationError, compile, invalid, "<string>", "exec"
)
@@ -2860,7 +2864,7 @@ def test_continuation_lines_indentation(self):
def get_tokens(string):
the_string = StringIO(string)
return [(kind, string) for (kind, string, *_)
in _generate_tokens_from_c_tokenizer(the_string.readline)]
in tokenize._generate_tokens_from_c_tokenizer(the_string.readline)]
code = dedent("""
def fib(n):
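Before moving on to tokenize itself: TokenError is one of the two names newly added to __all__, and CTokenizeTest.test_invalid_syntax above shows where it surfaces. A short standalone sketch of the same behaviour (it calls the private _generate_tokens_from_c_tokenizer exactly as the test does; that helper is not a public API):

import io
import tokenize

def get_tokens(string):
    the_string = io.StringIO(string)
    return list(tokenize._generate_tokens_from_c_tokenizer(the_string.readline))

# Unbalanced brackets make the C tokenizer raise tokenize.TokenError.
for case in ["(1+2]", "]"]:
    try:
        get_tokens(case)
    except tokenize.TokenError as exc:
        print(f"{case!r} -> TokenError: {exc}")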

Lib/tokenize.py

@@ -41,7 +41,7 @@
import token
__all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
"untokenize", "TokenInfo"]
"untokenize", "TokenInfo", "open", "TokenError"]
del token
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
@@ -162,8 +162,6 @@ def _compile(expr):
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
class Untokenizer:
def __init__(self):
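The other newly exported name is open(). As test_open above demonstrates, it detects the source encoding from a BOM or a PEP 263 coding cookie before returning a text stream; a small sketch with a made-up file name:

import tokenize

# Write a file with a UTF-8 BOM and no coding cookie, as test_open does.
with open("example_bom.py", "w", encoding="utf-8-sig") as fp:
    print("print('euro:\u20ac')", file=fp)

# tokenize.open() is now reachable via "from tokenize import *" as well.
with tokenize.open("example_bom.py") as fp:
    print(fp.encoding)   # 'utf-8-sig'
    print(fp.mode)       # 'r'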