From b690a2759e62d9ee0b6ea1b20e8f7e4b2cdbf8bb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Oct 2019 14:32:25 +0300 Subject: [PATCH] bpo-36698: IDLE no longer fails when write non-encodable characters to stderr. (GH-16583) It now escapes them with a backslash, as the regular Python interpreter. Added the "errors" field to the standard streams. --- Lib/idlelib/idle_test/test_run.py | 60 ++++++++++++------- Lib/idlelib/iomenu.py | 41 ++++++------- Lib/idlelib/pyshell.py | 14 +++-- Lib/idlelib/run.py | 36 ++++++----- .../2019-10-04-18-03-09.bpo-36698.BKcmom.rst | 3 + 5 files changed, 87 insertions(+), 67 deletions(-) create mode 100644 Misc/NEWS.d/next/IDLE/2019-10-04-18-03-09.bpo-36698.BKcmom.rst diff --git a/Lib/idlelib/idle_test/test_run.py b/Lib/idlelib/idle_test/test_run.py index cad0b4d98f8..9995dbe2eca 100644 --- a/Lib/idlelib/idle_test/test_run.py +++ b/Lib/idlelib/idle_test/test_run.py @@ -36,7 +36,7 @@ def __eq__(self, other): self.assertIn('UnhashableException: ex1', tb[10]) -# PseudoFile tests. +# StdioFile tests. 
class S(str): def __str__(self): @@ -68,14 +68,14 @@ def push(self, lines): self.lines = list(lines)[::-1] -class PseudeInputFilesTest(unittest.TestCase): +class StdInputFilesTest(unittest.TestCase): def test_misc(self): shell = MockShell() - f = run.PseudoInputFile(shell, 'stdin', 'utf-8') + f = run.StdInputFile(shell, 'stdin') self.assertIsInstance(f, io.TextIOBase) self.assertEqual(f.encoding, 'utf-8') - self.assertIsNone(f.errors) + self.assertEqual(f.errors, 'strict') self.assertIsNone(f.newlines) self.assertEqual(f.name, '') self.assertFalse(f.closed) @@ -86,7 +86,7 @@ def test_misc(self): def test_unsupported(self): shell = MockShell() - f = run.PseudoInputFile(shell, 'stdin', 'utf-8') + f = run.StdInputFile(shell, 'stdin') self.assertRaises(OSError, f.fileno) self.assertRaises(OSError, f.tell) self.assertRaises(OSError, f.seek, 0) @@ -95,7 +95,7 @@ def test_unsupported(self): def test_read(self): shell = MockShell() - f = run.PseudoInputFile(shell, 'stdin', 'utf-8') + f = run.StdInputFile(shell, 'stdin') shell.push(['one\n', 'two\n', '']) self.assertEqual(f.read(), 'one\ntwo\n') shell.push(['one\n', 'two\n', '']) @@ -115,7 +115,7 @@ def test_read(self): def test_readline(self): shell = MockShell() - f = run.PseudoInputFile(shell, 'stdin', 'utf-8') + f = run.StdInputFile(shell, 'stdin') shell.push(['one\n', 'two\n', 'three\n', 'four\n']) self.assertEqual(f.readline(), 'one\n') self.assertEqual(f.readline(-1), 'two\n') @@ -140,7 +140,7 @@ def test_readline(self): def test_readlines(self): shell = MockShell() - f = run.PseudoInputFile(shell, 'stdin', 'utf-8') + f = run.StdInputFile(shell, 'stdin') shell.push(['one\n', 'two\n', '']) self.assertEqual(f.readlines(), ['one\n', 'two\n']) shell.push(['one\n', 'two\n', '']) @@ -161,7 +161,7 @@ def test_readlines(self): def test_close(self): shell = MockShell() - f = run.PseudoInputFile(shell, 'stdin', 'utf-8') + f = run.StdInputFile(shell, 'stdin') shell.push(['one\n', 'two\n', '']) self.assertFalse(f.closed) 
self.assertEqual(f.readline(), 'one\n') @@ -171,14 +171,14 @@ def test_close(self): self.assertRaises(TypeError, f.close, 1) -class PseudeOutputFilesTest(unittest.TestCase): +class StdOutputFilesTest(unittest.TestCase): def test_misc(self): shell = MockShell() - f = run.PseudoOutputFile(shell, 'stdout', 'utf-8') + f = run.StdOutputFile(shell, 'stdout') self.assertIsInstance(f, io.TextIOBase) self.assertEqual(f.encoding, 'utf-8') - self.assertIsNone(f.errors) + self.assertEqual(f.errors, 'strict') self.assertIsNone(f.newlines) self.assertEqual(f.name, '') self.assertFalse(f.closed) @@ -189,7 +189,7 @@ def test_misc(self): def test_unsupported(self): shell = MockShell() - f = run.PseudoOutputFile(shell, 'stdout', 'utf-8') + f = run.StdOutputFile(shell, 'stdout') self.assertRaises(OSError, f.fileno) self.assertRaises(OSError, f.tell) self.assertRaises(OSError, f.seek, 0) @@ -198,16 +198,36 @@ def test_unsupported(self): def test_write(self): shell = MockShell() - f = run.PseudoOutputFile(shell, 'stdout', 'utf-8') + f = run.StdOutputFile(shell, 'stdout') f.write('test') self.assertEqual(shell.written, [('test', 'stdout')]) shell.reset() - f.write('t\xe8st') - self.assertEqual(shell.written, [('t\xe8st', 'stdout')]) + f.write('t\xe8\u015b\U0001d599') + self.assertEqual(shell.written, [('t\xe8\u015b\U0001d599', 'stdout')]) shell.reset() - f.write(S('t\xe8st')) - self.assertEqual(shell.written, [('t\xe8st', 'stdout')]) + f.write(S('t\xe8\u015b\U0001d599')) + self.assertEqual(shell.written, [('t\xe8\u015b\U0001d599', 'stdout')]) + self.assertEqual(type(shell.written[0][0]), str) + shell.reset() + + self.assertRaises(TypeError, f.write) + self.assertEqual(shell.written, []) + self.assertRaises(TypeError, f.write, b'test') + self.assertRaises(TypeError, f.write, 123) + self.assertEqual(shell.written, []) + self.assertRaises(TypeError, f.write, 'test', 'spam') + self.assertEqual(shell.written, []) + + def test_write_stderr_nonencodable(self): + shell = MockShell() + f = 
run.StdOutputFile(shell, 'stderr', 'iso-8859-15', 'backslashreplace') + f.write('t\xe8\u015b\U0001d599\xa4') + self.assertEqual(shell.written, [('t\xe8\\u015b\\U0001d599\\xa4', 'stderr')]) + shell.reset() + + f.write(S('t\xe8\u015b\U0001d599\xa4')) + self.assertEqual(shell.written, [('t\xe8\\u015b\\U0001d599\\xa4', 'stderr')]) self.assertEqual(type(shell.written[0][0]), str) shell.reset() @@ -221,7 +241,7 @@ def test_write(self): def test_writelines(self): shell = MockShell() - f = run.PseudoOutputFile(shell, 'stdout', 'utf-8') + f = run.StdOutputFile(shell, 'stdout') f.writelines([]) self.assertEqual(shell.written, []) shell.reset() @@ -251,7 +271,7 @@ def test_writelines(self): def test_close(self): shell = MockShell() - f = run.PseudoOutputFile(shell, 'stdout', 'utf-8') + f = run.StdOutputFile(shell, 'stdout') self.assertFalse(f.closed) f.write('test') f.close() diff --git a/Lib/idlelib/iomenu.py b/Lib/idlelib/iomenu.py index b9e813be063..b5533be79f9 100644 --- a/Lib/idlelib/iomenu.py +++ b/Lib/idlelib/iomenu.py @@ -15,6 +15,7 @@ if idlelib.testing: # Set True by test.test_idle to avoid setlocale. encoding = 'utf-8' + errors = 'surrogateescape' else: # Try setting the locale, so that we can find out # what encoding to use @@ -24,15 +25,9 @@ except (ImportError, locale.Error): pass - locale_decode = 'ascii' if sys.platform == 'win32': - # On Windows, we could use "mbcs". However, to give the user - # a portable encoding name, we need to find the code page - try: - locale_encoding = locale.getdefaultlocale()[1] - codecs.lookup(locale_encoding) - except LookupError: - pass + encoding = 'utf-8' + errors = 'surrogateescape' else: try: # Different things can fail here: the locale module may not be @@ -40,30 +35,30 @@ # resulting codeset may be unknown to Python. 
We ignore all # these problems, falling back to ASCII locale_encoding = locale.nl_langinfo(locale.CODESET) - if locale_encoding is None or locale_encoding == '': - # situation occurs on macOS - locale_encoding = 'ascii' - codecs.lookup(locale_encoding) + if locale_encoding: + codecs.lookup(locale_encoding) except (NameError, AttributeError, LookupError): # Try getdefaultlocale: it parses environment variables, # which may give a clue. Unfortunately, getdefaultlocale has # bugs that can cause ValueError. try: locale_encoding = locale.getdefaultlocale()[1] - if locale_encoding is None or locale_encoding == '': - # situation occurs on macOS - locale_encoding = 'ascii' - codecs.lookup(locale_encoding) + if locale_encoding: + codecs.lookup(locale_encoding) except (ValueError, LookupError): pass - locale_encoding = locale_encoding.lower() - - encoding = locale_encoding - # Encoding is used in multiple files; locale_encoding nowhere. - # The only use of 'encoding' below is in _decode as initial value - # of deprecated block asking user for encoding. - # Perhaps use elsewhere should be reviewed. + if locale_encoding: + encoding = locale_encoding.lower() + errors = 'strict' + else: + # POSIX locale or macOS + encoding = 'ascii' + errors = 'surrogateescape' + # Encoding is used in multiple files; locale_encoding nowhere. + # The only use of 'encoding' below is in _decode as initial value + # of deprecated block asking user for encoding. + # Perhaps use elsewhere should be reviewed. 
coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) diff --git a/Lib/idlelib/pyshell.py b/Lib/idlelib/pyshell.py index bc87d24eaab..065122dec7a 100755 --- a/Lib/idlelib/pyshell.py +++ b/Lib/idlelib/pyshell.py @@ -54,7 +54,7 @@ from idlelib.filelist import FileList from idlelib.outwin import OutputWindow from idlelib import rpc -from idlelib.run import idle_formatwarning, PseudoInputFile, PseudoOutputFile +from idlelib.run import idle_formatwarning, StdInputFile, StdOutputFile from idlelib.undo import UndoDelegator HOST = '127.0.0.1' # python execution server on localhost loopback @@ -902,10 +902,14 @@ def __init__(self, flist=None): self.save_stderr = sys.stderr self.save_stdin = sys.stdin from idlelib import iomenu - self.stdin = PseudoInputFile(self, "stdin", iomenu.encoding) - self.stdout = PseudoOutputFile(self, "stdout", iomenu.encoding) - self.stderr = PseudoOutputFile(self, "stderr", iomenu.encoding) - self.console = PseudoOutputFile(self, "console", iomenu.encoding) + self.stdin = StdInputFile(self, "stdin", + iomenu.encoding, iomenu.errors) + self.stdout = StdOutputFile(self, "stdout", + iomenu.encoding, iomenu.errors) + self.stderr = StdOutputFile(self, "stderr", + iomenu.encoding, "backslashreplace") + self.console = StdOutputFile(self, "console", + iomenu.encoding, iomenu.errors) if not use_subprocess: sys.stdout = self.stdout sys.stderr = self.stderr diff --git a/Lib/idlelib/run.py b/Lib/idlelib/run.py index 41e0ded4402..5bd84aadcd8 100644 --- a/Lib/idlelib/run.py +++ b/Lib/idlelib/run.py @@ -401,17 +401,22 @@ def handle_error(self, request, client_address): # Pseudofiles for shell-remote communication (also used in pyshell) -class PseudoFile(io.TextIOBase): +class StdioFile(io.TextIOBase): - def __init__(self, shell, tags, encoding=None): + def __init__(self, shell, tags, encoding='utf-8', errors='strict'): self.shell = shell self.tags = tags self._encoding = encoding + 
self._errors = errors @property def encoding(self): return self._encoding + @property + def errors(self): + return self._errors + @property def name(self): return '<%s>' % self.tags @@ -420,7 +425,7 @@ def isatty(self): return True -class PseudoOutputFile(PseudoFile): +class StdOutputFile(StdioFile): def writable(self): return True @@ -428,19 +433,12 @@ def writable(self): def write(self, s): if self.closed: raise ValueError("write to closed file") - if type(s) is not str: - if not isinstance(s, str): - raise TypeError('must be str, not ' + type(s).__name__) - # See issue #19481 - s = str.__str__(s) + s = str.encode(s, self.encoding, self.errors).decode(self.encoding, self.errors) return self.shell.write(s, self.tags) -class PseudoInputFile(PseudoFile): - - def __init__(self, shell, tags, encoding=None): - PseudoFile.__init__(self, shell, tags, encoding) - self._line_buffer = '' +class StdInputFile(StdioFile): + _line_buffer = '' def readable(self): return True @@ -495,12 +493,12 @@ def handle(self): executive = Executive(self) self.register("exec", executive) self.console = self.get_remote_proxy("console") - sys.stdin = PseudoInputFile(self.console, "stdin", - iomenu.encoding) - sys.stdout = PseudoOutputFile(self.console, "stdout", - iomenu.encoding) - sys.stderr = PseudoOutputFile(self.console, "stderr", - iomenu.encoding) + sys.stdin = StdInputFile(self.console, "stdin", + iomenu.encoding, iomenu.errors) + sys.stdout = StdOutputFile(self.console, "stdout", + iomenu.encoding, iomenu.errors) + sys.stderr = StdOutputFile(self.console, "stderr", + iomenu.encoding, "backslashreplace") sys.displayhook = rpc.displayhook # page help() text to shell. 
diff --git a/Misc/NEWS.d/next/IDLE/2019-10-04-18-03-09.bpo-36698.BKcmom.rst b/Misc/NEWS.d/next/IDLE/2019-10-04-18-03-09.bpo-36698.BKcmom.rst new file mode 100644 index 00000000000..5aaa3c92646 --- /dev/null +++ b/Misc/NEWS.d/next/IDLE/2019-10-04-18-03-09.bpo-36698.BKcmom.rst @@ -0,0 +1,3 @@ +IDLE no longer fails when writing non-encodable characters to stderr. It now +escapes them with a backslash, as the regular Python interpreter does. Added the +``errors`` field to the standard streams.