mirror of
https://github.com/python/cpython
synced 2024-10-03 01:02:35 +00:00
[3.12] gh-121188: Sanitize invalid XML characters in regrtest (GH-121195) (#121205)
gh-121188: Sanitize invalid XML characters in regrtest (GH-121195)
When creating the JUnit XML file, regrtest now escapes characters
which are invalid in XML, such as the chr(27) control character used
in ANSI escape sequences.
(cherry picked from commit af8c3d7a26)
Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
99bc8589f0
commit
b80edafff2
|
@ -9,6 +9,7 @@
|
|||
import traceback
|
||||
import unittest
|
||||
from test import support
|
||||
from test.libregrtest.utils import sanitize_xml
|
||||
|
||||
class RegressionTestResult(unittest.TextTestResult):
|
||||
USE_XML = False
|
||||
|
@ -65,23 +66,24 @@ def _add_result(self, test, capture=False, **args):
|
|||
if capture:
|
||||
if self._stdout_buffer is not None:
|
||||
stdout = self._stdout_buffer.getvalue().rstrip()
|
||||
ET.SubElement(e, 'system-out').text = stdout
|
||||
ET.SubElement(e, 'system-out').text = sanitize_xml(stdout)
|
||||
if self._stderr_buffer is not None:
|
||||
stderr = self._stderr_buffer.getvalue().rstrip()
|
||||
ET.SubElement(e, 'system-err').text = stderr
|
||||
ET.SubElement(e, 'system-err').text = sanitize_xml(stderr)
|
||||
|
||||
for k, v in args.items():
|
||||
if not k or not v:
|
||||
continue
|
||||
|
||||
e2 = ET.SubElement(e, k)
|
||||
if hasattr(v, 'items'):
|
||||
for k2, v2 in v.items():
|
||||
if k2:
|
||||
e2.set(k2, str(v2))
|
||||
e2.set(k2, sanitize_xml(str(v2)))
|
||||
else:
|
||||
e2.text = str(v2)
|
||||
e2.text = sanitize_xml(str(v2))
|
||||
else:
|
||||
e2.text = str(v)
|
||||
e2.text = sanitize_xml(str(v))
|
||||
|
||||
@classmethod
|
||||
def __makeErrorDict(cls, err_type, err_value, err_tb):
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
import os.path
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
import shlex
|
||||
import signal
|
||||
import subprocess
|
||||
|
@ -710,3 +711,24 @@ def get_signal_name(exitcode):
|
|||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# Compiled pattern matching each run of characters that sanitize_xml()
# rewrites because they are not allowed in XML.
ILLEGAL_XML_CHARS_RE = re.compile(''.join((
    '[',
    # Control characters.  TAB (\x09), line feed (\x0A) and carriage
    # return (\x0D) are legal, so the ranges deliberately skip them.
    '\x00-\x08\x0B\x0C\x0E-\x1F',
    # Surrogate code points.
    '\uD800-\uDFFF',
    # Special Unicode code points U+FFFE and U+FFFF.
    '\uFFFE',
    '\uFFFF',
    # The trailing "+" groups consecutive illegal characters into a single
    # match, for better efficiency.
    ']+',
)))
|
||||
|
||||
def _sanitize_xml_replace(regs):
|
||||
text = regs[0]
|
||||
return ''.join(f'\\x{ord(ch):02x}' if ch <= '\xff' else ascii(ch)[1:-1]
|
||||
for ch in text)
|
||||
|
||||
def sanitize_xml(text):
    """Return *text* with every run matched by ILLEGAL_XML_CHARS_RE escaped.

    Each matched run is replaced by the string that
    _sanitize_xml_replace() builds for it; all other text is returned
    unchanged.
    """
    substitute = ILLEGAL_XML_CHARS_RE.sub
    return substitute(_sanitize_xml_replace, text)
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
import tempfile
|
||||
import textwrap
|
||||
import unittest
|
||||
from xml.etree import ElementTree
|
||||
|
||||
from test import support
|
||||
from test.support import os_helper
|
||||
from test.libregrtest import cmdline
|
||||
|
@ -2221,6 +2223,44 @@ def test_pass(self):
|
|||
self.check_executed_tests(output, testname, stats=1, parallel=True)
|
||||
self.assertNotIn('SPAM SPAM SPAM', output)
|
||||
|
||||
def test_xml(self):
    """Run a failing test that prints ESC and check the JUnit XML file."""
    # The generated test prints the chr(27) control character, then fails.
    source = textwrap.dedent(r"""
        import unittest
        from test import support

        class VerboseTests(unittest.TestCase):
            def test_failed(self):
                print("abc \x1b def")
                self.fail()
    """)
    testname = self.create_test(code=source)

    # Run sequentially
    xml_path = os_helper.TESTFN
    self.addCleanup(os_helper.unlink, xml_path)

    output = self.run_tests(testname, "--junit-xml", xml_path,
                            exitcode=EXITCODE_BAD_TEST)
    self.check_executed_tests(output, testname,
                              failed=testname,
                              stats=TestStats(1, 1, 0))

    # Test generated XML: parsing it also checks that it is well-formed.
    with open(xml_path, encoding="utf8") as report:
        suite = ElementTree.fromstring(report.read())

    for attr, expected in (('tests', 1), ('errors', 0), ('failures', 1)):
        self.assertEqual(int(suite.get(attr)), expected)

    case = suite[0][0]
    self.assertEqual(case.get('status'), 'run')
    self.assertEqual(case.get('result'), 'completed')
    self.assertGreater(float(case.get('time')), 0)
    # NOTE(review): this loop checks nothing if the test case has no
    # 'system-out' element.
    for captured in case.iter('system-out'):
        self.assertEqual(captured.text, r"abc \x1b def")
|
||||
|
||||
|
||||
class TestUtils(unittest.TestCase):
|
||||
def test_format_duration(self):
|
||||
|
@ -2403,6 +2443,25 @@ def id(self):
|
|||
self.assertTrue(match_test(test_chdir))
|
||||
self.assertFalse(match_test(test_copy))
|
||||
|
||||
def test_sanitize_xml(self):
    """Check utils.sanitize_xml() on invalid and valid XML characters."""
    sanitize_xml = utils.sanitize_xml

    # escape invalid XML characters: (input, expected output) pairs
    escaped_cases = (
        ('abc \x1b\x1f def', r'abc \x1b\x1f def'),
        ('nul:\x00, bell:\x07', r'nul:\x00, bell:\x07'),
        ('surrogate:\uDC80', r'surrogate:\udc80'),
        ('illegal \uFFFE and \uFFFF', r'illegal \ufffe and \uffff'),
    )
    for text, expected in escaped_cases:
        self.assertEqual(sanitize_xml(text), expected)

    # no escape for valid XML characters: output must equal the input
    unchanged_cases = (
        'a\n\tb',
        'valid t\xe9xt \u20ac',
    )
    for text in unchanged_cases:
        self.assertEqual(sanitize_xml(text), text)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
When creating the JUnit XML file, regrtest now escapes characters which are
|
||||
invalid in XML, such as the chr(27) control character used in ANSI escape
|
||||
sequences. Patch by Victor Stinner.
|
Loading…
Reference in a new issue