From c3b6dbff2c8886de1edade737febe85dd47ff4d0 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 10 Jun 2024 13:06:18 +0200 Subject: [PATCH] gh-115801: Only allow sequence of strings as input for difflib.unified_diff (GH-118333) --- Lib/difflib.py | 6 ++++ Lib/test/test_difflib.py | 30 +++++++++++++++---- ...-04-27-18-36-46.gh-issue-115801.SVeHSy.rst | 1 + 3 files changed, 31 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-27-18-36-46.gh-issue-115801.SVeHSy.rst diff --git a/Lib/difflib.py b/Lib/difflib.py index 0443963b4fd..7f595b6c72e 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1264,6 +1264,12 @@ def _check_types(a, b, *args): if b and not isinstance(b[0], str): raise TypeError('lines to compare must be str, not %s (%r)' % (type(b[0]).__name__, b[0])) + if isinstance(a, str): + raise TypeError('input must be a sequence of strings, not %s' % + type(a).__name__) + if isinstance(b, str): + raise TypeError('input must be a sequence of strings, not %s' % + type(b).__name__) for arg in args: if not isinstance(arg, str): raise TypeError('all arguments must be str, not: %r' % (arg,)) diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index bf6e5b1152b..9e217249be7 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -295,7 +295,7 @@ def test_close_matches_aligned(self): class TestOutputFormat(unittest.TestCase): def test_tab_delimiter(self): - args = ['one', 'two', 'Original', 'Current', + args = [['one'], ['two'], 'Original', 'Current', '2005-01-26 23:30:50', '2010-04-02 10:20:52'] ud = difflib.unified_diff(*args, lineterm='') self.assertEqual(list(ud)[0:2], [ @@ -307,7 +307,7 @@ def test_tab_delimiter(self): "--- Current\t2010-04-02 10:20:52"]) def test_no_trailing_tab_on_empty_filedate(self): - args = ['one', 'two', 'Original', 'Current'] + args = [['one'], ['two'], 'Original', 'Current'] ud = difflib.unified_diff(*args, lineterm='') self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"]) @@ -447,6 +447,28 @@ def assertDiff(expect, actual): lineterm=b'') assertDiff(expect, actual) + +class TestInputTypes(unittest.TestCase): + def _assert_type_error(self, msg, generator, *args): + with self.assertRaises(TypeError) as ctx: + list(generator(*args)) + self.assertEqual(msg, str(ctx.exception)) + + def test_input_type_checks(self): + unified = difflib.unified_diff + context = difflib.context_diff + + expect = "input must be a sequence of strings, not str" + self._assert_type_error(expect, unified, 'a', ['b']) + self._assert_type_error(expect, context, 'a', ['b']) + + self._assert_type_error(expect, unified, ['a'], 'b') + self._assert_type_error(expect, context, ['a'], 'b') + + expect = "lines to compare must be str, not NoneType (None)" + self._assert_type_error(expect, unified, ['a'], [None]) + self._assert_type_error(expect, context, ['a'], [None]) + def test_mixed_types_content(self): # type of input content must be consistent: all str or all bytes a = [b'hello'] @@ -495,10 +517,6 @@ def test_mixed_types_dates(self): b = ['bar\n'] list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb)) - def _assert_type_error(self, msg, generator, *args): - with self.assertRaises(TypeError) as ctx: - list(generator(*args)) - self.assertEqual(msg, str(ctx.exception)) class TestJunkAPIs(unittest.TestCase): def test_is_line_junk_true(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-27-18-36-46.gh-issue-115801.SVeHSy.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-27-18-36-46.gh-issue-115801.SVeHSy.rst new file mode 100644 index 00000000000..93b176d5767 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-27-18-36-46.gh-issue-115801.SVeHSy.rst @@ -0,0 +1 @@ +Raise ``TypeError`` when passing a string to :func:`difflib.unified_diff` and :func:`difflib.context_diff`.