gh-57141: Add dircmp shallow option (GH-109499)

Co-authored-by: Steve Ward <planet36@gmail.com>
Co-authored-by: Sanyam Khurana <8039608+CuriousLearner@users.noreply.github.com>
This commit is contained in:
Tobias Rautenkranz 2024-03-04 18:27:43 +01:00 committed by GitHub
parent ea1b1c579f
commit 60743a9a7e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 120 additions and 31 deletions

View file

@ -70,7 +70,7 @@ The :mod:`filecmp` module defines the following functions:
The :class:`dircmp` class
-------------------------
.. class:: dircmp(a, b, ignore=None, hide=None)
.. class:: dircmp(a, b, ignore=None, hide=None, shallow=True)
Construct a new directory comparison object, to compare the directories *a*
and *b*. *ignore* is a list of names to ignore, and defaults to
@ -78,7 +78,12 @@ The :class:`dircmp` class
defaults to ``[os.curdir, os.pardir]``.
The :class:`dircmp` class compares files by doing *shallow* comparisons
as described for :func:`filecmp.cmp`.
as described for :func:`filecmp.cmp` by default; this can be changed with the
*shallow* parameter.
.. versionchanged:: 3.13
Added the *shallow* parameter.
The :class:`dircmp` class provides the following methods:

View file

@ -88,12 +88,15 @@ def _do_cmp(f1, f2):
class dircmp:
"""A class that manages the comparison of 2 directories.
dircmp(a, b, ignore=None, hide=None)
dircmp(a, b, ignore=None, hide=None, shallow=True)
A and B are directories.
IGNORE is a list of names to ignore,
defaults to DEFAULT_IGNORES.
HIDE is a list of names to hide,
defaults to [os.curdir, os.pardir].
SHALLOW specifies whether to just check the stat signature (do not read
the files).
defaults to True.
High level usage:
x = dircmp(dir1, dir2)
@ -121,7 +124,7 @@ class dircmp:
in common_dirs.
"""
def __init__(self, a, b, ignore=None, hide=None): # Initialize
def __init__(self, a, b, ignore=None, hide=None, shallow=True): # Initialize
self.left = a
self.right = b
if hide is None:
@ -132,6 +135,7 @@ def __init__(self, a, b, ignore=None, hide=None): # Initialize
self.ignore = DEFAULT_IGNORES
else:
self.ignore = ignore
self.shallow = shallow
def phase0(self): # Compare everything except common subdirectories
self.left_list = _filter(os.listdir(self.left),
@ -184,7 +188,7 @@ def phase2(self): # Distinguish files, directories, funnies
self.common_funny.append(x)
def phase3(self): # Find out differences between common files
xx = cmpfiles(self.left, self.right, self.common_files)
xx = cmpfiles(self.left, self.right, self.common_files, self.shallow)
self.same_files, self.diff_files, self.funny_files = xx
def phase4(self): # Find out differences between common subdirectories
@ -196,7 +200,8 @@ def phase4(self): # Find out differences between common subdirectories
for x in self.common_dirs:
a_x = os.path.join(self.left, x)
b_x = os.path.join(self.right, x)
self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide)
self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide,
self.shallow)
def phase4_closure(self): # Recursively call phase4() on subdirectories
self.phase4()

View file

@ -8,11 +8,24 @@
from test.support import os_helper
def _create_file_shallow_equal(template_path, new_path):
"""create a file with the same size and mtime but different content."""
shutil.copy2(template_path, new_path)
with open(new_path, 'r+b') as f:
next_char = bytearray(f.read(1))
next_char[0] = (next_char[0] + 1) % 256
f.seek(0)
f.write(next_char)
shutil.copystat(template_path, new_path)
assert os.stat(new_path).st_size == os.stat(template_path).st_size
assert os.stat(new_path).st_mtime == os.stat(template_path).st_mtime
class FileCompareTestCase(unittest.TestCase):
def setUp(self):
self.name = os_helper.TESTFN
self.name_same = os_helper.TESTFN + '-same'
self.name_diff = os_helper.TESTFN + '-diff'
self.name_same_shallow = os_helper.TESTFN + '-same-shallow'
data = 'Contents of file go here.\n'
for name in [self.name, self.name_same, self.name_diff]:
with open(name, 'w', encoding="utf-8") as output:
@ -20,12 +33,19 @@ def setUp(self):
with open(self.name_diff, 'a+', encoding="utf-8") as output:
output.write('An extra line.\n')
for name in [self.name_same, self.name_diff]:
shutil.copystat(self.name, name)
_create_file_shallow_equal(self.name, self.name_same_shallow)
self.dir = tempfile.gettempdir()
def tearDown(self):
os.unlink(self.name)
os.unlink(self.name_same)
os.unlink(self.name_diff)
os.unlink(self.name_same_shallow)
def test_matching(self):
self.assertTrue(filecmp.cmp(self.name, self.name),
@ -36,12 +56,17 @@ def test_matching(self):
"Comparing file to identical file fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same, shallow=False),
"Comparing file to identical file fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same_shallow),
"Shallow identical files should be considered equal")
def test_different(self):
self.assertFalse(filecmp.cmp(self.name, self.name_diff),
"Mismatched files compare as equal")
self.assertFalse(filecmp.cmp(self.name, self.dir),
"File and directory compare as equal")
self.assertFalse(filecmp.cmp(self.name, self.name_same_shallow,
shallow=False),
"Mismatched file to shallow identical file compares as equal")
def test_cache_clear(self):
first_compare = filecmp.cmp(self.name, self.name_same, shallow=False)
@ -56,6 +81,8 @@ def setUp(self):
self.dir = os.path.join(tmpdir, 'dir')
self.dir_same = os.path.join(tmpdir, 'dir-same')
self.dir_diff = os.path.join(tmpdir, 'dir-diff')
self.dir_diff_file = os.path.join(tmpdir, 'dir-diff-file')
self.dir_same_shallow = os.path.join(tmpdir, 'dir-same-shallow')
# Another dir is created under dir_same, but it has a name from the
# ignored list so it should not affect testing results.
@ -63,7 +90,17 @@ def setUp(self):
self.caseinsensitive = os.path.normcase('A') == os.path.normcase('a')
data = 'Contents of file go here.\n'
for dir in (self.dir, self.dir_same, self.dir_diff, self.dir_ignored):
shutil.rmtree(self.dir, True)
os.mkdir(self.dir)
subdir_path = os.path.join(self.dir, 'subdir')
os.mkdir(subdir_path)
dir_file_path = os.path.join(self.dir, "file")
with open(dir_file_path, 'w', encoding="utf-8") as output:
output.write(data)
for dir in (self.dir_same, self.dir_same_shallow,
self.dir_diff, self.dir_diff_file):
shutil.rmtree(dir, True)
os.mkdir(dir)
subdir_path = os.path.join(dir, 'subdir')
@ -72,14 +109,25 @@ def setUp(self):
fn = 'FiLe' # Verify case-insensitive comparison
else:
fn = 'file'
with open(os.path.join(dir, fn), 'w', encoding="utf-8") as output:
output.write(data)
file_path = os.path.join(dir, fn)
if dir is self.dir_same_shallow:
_create_file_shallow_equal(dir_file_path, file_path)
else:
shutil.copy2(dir_file_path, file_path)
with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
output.write('An extra file.\n')
# Add different file2 with respect to dir_diff
with open(os.path.join(self.dir_diff_file, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')
def tearDown(self):
for dir in (self.dir, self.dir_same, self.dir_diff):
for dir in (self.dir, self.dir_same, self.dir_diff,
self.dir_same_shallow, self.dir_diff_file):
shutil.rmtree(dir)
def test_default_ignores(self):
@ -102,11 +150,7 @@ def test_cmpfiles(self):
shallow=False),
"Comparing directory to same fails")
# Add different file2
with open(os.path.join(self.dir, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')
self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_same,
self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_diff_file,
['file', 'file2']) ==
(['file'], ['file2'], []),
"Comparing mismatched directories fails")
@ -116,11 +160,22 @@ def _assert_lists(self, actual, expected):
"""Assert that two lists are equal, up to ordering."""
self.assertEqual(sorted(actual), sorted(expected))
def test_dircmp_identical_directories(self):
self._assert_dircmp_identical_directories()
self._assert_dircmp_identical_directories(shallow=False)
def test_dircmp(self):
def test_dircmp_different_file(self):
self._assert_dircmp_different_file()
self._assert_dircmp_different_file(shallow=False)
def test_dircmp_different_directories(self):
self._assert_dircmp_different_directories()
self._assert_dircmp_different_directories(shallow=False)
def _assert_dircmp_identical_directories(self, **options):
# Check attributes for comparison of two identical directories
left_dir, right_dir = self.dir, self.dir_same
d = filecmp.dircmp(left_dir, right_dir)
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
if self.caseinsensitive:
@ -142,9 +197,10 @@ def test_dircmp(self):
]
self._assert_report(d.report, expected_report)
def _assert_dircmp_different_directories(self, **options):
# Check attributes for comparison of two different directories (right)
left_dir, right_dir = self.dir, self.dir_diff
d = filecmp.dircmp(left_dir, right_dir)
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
self._assert_lists(d.left_list, ['file', 'subdir'])
@ -164,12 +220,8 @@ def test_dircmp(self):
self._assert_report(d.report, expected_report)
# Check attributes for comparison of two different directories (left)
left_dir, right_dir = self.dir, self.dir_diff
shutil.move(
os.path.join(self.dir_diff, 'file2'),
os.path.join(self.dir, 'file2')
)
d = filecmp.dircmp(left_dir, right_dir)
left_dir, right_dir = self.dir_diff, self.dir
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
self._assert_lists(d.left_list, ['file', 'file2', 'subdir'])
@ -180,27 +232,51 @@ def test_dircmp(self):
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_diff),
"Only in {} : ['file2']".format(self.dir),
"diff {} {}".format(self.dir_diff, self.dir),
"Only in {} : ['file2']".format(self.dir_diff),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
# Add different file2
with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')
d = filecmp.dircmp(self.dir, self.dir_diff)
def _assert_dircmp_different_file(self, **options):
# A different file2
d = filecmp.dircmp(self.dir_diff, self.dir_diff_file, **options)
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, ['file2'])
expected_report = [
"diff {} {}".format(self.dir, self.dir_diff),
"diff {} {}".format(self.dir_diff, self.dir_diff_file),
"Identical files : ['file']",
"Differing files : ['file2']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
def test_dircmp_no_shallow_different_file(self):
# With shallow=False, 'file' (same size and mtime as the original but
# different content) must be reported as differing.
d = filecmp.dircmp(self.dir, self.dir_same_shallow, shallow=False)
self.assertEqual(d.same_files, [])
self.assertEqual(d.diff_files, ['file'])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same_shallow),
"Differing files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
def test_dircmp_shallow_same_file(self):
# With the default shallow comparison, 'file' (same size and mtime as
# the original but different content) must be reported as identical.
d = filecmp.dircmp(self.dir, self.dir_same_shallow)
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same_shallow),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)
def test_dircmp_subdirs_type(self):
"""Check that dircmp.subdirs respects subclassing."""
class MyDirCmp(filecmp.dircmp):

View file

@ -0,0 +1,3 @@
Add option for *non-shallow* comparisons to :class:`filecmp.dircmp` like
:func:`filecmp.cmp`. Original patch by Steven Ward. Enhanced by
Tobias Rautenkranz.