GH-110109: Add pathlib._PurePathBase (#110670)

Add private `pathlib._PurePathBase` class: a superclass of both `PurePath` and `_PathBase`. Unlike `PurePath`, it does not define any of these special methods: `__fspath__`, `__bytes__`, `__reduce__`, `__hash__`, `__eq__`, `__lt__`, `__le__`, `__gt__`, `__ge__`. Its initializer and path joining methods accept only strings, not `os.PathLike` objects more broadly.

This is important for supporting *virtual paths*: user subclasses of `_PathBase` that provide access to archive files, FTP servers, etc. In these classes, the above methods should be implemented by users only as appropriate, with due consideration for the hash/equality of any backing objects, such as file objects or sockets.
This commit is contained in:
Barney Gale 2023-12-08 17:39:04 +00:00 committed by GitHub
parent 5a0137ca34
commit 76929fdeeb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 115 additions and 53 deletions

View file

@ -198,14 +198,13 @@ def __repr__(self):
return "<{}.parents>".format(type(self._path).__name__) return "<{}.parents>".format(type(self._path).__name__)
class PurePath: class _PurePathBase:
"""Base class for manipulating paths without I/O. """Base class for pure path objects.
PurePath represents a filesystem path and offers operations which This class *does not* provide several magic methods that are defined in
don't imply any actual filesystem I/O. Depending on your system, its subclass PurePath. They are: __fspath__, __bytes__, __reduce__,
instantiating a PurePath will return either a PurePosixPath or a __hash__, __eq__, __lt__, __le__, __gt__, __ge__. Its initializer and path
PureWindowsPath object. You can also instantiate either of these classes joining methods accept only strings, not os.PathLike objects more broadly.
directly, regardless of your system.
""" """
__slots__ = ( __slots__ = (
@ -227,22 +226,6 @@ class PurePath:
# for the first time. It's used to implement `_str_normcase` # for the first time. It's used to implement `_str_normcase`
'_str', '_str',
# The `_str_normcase_cached` slot stores the string path with
# normalized case. It is set when the `_str_normcase` property is
# accessed for the first time. It's used to implement `__eq__()`
# `__hash__()`, and `_parts_normcase`
'_str_normcase_cached',
# The `_parts_normcase_cached` slot stores the case-normalized
# string path after splitting on path separators. It's set when the
# `_parts_normcase` property is accessed for the first time. It's used
# to implement comparison methods like `__lt__()`.
'_parts_normcase_cached',
# The `_hash` slot stores the hash of the case-normalized string
# path. It's set when `__hash__()` is called for the first time.
'_hash',
# The '_resolving' slot stores a boolean indicating whether the path # The '_resolving' slot stores a boolean indicating whether the path
# is being processed by `_PathBase.resolve()`. This prevents duplicate # is being processed by `_PathBase.resolve()`. This prevents duplicate
# work from occurring when `resolve()` calls `stat()` or `readlink()`. # work from occurring when `resolve()` calls `stat()` or `readlink()`.
@ -250,6 +233,10 @@ class PurePath:
) )
pathmod = os.path pathmod = os.path
def __init__(self, *paths):
self._raw_paths = paths
self._resolving = False
def with_segments(self, *pathsegments): def with_segments(self, *pathsegments):
"""Construct a new path object from any number of path-like objects. """Construct a new path object from any number of path-like objects.
Subclasses may override this method to customize how new path objects Subclasses may override this method to customize how new path objects
@ -444,7 +431,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False):
warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg, warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg,
remove=(3, 14)) remove=(3, 14))
other = self.with_segments(other, *_deprecated) other = self.with_segments(other, *_deprecated)
elif not isinstance(other, PurePath): elif not isinstance(other, _PurePathBase):
other = self.with_segments(other) other = self.with_segments(other)
for step, path in enumerate(chain([other], other.parents)): for step, path in enumerate(chain([other], other.parents)):
if path == self or path in self.parents: if path == self or path in self.parents:
@ -468,7 +455,7 @@ def is_relative_to(self, other, /, *_deprecated):
warnings._deprecated("pathlib.PurePath.is_relative_to(*args)", warnings._deprecated("pathlib.PurePath.is_relative_to(*args)",
msg, remove=(3, 14)) msg, remove=(3, 14))
other = self.with_segments(other, *_deprecated) other = self.with_segments(other, *_deprecated)
elif not isinstance(other, PurePath): elif not isinstance(other, _PurePathBase):
other = self.with_segments(other) other = self.with_segments(other)
return other == self or other in self.parents return other == self or other in self.parents
@ -487,7 +474,7 @@ def joinpath(self, *pathsegments):
paths) or a totally different path (if one of the arguments is paths) or a totally different path (if one of the arguments is
anchored). anchored).
""" """
return self.with_segments(self, *pathsegments) return self.with_segments(*self._raw_paths, *pathsegments)
def __truediv__(self, key): def __truediv__(self, key):
try: try:
@ -497,7 +484,7 @@ def __truediv__(self, key):
def __rtruediv__(self, key): def __rtruediv__(self, key):
try: try:
return self.with_segments(key, self) return self.with_segments(key, *self._raw_paths)
except TypeError: except TypeError:
return NotImplemented return NotImplemented
@ -555,7 +542,7 @@ def match(self, path_pattern, *, case_sensitive=None):
""" """
Return True if this path matches the given pattern. Return True if this path matches the given pattern.
""" """
if not isinstance(path_pattern, PurePath): if not isinstance(path_pattern, _PurePathBase):
path_pattern = self.with_segments(path_pattern) path_pattern = self.with_segments(path_pattern)
if case_sensitive is None: if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.pathmod) case_sensitive = _is_case_sensitive(self.pathmod)
@ -570,6 +557,35 @@ def match(self, path_pattern, *, case_sensitive=None):
match = _compile_pattern(pattern_str, sep, case_sensitive) match = _compile_pattern(pattern_str, sep, case_sensitive)
return match(str(self)) is not None return match(str(self)) is not None
class PurePath(_PurePathBase):
"""Base class for manipulating paths without I/O.
PurePath represents a filesystem path and offers operations which
don't imply any actual filesystem I/O. Depending on your system,
instantiating a PurePath will return either a PurePosixPath or a
PureWindowsPath object. You can also instantiate either of these classes
directly, regardless of your system.
"""
__slots__ = (
# The `_str_normcase_cached` slot stores the string path with
# normalized case. It is set when the `_str_normcase` property is
# accessed for the first time. It's used to implement `__eq__()`
# `__hash__()`, and `_parts_normcase`
'_str_normcase_cached',
# The `_parts_normcase_cached` slot stores the case-normalized
# string path after splitting on path separators. It's set when the
# `_parts_normcase` property is accessed for the first time. It's used
# to implement comparison methods like `__lt__()`.
'_parts_normcase_cached',
# The `_hash` slot stores the hash of the case-normalized string
# path. It's set when `__hash__()` is called for the first time.
'_hash',
)
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
"""Construct a PurePath from one or several strings and or existing """Construct a PurePath from one or several strings and or existing
PurePath objects. The strings and path objects are combined so as PurePath objects. The strings and path objects are combined so as
@ -600,8 +616,7 @@ def __init__(self, *args):
"object where __fspath__ returns a str, " "object where __fspath__ returns a str, "
f"not {type(path).__name__!r}") f"not {type(path).__name__!r}")
paths.append(path) paths.append(path)
self._raw_paths = paths super().__init__(*paths)
self._resolving = False
def __reduce__(self): def __reduce__(self):
# Using the parts tuple helps share interned path parts # Using the parts tuple helps share interned path parts
@ -719,7 +734,7 @@ class PureWindowsPath(PurePath):
# Filesystem-accessing classes # Filesystem-accessing classes
class _PathBase(PurePath): class _PathBase(_PurePathBase):
"""Base class for concrete path objects. """Base class for concrete path objects.
This class provides dummy implementations for many methods that derived This class provides dummy implementations for many methods that derived
@ -733,8 +748,6 @@ class _PathBase(PurePath):
such as paths in archive files or on remote storage systems. such as paths in archive files or on remote storage systems.
""" """
__slots__ = () __slots__ = ()
__bytes__ = None
__fspath__ = None # virtual paths have no local file system representation
@classmethod @classmethod
def _unsupported(cls, method_name): def _unsupported(cls, method_name):
@ -1341,7 +1354,7 @@ def as_uri(self):
self._unsupported("as_uri") self._unsupported("as_uri")
class Path(_PathBase): class Path(_PathBase, PurePath):
"""PurePath subclass that can make system calls. """PurePath subclass that can make system calls.
Path represents a filesystem path but unlike PurePath, also offers Path represents a filesystem path but unlike PurePath, also offers
@ -1351,8 +1364,6 @@ class Path(_PathBase):
but cannot instantiate a WindowsPath on a POSIX system or vice versa. but cannot instantiate a WindowsPath on a POSIX system or vice versa.
""" """
__slots__ = () __slots__ = ()
__bytes__ = PurePath.__bytes__
__fspath__ = PurePath.__fspath__
as_uri = PurePath.as_uri as_uri = PurePath.as_uri
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):

View file

@ -49,8 +49,35 @@ def test_is_notimplemented(self):
# Tests for the pure classes. # Tests for the pure classes.
# #
class PurePathTest(unittest.TestCase):
cls = pathlib.PurePath class PurePathBaseTest(unittest.TestCase):
cls = pathlib._PurePathBase
def test_magic_methods(self):
P = self.cls
self.assertFalse(hasattr(P, '__fspath__'))
self.assertFalse(hasattr(P, '__bytes__'))
self.assertIs(P.__reduce__, object.__reduce__)
self.assertIs(P.__hash__, object.__hash__)
self.assertIs(P.__eq__, object.__eq__)
self.assertIs(P.__lt__, object.__lt__)
self.assertIs(P.__le__, object.__le__)
self.assertIs(P.__gt__, object.__gt__)
self.assertIs(P.__ge__, object.__ge__)
class DummyPurePath(pathlib._PurePathBase):
def __eq__(self, other):
if not isinstance(other, DummyPurePath):
return NotImplemented
return str(self) == str(other)
def __hash__(self):
return hash(str(self))
class DummyPurePathTest(unittest.TestCase):
cls = DummyPurePath
# Keys are canonical paths, values are list of tuples of arguments # Keys are canonical paths, values are list of tuples of arguments
# supposed to produce equal paths. # supposed to produce equal paths.
@ -82,12 +109,6 @@ def test_constructor_common(self):
P('/a', 'b', 'c') P('/a', 'b', 'c')
P('a/b/c') P('a/b/c')
P('/a/b/c') P('/a/b/c')
P(FakePath("a/b/c"))
self.assertEqual(P(P('a')), P('a'))
self.assertEqual(P(P('a'), 'b'), P('a/b'))
self.assertEqual(P(P('a'), P('b')), P('a/b'))
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
self.assertEqual(P(P('./a:b')), P('./a:b'))
def test_concrete_class(self): def test_concrete_class(self):
if self.cls is pathlib.PurePath: if self.cls is pathlib.PurePath:
@ -193,8 +214,6 @@ def test_join_common(self):
self.assertIs(type(pp), type(p)) self.assertIs(type(pp), type(p))
pp = p.joinpath('c', 'd') pp = p.joinpath('c', 'd')
self.assertEqual(pp, P('a/b/c/d')) self.assertEqual(pp, P('a/b/c/d'))
pp = p.joinpath(P('c'))
self.assertEqual(pp, P('a/b/c'))
pp = p.joinpath('/c') pp = p.joinpath('/c')
self.assertEqual(pp, P('/c')) self.assertEqual(pp, P('/c'))
@ -211,8 +230,6 @@ def test_div_common(self):
self.assertEqual(pp, P('a/b/c/d')) self.assertEqual(pp, P('a/b/c/d'))
pp = 'c' / p / 'd' pp = 'c' / p / 'd'
self.assertEqual(pp, P('c/a/b/d')) self.assertEqual(pp, P('c/a/b/d'))
pp = p / P('c')
self.assertEqual(pp, P('a/b/c'))
pp = p/ '/c' pp = p/ '/c'
self.assertEqual(pp, P('/c')) self.assertEqual(pp, P('/c'))
@ -678,6 +695,29 @@ def test_is_relative_to_common(self):
self.assertFalse(p.is_relative_to('')) self.assertFalse(p.is_relative_to(''))
self.assertFalse(p.is_relative_to(P('a'))) self.assertFalse(p.is_relative_to(P('a')))
class PurePathTest(DummyPurePathTest):
cls = pathlib.PurePath
def test_constructor_nested(self):
P = self.cls
P(FakePath("a/b/c"))
self.assertEqual(P(P('a')), P('a'))
self.assertEqual(P(P('a'), 'b'), P('a/b'))
self.assertEqual(P(P('a'), P('b')), P('a/b'))
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
self.assertEqual(P(P('./a:b')), P('./a:b'))
def test_join_nested(self):
P = self.cls
p = P('a/b').joinpath(P('c'))
self.assertEqual(p, P('a/b/c'))
def test_div_nested(self):
P = self.cls
p = P('a/b') / P('c')
self.assertEqual(p, P('a/b/c'))
def test_pickling_common(self): def test_pickling_common(self):
P = self.cls P = self.cls
p = P('/a/b') p = P('/a/b')
@ -1545,7 +1585,7 @@ class cls(pathlib.PurePath):
# Tests for the virtual classes. # Tests for the virtual classes.
# #
class PathBaseTest(PurePathTest): class PathBaseTest(PurePathBaseTest):
cls = pathlib._PathBase cls = pathlib._PathBase
def test_unsupported_operation(self): def test_unsupported_operation(self):
@ -1636,6 +1676,14 @@ class DummyPath(pathlib._PathBase):
_directories = {} _directories = {}
_symlinks = {} _symlinks = {}
def __eq__(self, other):
if not isinstance(other, DummyPath):
return NotImplemented
return str(self) == str(other)
def __hash__(self):
return hash(str(self))
def stat(self, *, follow_symlinks=True): def stat(self, *, follow_symlinks=True):
if follow_symlinks: if follow_symlinks:
path = str(self.resolve()) path = str(self.resolve())
@ -1707,7 +1755,7 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
self.mkdir(mode, parents=False, exist_ok=exist_ok) self.mkdir(mode, parents=False, exist_ok=exist_ok)
class DummyPathTest(unittest.TestCase): class DummyPathTest(DummyPurePathTest):
"""Tests for PathBase methods that use stat(), open() and iterdir().""" """Tests for PathBase methods that use stat(), open() and iterdir()."""
cls = DummyPath cls = DummyPath
@ -2014,7 +2062,7 @@ def _check(path, glob, expected):
def test_rglob_common(self): def test_rglob_common(self):
def _check(glob, expected): def _check(glob, expected):
self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected)) self.assertEqual(set(glob), {P(BASE, q) for q in expected})
P = self.cls P = self.cls
p = P(BASE) p = P(BASE)
it = p.rglob("fileA") it = p.rglob("fileA")
@ -2198,7 +2246,7 @@ def test_glob_above_recursion_limit(self):
# directory_depth > recursion_limit # directory_depth > recursion_limit
directory_depth = recursion_limit + 10 directory_depth = recursion_limit + 10
base = self.cls(BASE, 'deep') base = self.cls(BASE, 'deep')
path = self.cls(base, *(['d'] * directory_depth)) path = base.joinpath(*(['d'] * directory_depth))
path.mkdir(parents=True) path.mkdir(parents=True)
with set_recursion_limit(recursion_limit): with set_recursion_limit(recursion_limit):
@ -2741,7 +2789,7 @@ def test_walk_above_recursion_limit(self):
# directory_depth > recursion_limit # directory_depth > recursion_limit
directory_depth = recursion_limit + 10 directory_depth = recursion_limit + 10
base = self.cls(BASE, 'deep') base = self.cls(BASE, 'deep')
path = self.cls(base, *(['d'] * directory_depth)) path = base.joinpath(*(['d'] * directory_depth))
path.mkdir(parents=True) path.mkdir(parents=True)
with set_recursion_limit(recursion_limit): with set_recursion_limit(recursion_limit):

View file

@ -0,0 +1,3 @@
Add private ``pathlib._PurePathBase`` class: a base class for
:class:`pathlib.PurePath` that omits certain magic methods. It may be made
public (along with ``_PathBase``) in the future.