From 2dbb2e035b968811ddc00317ccf0cadafacffe1c Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 17 Nov 2023 15:32:50 +0000 Subject: [PATCH] GH-110109: Churn `pathlib.PurePath` methods (#112012) Re-arrange `pathlib.PurePath` methods in source code. No other changes. The `PurePath` implementations of certain special methods, such as `__eq__()` and `__hash__()`, are not usually applicable to user subclasses of `_PathBase`. To facilitate their removal, another patch will split the `PurePath` class into `_PurePathBase` and `PurePath`, with the latter providing these special methods. This patch prepares the ground for splitting `PurePath`. It's similar to e8d77b0, which preceded splitting `Path`. By churning the methods here, subsequent patches will be easier to review and less likely to break things. --- Lib/pathlib.py | 240 +++++++++++++++++++-------------------- Lib/test/test_pathlib.py | 168 +++++++++++++-------------- 2 files changed, 204 insertions(+), 204 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c06ea5c9bf1..656c25bbc1f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -246,44 +246,6 @@ class PurePath: ) pathmod = os.path - def __new__(cls, *args, **kwargs): - """Construct a PurePath from one or several strings and or existing - PurePath objects. The strings and path objects are combined so as - to yield a canonicalized path, which is incorporated into the - new PurePath object. - """ - if cls is PurePath: - cls = PureWindowsPath if os.name == 'nt' else PurePosixPath - return object.__new__(cls) - - def __reduce__(self): - # Using the parts tuple helps share interned path parts - # when pickling related paths. - return (self.__class__, self.parts) - - def __init__(self, *args): - paths = [] - for arg in args: - if isinstance(arg, PurePath): - if arg.pathmod is ntpath and self.pathmod is posixpath: - # GH-103631: Convert separators for backwards compatibility. - paths.extend(path.replace('\\', '/') for path in arg._raw_paths) - else: - paths.extend(arg._raw_paths) - else: - try: - path = os.fspath(arg) - except TypeError: - path = arg - if not isinstance(path, str): - raise TypeError( - "argument should be a str or an os.PathLike " - "object where __fspath__ returns a str, " - f"not {type(path).__name__!r}") - paths.append(path) - self._raw_paths = paths - self._resolving = False - def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. Subclasses may override this method to customize how new path objects @@ -351,96 +313,14 @@ def __str__(self): self._tail) or '.' return self._str - def __fspath__(self): - return str(self) - def as_posix(self): """Return the string representation of the path with forward (/) slashes.""" return str(self).replace(self.pathmod.sep, '/') - def __bytes__(self): - """Return the bytes representation of the path. This is only - recommended to use under Unix.""" - return os.fsencode(self) - def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) - def as_uri(self): - """Return the path as a URI.""" - if not self.is_absolute(): - raise ValueError("relative path can't be expressed as a file URI") - - drive = self.drive - if len(drive) == 2 and drive[1] == ':': - # It's a path on a local drive => 'file:///c:/a/b' - prefix = 'file:///' + drive - path = self.as_posix()[2:] - elif drive: - # It's a path on a network drive => 'file://host/share/a/b' - prefix = 'file:' - path = self.as_posix() - else: - # It's a posix path => 'file:///etc/hosts' - prefix = 'file://' - path = str(self) - from urllib.parse import quote_from_bytes - return prefix + quote_from_bytes(os.fsencode(path)) - - @property - def _str_normcase(self): - # String with normalized case, for hashing and equality checks - try: - return self._str_normcase_cached - except AttributeError: - if _is_case_sensitive(self.pathmod): - self._str_normcase_cached = str(self) - else: - self._str_normcase_cached = str(self).lower() - return self._str_normcase_cached - - @property - def _parts_normcase(self): - # Cached parts with normalized case, for comparisons. - try: - return self._parts_normcase_cached - except AttributeError: - self._parts_normcase_cached = self._str_normcase.split(self.pathmod.sep) - return self._parts_normcase_cached - - def __eq__(self, other): - if not isinstance(other, PurePath): - return NotImplemented - return self._str_normcase == other._str_normcase and self.pathmod is other.pathmod - - def __hash__(self): - try: - return self._hash - except AttributeError: - self._hash = hash(self._str_normcase) - return self._hash - - def __lt__(self, other): - if not isinstance(other, PurePath) or self.pathmod is not other.pathmod: - return NotImplemented - return self._parts_normcase < other._parts_normcase - - def __le__(self, other): - if not isinstance(other, PurePath) or self.pathmod is not other.pathmod: - return NotImplemented - return self._parts_normcase <= other._parts_normcase - - def __gt__(self, other): - if not isinstance(other, PurePath) or self.pathmod is not other.pathmod: - return NotImplemented - return self._parts_normcase > other._parts_normcase - - def __ge__(self, other): - if not isinstance(other, PurePath) or self.pathmod is not other.pathmod: - return NotImplemented - return self._parts_normcase >= other._parts_normcase - @property def drive(self): """The drive prefix (letter or UNC path), if any.""" @@ -694,6 +574,126 @@ def match(self, path_pattern, *, case_sensitive=None): match = _compile_pattern(pattern_str, sep, case_sensitive) return match(str(self)) is not None + def __new__(cls, *args, **kwargs): + """Construct a PurePath from one or several strings and or existing + PurePath objects. The strings and path objects are combined so as + to yield a canonicalized path, which is incorporated into the + new PurePath object. + """ + if cls is PurePath: + cls = PureWindowsPath if os.name == 'nt' else PurePosixPath + return object.__new__(cls) + + def __init__(self, *args): + paths = [] + for arg in args: + if isinstance(arg, PurePath): + if arg.pathmod is ntpath and self.pathmod is posixpath: + # GH-103631: Convert separators for backwards compatibility. + paths.extend(path.replace('\\', '/') for path in arg._raw_paths) + else: + paths.extend(arg._raw_paths) + else: + try: + path = os.fspath(arg) + except TypeError: + path = arg + if not isinstance(path, str): + raise TypeError( + "argument should be a str or an os.PathLike " + "object where __fspath__ returns a str, " + f"not {type(path).__name__!r}") + paths.append(path) + self._raw_paths = paths + self._resolving = False + + def __reduce__(self): + # Using the parts tuple helps share interned path parts + # when pickling related paths. + return (self.__class__, self.parts) + + def __fspath__(self): + return str(self) + + def __bytes__(self): + """Return the bytes representation of the path. This is only + recommended to use under Unix.""" + return os.fsencode(self) + + @property + def _str_normcase(self): + # String with normalized case, for hashing and equality checks + try: + return self._str_normcase_cached + except AttributeError: + if _is_case_sensitive(self.pathmod): + self._str_normcase_cached = str(self) + else: + self._str_normcase_cached = str(self).lower() + return self._str_normcase_cached + + def __hash__(self): + try: + return self._hash + except AttributeError: + self._hash = hash(self._str_normcase) + return self._hash + + def __eq__(self, other): + if not isinstance(other, PurePath): + return NotImplemented + return self._str_normcase == other._str_normcase and self.pathmod is other.pathmod + + @property + def _parts_normcase(self): + # Cached parts with normalized case, for comparisons. + try: + return self._parts_normcase_cached + except AttributeError: + self._parts_normcase_cached = self._str_normcase.split(self.pathmod.sep) + return self._parts_normcase_cached + + def __lt__(self, other): + if not isinstance(other, PurePath) or self.pathmod is not other.pathmod: + return NotImplemented + return self._parts_normcase < other._parts_normcase + + def __le__(self, other): + if not isinstance(other, PurePath) or self.pathmod is not other.pathmod: + return NotImplemented + return self._parts_normcase <= other._parts_normcase + + def __gt__(self, other): + if not isinstance(other, PurePath) or self.pathmod is not other.pathmod: + return NotImplemented + return self._parts_normcase > other._parts_normcase + + def __ge__(self, other): + if not isinstance(other, PurePath) or self.pathmod is not other.pathmod: + return NotImplemented + return self._parts_normcase >= other._parts_normcase + + def as_uri(self): + """Return the path as a URI.""" + if not self.is_absolute(): + raise ValueError("relative path can't be expressed as a file URI") + + drive = self.drive + if len(drive) == 2 and drive[1] == ':': + # It's a path on a local drive => 'file:///c:/a/b' + prefix = 'file:///' + drive + path = self.as_posix()[2:] + elif drive: + # It's a path on a network drive => 'file://host/share/a/b' + prefix = 'file:' + path = self.as_posix() + else: + # It's a posix path => 'file:///etc/hosts' + prefix = 'file://' + path = str(self) + from urllib.parse import quote_from_bytes + return prefix + quote_from_bytes(os.fsencode(path)) + # Subclassing os.PathLike makes isinstance() checks slower, # which in turn makes Path construction slower. Register instead! diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index eca88d0cc74..a53aae855df 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -117,33 +117,6 @@ def test_different_pathmods_unordered(self): with self.assertRaises(TypeError): p >= q - def test_bytes(self): - P = self.cls - message = (r"argument should be a str or an os\.PathLike object " - r"where __fspath__ returns a str, not 'bytes'") - with self.assertRaisesRegex(TypeError, message): - P(b'a') - with self.assertRaisesRegex(TypeError, message): - P(b'a', 'b') - with self.assertRaisesRegex(TypeError, message): - P('a', b'b') - with self.assertRaises(TypeError): - P('a').joinpath(b'b') - with self.assertRaises(TypeError): - P('a') / b'b' - with self.assertRaises(TypeError): - b'a' / P('b') - with self.assertRaises(TypeError): - P('a').match(b'b') - with self.assertRaises(TypeError): - P('a').relative_to(b'b') - with self.assertRaises(TypeError): - P('a').with_name(b'b') - with self.assertRaises(TypeError): - P('a').with_stem(b'b') - with self.assertRaises(TypeError): - P('a').with_suffix(b'b') - def _check_str_subclass(self, *args): # Issue #21127: it should be possible to construct a PurePath object # from a str subclass instance, and it then gets converted to @@ -273,18 +246,6 @@ def test_as_posix_common(self): self.assertEqual(P(pathstr).as_posix(), pathstr) # Other tests for as_posix() are in test_equivalences(). - def test_as_bytes_common(self): - sep = os.fsencode(self.sep) - P = self.cls - self.assertEqual(bytes(P('a/b')), b'a' + sep + b'b') - - def test_as_uri_common(self): - P = self.cls - with self.assertRaises(ValueError): - P('a').as_uri() - with self.assertRaises(ValueError): - P().as_uri() - def test_repr_common(self): for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): with self.subTest(pathstr=pathstr): @@ -297,17 +258,6 @@ def test_repr_common(self): inner = r[len(clsname) + 1 : -1] self.assertEqual(eval(inner), p.as_posix()) - def test_repr_roundtrips(self): - for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): - with self.subTest(pathstr=pathstr): - p = self.cls(pathstr) - r = repr(p) - # The repr() roundtrips. - q = eval(r, pathlib.__dict__) - self.assertIs(q.__class__, p.__class__) - self.assertEqual(q, p) - self.assertEqual(repr(q), r) - def test_eq_common(self): P = self.cls self.assertEqual(P('a/b'), P('a/b')) @@ -390,34 +340,6 @@ def test_match_common(self): self.assertTrue(P().match('**')) self.assertFalse(P().match('**/*')) - def test_ordering_common(self): - # Ordering is tuple-alike. - def assertLess(a, b): - self.assertLess(a, b) - self.assertGreater(b, a) - P = self.cls - a = P('a') - b = P('a/b') - c = P('abc') - d = P('b') - assertLess(a, b) - assertLess(a, c) - assertLess(a, d) - assertLess(b, c) - assertLess(c, d) - P = self.cls - a = P('/a') - b = P('/a/b') - c = P('/abc') - d = P('/b') - assertLess(a, b) - assertLess(a, c) - assertLess(a, d) - assertLess(b, c) - assertLess(c, d) - with self.assertRaises(TypeError): - P() < {} - def test_parts_common(self): # `parts` returns a tuple. sep = self.sep @@ -430,12 +352,6 @@ def test_parts_common(self): parts = p.parts self.assertEqual(parts, (sep, 'a', 'b')) - def test_fspath_common(self): - P = self.cls - p = P('a/b') - self._check_str(p.__fspath__(), ('a/b',)) - self._check_str(os.fspath(p), ('a/b',)) - def test_equivalences(self): for k, tuples in self.equivalences.items(): canon = k.replace('/', self.sep) @@ -787,6 +703,90 @@ def test_pickling_common(self): self.assertEqual(hash(pp), hash(p)) self.assertEqual(str(pp), str(p)) + def test_fspath_common(self): + P = self.cls + p = P('a/b') + self._check_str(p.__fspath__(), ('a/b',)) + self._check_str(os.fspath(p), ('a/b',)) + + def test_bytes(self): + P = self.cls + message = (r"argument should be a str or an os\.PathLike object " + r"where __fspath__ returns a str, not 'bytes'") + with self.assertRaisesRegex(TypeError, message): + P(b'a') + with self.assertRaisesRegex(TypeError, message): + P(b'a', 'b') + with self.assertRaisesRegex(TypeError, message): + P('a', b'b') + with self.assertRaises(TypeError): + P('a').joinpath(b'b') + with self.assertRaises(TypeError): + P('a') / b'b' + with self.assertRaises(TypeError): + b'a' / P('b') + with self.assertRaises(TypeError): + P('a').match(b'b') + with self.assertRaises(TypeError): + P('a').relative_to(b'b') + with self.assertRaises(TypeError): + P('a').with_name(b'b') + with self.assertRaises(TypeError): + P('a').with_stem(b'b') + with self.assertRaises(TypeError): + P('a').with_suffix(b'b') + + def test_as_bytes_common(self): + sep = os.fsencode(self.sep) + P = self.cls + self.assertEqual(bytes(P('a/b')), b'a' + sep + b'b') + + def test_ordering_common(self): + # Ordering is tuple-alike. + def assertLess(a, b): + self.assertLess(a, b) + self.assertGreater(b, a) + P = self.cls + a = P('a') + b = P('a/b') + c = P('abc') + d = P('b') + assertLess(a, b) + assertLess(a, c) + assertLess(a, d) + assertLess(b, c) + assertLess(c, d) + P = self.cls + a = P('/a') + b = P('/a/b') + c = P('/abc') + d = P('/b') + assertLess(a, b) + assertLess(a, c) + assertLess(a, d) + assertLess(b, c) + assertLess(c, d) + with self.assertRaises(TypeError): + P() < {} + + def test_as_uri_common(self): + P = self.cls + with self.assertRaises(ValueError): + P('a').as_uri() + with self.assertRaises(ValueError): + P().as_uri() + + def test_repr_roundtrips(self): + for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + with self.subTest(pathstr=pathstr): + p = self.cls(pathstr) + r = repr(p) + # The repr() roundtrips. + q = eval(r, pathlib.__dict__) + self.assertIs(q.__class__, p.__class__) + self.assertEqual(q, p) + self.assertEqual(repr(q), r) + class PurePosixPathTest(PurePathTest): cls = pathlib.PurePosixPath