GH-104996: Defer joining of pathlib.PurePath() arguments. (GH-104999)

Joining of arguments is moved from `__init__` to `_load_parts`, which is
called when a normalized path is needed.
This commit is contained in:
Barney Gale 2023-06-07 23:27:06 +01:00 committed by GitHub
parent f5df347fcf
commit ffeaec7e60
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 17 deletions

View file

@ -195,10 +195,10 @@ def _select_unique(paths):
yielded = set()
try:
for path in paths:
raw_path = path._raw_path
if raw_path not in yielded:
path_str = str(path)
if path_str not in yielded:
yield path
yielded.add(raw_path)
yielded.add(path_str)
finally:
yielded.clear()
@ -247,9 +247,9 @@ class PurePath:
"""
__slots__ = (
# The `_raw_path` slot stores an unnormalized string path. This is set
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_path',
'_raw_paths',
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
@ -306,10 +306,11 @@ def __init__(self, *args):
paths = []
for arg in args:
if isinstance(arg, PurePath):
path = arg._raw_path
if arg._flavour is ntpath and self._flavour is posixpath:
# GH-103631: Convert separators for backwards compatibility.
path = path.replace('\\', '/')
paths.extend(path.replace('\\', '/') for path in arg._raw_paths)
else:
paths.extend(arg._raw_paths)
else:
try:
path = os.fspath(arg)
@ -320,13 +321,8 @@ def __init__(self, *args):
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
paths.append(path)
if len(paths) == 0:
self._raw_path = ''
elif len(paths) == 1:
self._raw_path = paths[0]
else:
self._raw_path = self._flavour.join(*paths)
paths.append(path)
self._raw_paths = paths
def with_segments(self, *pathsegments):
"""Construct a new path object from any number of path-like objects.
@ -356,7 +352,14 @@ def _parse_path(cls, path):
return drv, root, parsed
def _load_parts(self):
drv, root, tail = self._parse_path(self._raw_path)
paths = self._raw_paths
if len(paths) == 0:
path = ''
elif len(paths) == 1:
path = paths[0]
else:
path = self._flavour.join(*paths)
drv, root, tail = self._parse_path(path)
self._drv = drv
self._root = root
self._tail_cached = tail
@ -687,10 +690,17 @@ def parents(self):
def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
# ntpath.isabs() is defective - see GH-44626 .
if self._flavour is ntpath:
# ntpath.isabs() is defective - see GH-44626.
return bool(self.drive and self.root)
return self._flavour.isabs(self._raw_path)
elif self._flavour is posixpath:
# Optimization: work with raw paths on POSIX.
for path in self._raw_paths:
if path.startswith('/'):
return True
return False
else:
return self._flavour.isabs(str(self))
def is_reserved(self):
"""Return True if the path contains one of the special names reserved

View file

@ -0,0 +1,2 @@
Improve performance of :class:`pathlib.PurePath` initialisation by
deferring joining of paths when multiple arguments are given.