diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 4cab3113c9e..d06d9ce8c9e 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -344,15 +344,24 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: realpath(path) +.. function:: realpath(path, *, strict=False) Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path (if they are supported by the operating system). + If a path doesn't exist or a symlink loop is encountered, and *strict* is + ``True``, :exc:`OSError` is raised. If *strict* is ``False``, the path is + resolved as far as possible and any remainder is appended without checking + whether it exists. + .. note:: - When symbolic link cycles occur, the returned path will be one member of - the cycle, but no guarantee is made about which member that will be. + This function emulates the operating system's procedure for making a path + canonical, which differs slightly between Windows and UNIX with respect + to how links and subsequent path components interact. + + Operating system APIs make paths canonical as needed, so it's not + normally necessary to call this function. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -360,6 +369,9 @@ the :mod:`glob` module.) .. versionchanged:: 3.8 Symbolic links and junctions are now resolved on Windows. + .. versionchanged:: 3.10 + The *strict* parameter was added. + .. function:: relpath(path, start=os.curdir) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 5ae8079074c..527c7ae1938 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -635,7 +635,7 @@ def _getfinalpathname_nonstrict(path): tail = join(name, tail) if tail else name return tail - def realpath(path): + def realpath(path, *, strict=False): path = normpath(path) if isinstance(path, bytes): prefix = b'\\\\?\\' @@ -660,6 +660,8 @@ def realpath(path): path = _getfinalpathname(path) initial_winerror = 0 except OSError as ex: + if strict: + raise initial_winerror = ex.winerror path = _getfinalpathname_nonstrict(path) # The path returned by _getfinalpathname will always start with \\?\ - diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 37934c6038e..073fce82ad5 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -14,12 +14,6 @@ from urllib.parse import quote_from_bytes as urlquote_from_bytes -if os.name == 'nt': - from nt import _getfinalpathname -else: - _getfinalpathname = None - - __all__ = [ "PurePath", "PurePosixPath", "PureWindowsPath", "Path", "PosixPath", "WindowsPath", @@ -29,14 +23,17 @@ # Internals # +_WINERROR_NOT_READY = 21 # drive exists but is not accessible +_WINERROR_INVALID_NAME = 123 # fix for bpo-35306 +_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself + # EBADF - guard against macOS `stat` throwing EBADF _IGNORED_ERROS = (ENOENT, ENOTDIR, EBADF, ELOOP) _IGNORED_WINERRORS = ( - 21, # ERROR_NOT_READY - drive exists but is not accessible - 123, # ERROR_INVALID_NAME - fix for bpo-35306 - 1921, # ERROR_CANT_RESOLVE_FILENAME - fix for broken symlink pointing to itself -) + _WINERROR_NOT_READY, + _WINERROR_INVALID_NAME, + _WINERROR_CANT_RESOLVE_FILENAME) def _ignore_error(exception): return (getattr(exception, 'errno', None) in _IGNORED_ERROS or @@ -186,30 +183,6 @@ def casefold_parts(self, parts): def compile_pattern(self, pattern): return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch - def resolve(self, path, strict=False): - s = str(path) - if not s: - return path._accessor.getcwd() - previous_s = None - if _getfinalpathname is not None: - if strict: - return self._ext_to_normal(_getfinalpathname(s)) - else: - tail_parts = [] # End of the path after the first one not found - while True: - try: - s = self._ext_to_normal(_getfinalpathname(s)) - except FileNotFoundError: - previous_s = s - s, tail = os.path.split(s) - tail_parts.append(tail) - if previous_s == s: - return path - else: - return os.path.join(s, *reversed(tail_parts)) - # Means fallback on absolute - return None - def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): prefix = '' if s.startswith(ext_prefix): @@ -220,10 +193,6 @@ def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): s = '\\' + s[3:] return prefix, s - def _ext_to_normal(self, s): - # Turn back an extended path into a normal DOS-like path - return self._split_extended_path(s)[1] - def is_reserved(self, parts): # NOTE: the rules for reserved names seem somewhat complicated # (e.g. r"..\NUL" is reserved but not r"foo\NUL"). @@ -281,54 +250,6 @@ def casefold_parts(self, parts): def compile_pattern(self, pattern): return re.compile(fnmatch.translate(pattern)).fullmatch - def resolve(self, path, strict=False): - sep = self.sep - accessor = path._accessor - seen = {} - def _resolve(path, rest): - if rest.startswith(sep): - path = '' - - for name in rest.split(sep): - if not name or name == '.': - # current dir - continue - if name == '..': - # parent dir - path, _, _ = path.rpartition(sep) - continue - if path.endswith(sep): - newpath = path + name - else: - newpath = path + sep + name - if newpath in seen: - # Already seen this path - path = seen[newpath] - if path is not None: - # use cached value - continue - # The symlink is not resolved, so we must have a symlink loop. - raise RuntimeError("Symlink loop from %r" % newpath) - # Resolve the symbolic link - try: - target = accessor.readlink(newpath) - except OSError as e: - if e.errno != EINVAL and strict: - raise - # Not a symlink, or non-strict mode. We just leave the path - # untouched. - path = newpath - else: - seen[newpath] = None # not resolved symlink - path = _resolve(path, target) - seen[newpath] = path # resolved symlink - - return path - # NOTE: according to POSIX, getcwd() cannot contain path components - # which are symlinks. - base = '' if path.is_absolute() else accessor.getcwd() - return _resolve(base, str(path)) or sep - def is_reserved(self, parts): return False @@ -424,6 +345,8 @@ def group(self, path): expanduser = staticmethod(os.path.expanduser) + realpath = staticmethod(os.path.realpath) + _normal_accessor = _NormalAccessor() @@ -1132,15 +1055,27 @@ def resolve(self, strict=False): normalizing it (for example turning slashes into backslashes under Windows). """ - s = self._flavour.resolve(self, strict=strict) - if s is None: - # No symlink resolution => for consistency, raise an error if - # the path doesn't exist or is forbidden - self.stat() - s = str(self.absolute()) - # Now we have no symlinks in the path, it's safe to normalize it. - normed = self._flavour.pathmod.normpath(s) - return self._from_parts((normed,)) + + def check_eloop(e): + winerror = getattr(e, 'winerror', 0) + if e.errno == ELOOP or winerror == _WINERROR_CANT_RESOLVE_FILENAME: + raise RuntimeError("Symlink loop from %r" % e.filename) + + try: + s = self._accessor.realpath(self, strict=strict) + except OSError as e: + check_eloop(e) + raise + p = self._from_parts((s,)) + + # In non-strict mode, realpath() doesn't raise on symlink loops. + # Ensure we get an exception by calling stat() + if not strict: + try: + p.stat() + except OSError as e: + check_eloop(e) + return p def stat(self, *, follow_symlinks=True): """ diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 62afbd0ccf0..259baa64b19 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -387,16 +387,16 @@ def abspath(path): # Return a canonical path (i.e. the absolute location of a file on the # filesystem). -def realpath(filename): +def realpath(filename, *, strict=False): """Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path.""" filename = os.fspath(filename) - path, ok = _joinrealpath(filename[:0], filename, {}) + path, ok = _joinrealpath(filename[:0], filename, strict, {}) return abspath(path) # Join two paths, normalizing and eliminating any symbolic links # encountered in the second path. -def _joinrealpath(path, rest, seen): +def _joinrealpath(path, rest, strict, seen): if isinstance(path, bytes): sep = b'/' curdir = b'.' @@ -425,7 +425,15 @@ def _joinrealpath(path, rest, seen): path = pardir continue newpath = join(path, name) - if not islink(newpath): + try: + st = os.lstat(newpath) + except OSError: + if strict: + raise + is_link = False + else: + is_link = stat.S_ISLNK(st.st_mode) + if not is_link: path = newpath continue # Resolve the symbolic link @@ -436,10 +444,14 @@ def _joinrealpath(path, rest, seen): # use cached value continue # The symlink is not resolved, so we must have a symlink loop. - # Return already resolved part + rest of the path unchanged. - return join(newpath, rest), False + if strict: + # Raise OSError(errno.ELOOP) + os.stat(newpath) + else: + # Return already resolved part + rest of the path unchanged. + return join(newpath, rest), False seen[newpath] = None # not resolved symlink - path, ok = _joinrealpath(path, os.readlink(newpath), seen) + path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen) if not ok: return join(path, rest), False seen[newpath] = path # resolved symlink diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index f97aca5f94f..661c59d6171 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -269,6 +269,17 @@ def test_realpath_basic(self): self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")), os.fsencode(ABSTFN)) + @os_helper.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_strict(self): + # Bug #43757: raise FileNotFoundError in strict mode if we encounter + # a path that does not exist. + ABSTFN = ntpath.abspath(os_helper.TESTFN) + os.symlink(ABSTFN + "1", ABSTFN) + self.addCleanup(os_helper.unlink, ABSTFN) + self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN, strict=True) + self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN + "2", strict=True) + @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') def test_realpath_relative(self): @@ -340,8 +351,9 @@ def test_realpath_broken_symlinks(self): @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') def test_realpath_symlink_loops(self): - # Symlink loops are non-deterministic as to which path is returned, but - # it will always be the fully resolved path of one member of the cycle + # Symlink loops in non-strict mode are non-deterministic as to which + # path is returned, but it will always be the fully resolved path of + # one member of the cycle ABSTFN = ntpath.abspath(os_helper.TESTFN) self.addCleanup(os_helper.unlink, ABSTFN) self.addCleanup(os_helper.unlink, ABSTFN + "1") @@ -383,6 +395,50 @@ def test_realpath_symlink_loops(self): # Test using relative path as well. self.assertPathEqual(ntpath.realpath(ntpath.basename(ABSTFN)), ABSTFN) + @os_helper.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_symlink_loops_strict(self): + # Symlink loops raise OSError in strict mode + ABSTFN = ntpath.abspath(os_helper.TESTFN) + self.addCleanup(os_helper.unlink, ABSTFN) + self.addCleanup(os_helper.unlink, ABSTFN + "1") + self.addCleanup(os_helper.unlink, ABSTFN + "2") + self.addCleanup(os_helper.unlink, ABSTFN + "y") + self.addCleanup(os_helper.unlink, ABSTFN + "c") + self.addCleanup(os_helper.unlink, ABSTFN + "a") + + os.symlink(ABSTFN, ABSTFN) + self.assertRaises(OSError, ntpath.realpath, ABSTFN, strict=True) + + os.symlink(ABSTFN + "1", ABSTFN + "2") + os.symlink(ABSTFN + "2", ABSTFN + "1") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1", strict=True) + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "2", strict=True) + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\x", strict=True) + # Windows eliminates '..' components before resolving links, so the + # following call is not expected to raise. + self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..", strict=True), + ntpath.dirname(ABSTFN)) + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\..\\x", strict=True) + os.symlink(ABSTFN + "x", ABSTFN + "y") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\..\\" + + ntpath.basename(ABSTFN) + "y", + strict=True) + self.assertRaises(OSError, ntpath.realpath, + ABSTFN + "1\\..\\" + ntpath.basename(ABSTFN) + "1", + strict=True) + + os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "a", strict=True) + + os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN)) + + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "c", strict=True) + + # Test using relative path as well. + self.assertRaises(OSError, ntpath.realpath, ntpath.basename(ABSTFN), + strict=True) + @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') def test_realpath_symlink_prefix(self): diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index e18d01f4635..8d398ec0103 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -355,6 +355,19 @@ def test_realpath_basic(self): finally: os_helper.unlink(ABSTFN) + @unittest.skipUnless(hasattr(os, "symlink"), + "Missing symlink implementation") + @skip_if_ABSTFN_contains_backslash + def test_realpath_strict(self): + # Bug #43757: raise FileNotFoundError in strict mode if we encounter + # a path that does not exist. + try: + os.symlink(ABSTFN+"1", ABSTFN) + self.assertRaises(FileNotFoundError, realpath, ABSTFN, strict=True) + self.assertRaises(FileNotFoundError, realpath, ABSTFN + "2", strict=True) + finally: + os_helper.unlink(ABSTFN) + @unittest.skipUnless(hasattr(os, "symlink"), "Missing symlink implementation") @skip_if_ABSTFN_contains_backslash @@ -370,7 +383,7 @@ def test_realpath_relative(self): @skip_if_ABSTFN_contains_backslash def test_realpath_symlink_loops(self): # Bug #930024, return the path unchanged if we get into an infinite - # symlink loop. + # symlink loop in non-strict mode (default). try: os.symlink(ABSTFN, ABSTFN) self.assertEqual(realpath(ABSTFN), ABSTFN) @@ -407,6 +420,48 @@ def test_realpath_symlink_loops(self): os_helper.unlink(ABSTFN+"c") os_helper.unlink(ABSTFN+"a") + @unittest.skipUnless(hasattr(os, "symlink"), + "Missing symlink implementation") + @skip_if_ABSTFN_contains_backslash + def test_realpath_symlink_loops_strict(self): + # Bug #43757, raise OSError if we get into an infinite symlink loop in + # strict mode. + try: + os.symlink(ABSTFN, ABSTFN) + self.assertRaises(OSError, realpath, ABSTFN, strict=True) + + os.symlink(ABSTFN+"1", ABSTFN+"2") + os.symlink(ABSTFN+"2", ABSTFN+"1") + self.assertRaises(OSError, realpath, ABSTFN+"1", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"2", strict=True) + + self.assertRaises(OSError, realpath, ABSTFN+"1/x", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"1/..", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"1/../x", strict=True) + os.symlink(ABSTFN+"x", ABSTFN+"y") + self.assertRaises(OSError, realpath, + ABSTFN+"1/../" + basename(ABSTFN) + "y", strict=True) + self.assertRaises(OSError, realpath, + ABSTFN+"1/../" + basename(ABSTFN) + "1", strict=True) + + os.symlink(basename(ABSTFN) + "a/b", ABSTFN+"a") + self.assertRaises(OSError, realpath, ABSTFN+"a", strict=True) + + os.symlink("../" + basename(dirname(ABSTFN)) + "/" + + basename(ABSTFN) + "c", ABSTFN+"c") + self.assertRaises(OSError, realpath, ABSTFN+"c", strict=True) + + # Test using relative path as well. + with os_helper.change_cwd(dirname(ABSTFN)): + self.assertRaises(OSError, realpath, basename(ABSTFN), strict=True) + finally: + os_helper.unlink(ABSTFN) + os_helper.unlink(ABSTFN+"1") + os_helper.unlink(ABSTFN+"2") + os_helper.unlink(ABSTFN+"y") + os_helper.unlink(ABSTFN+"c") + os_helper.unlink(ABSTFN+"a") + @unittest.skipUnless(hasattr(os, "symlink"), "Missing symlink implementation") @skip_if_ABSTFN_contains_backslash diff --git a/Misc/NEWS.d/next/Library/2021-04-08-22-11-27.bpo-25264.b33fa0.rst b/Misc/NEWS.d/next/Library/2021-04-08-22-11-27.bpo-25264.b33fa0.rst new file mode 100644 index 00000000000..593846ec15c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-04-08-22-11-27.bpo-25264.b33fa0.rst @@ -0,0 +1,3 @@ +:func:`os.path.realpath` now accepts a *strict* keyword-only argument. +When set to ``True``, :exc:`OSError` is raised if a path doesn't exist +or a symlink loop is encountered.