bpo-45506: Normalize _PyPathConfig.stdlib_dir when calculated. (#29040)

The recently added PyConfig.stdlib_dir was being set with ".." entries. When __file__ was added for from modules this caused a problem on out-of-tree builds. This PR fixes that by normalizing "stdlib_dir" when it is calculated in getpath.c.

https://bugs.python.org/issue45506
This commit is contained in:
Eric Snow 2021-10-22 17:20:03 -06:00 committed by GitHub
parent f30ad65dbf
commit 17c61045c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 236 additions and 24 deletions

View file

@ -80,6 +80,9 @@ extern int _Py_add_relfile(wchar_t *dirname,
const wchar_t *relfile,
size_t bufsize);
extern size_t _Py_find_basename(const wchar_t *filename);
PyAPI_FUNC(int) _Py_normalize_path(const wchar_t *path,
wchar_t *buf, const size_t buf_len);
// Macros to protect CRT calls against instant termination when passed an
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.

View file

@ -0,0 +1,30 @@
# Run tests for functions in Python/fileutils.c.
import os
import os.path
import unittest
from test.support import import_helper
# Skip this test if the _testcapi module isn't available.
_testcapi = import_helper.import_module('_testinternalcapi')
class PathTests(unittest.TestCase):
def test_capi_normalize_path(self):
if os.name == 'nt':
raise unittest.SkipTest('Windows has its own helper for this')
else:
from .test_posixpath import PosixPathTest as posixdata
tests = posixdata.NORMPATH_CASES
for filename, expected in tests:
if not os.path.isabs(filename):
continue
with self.subTest(filename):
result = _testcapi.normalize_path(filename)
self.assertEqual(result, expected,
msg=f'input: {filename!r} expected output: {expected!r}')
if __name__ == "__main__":
unittest.main()

View file

@ -304,25 +304,51 @@ def test_expanduser_pwd(self):
for path in ('~', '~/.local', '~vstinner/'):
self.assertEqual(posixpath.expanduser(path), path)
def test_normpath(self):
self.assertEqual(posixpath.normpath(""), ".")
self.assertEqual(posixpath.normpath("/"), "/")
self.assertEqual(posixpath.normpath("//"), "//")
self.assertEqual(posixpath.normpath("///"), "/")
self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar")
self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"),
"/foo/baz")
self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar")
NORMPATH_CASES = [
("", "."),
("/", "/"),
("/.", "/"),
("/./", "/"),
("/.//.", "/"),
("/foo", "/foo"),
("/foo/bar", "/foo/bar"),
("//", "//"),
("///", "/"),
("///foo/.//bar//", "/foo/bar"),
("///foo/.//bar//.//..//.//baz///", "/foo/baz"),
("///..//./foo/.//bar", "/foo/bar"),
(".", "."),
(".//.", "."),
("..", ".."),
("../", ".."),
("../foo", "../foo"),
("../../foo", "../../foo"),
("../foo/../bar", "../bar"),
("../../foo/../bar/./baz/boom/..", "../../bar/baz"),
("/..", "/"),
("/..", "/"),
("/../", "/"),
("/..//", "/"),
("//..", "//"),
("/../foo", "/foo"),
("/../../foo", "/foo"),
("/../foo/../", "/"),
("/../foo/../bar", "/bar"),
("/../../foo/../bar/./baz/boom/..", "/bar/baz"),
("/../../foo/../bar/./baz/boom/.", "/bar/baz/boom"),
]
self.assertEqual(posixpath.normpath(b""), b".")
self.assertEqual(posixpath.normpath(b"/"), b"/")
self.assertEqual(posixpath.normpath(b"//"), b"//")
self.assertEqual(posixpath.normpath(b"///"), b"/")
self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar")
self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"),
b"/foo/baz")
self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"),
b"/foo/bar")
def test_normpath(self):
for path, expected in self.NORMPATH_CASES:
with self.subTest(path):
result = posixpath.normpath(path)
self.assertEqual(result, expected)
path = path.encode('utf-8')
expected = expected.encode('utf-8')
with self.subTest(path, type=bytes):
result = posixpath.normpath(path)
self.assertEqual(result, expected)
@skip_if_ABSTFN_contains_backslash
def test_realpath_curdir(self):

View file

@ -14,12 +14,14 @@
#include "Python.h"
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
#include "pycore_bitutils.h" // _Py_bswap32()
#include "pycore_fileutils.h" // _Py_normalize_path
#include "pycore_gc.h" // PyGC_Head
#include "pycore_hashtable.h" // _Py_hashtable_new()
#include "pycore_initconfig.h" // _Py_GetConfigsAsDict()
#include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy()
#include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost()
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "osdefs.h" // MAXPATHLEN
static PyObject *
@ -366,6 +368,27 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args))
}
static PyObject *
normalize_path(PyObject *self, PyObject *filename)
{
Py_ssize_t size = -1;
wchar_t *encoded = PyUnicode_AsWideCharString(filename, &size);
if (encoded == NULL) {
return NULL;
}
wchar_t buf[MAXPATHLEN + 1];
int res = _Py_normalize_path(encoded, buf, Py_ARRAY_LENGTH(buf));
PyMem_Free(encoded);
if (res != 0) {
PyErr_SetString(PyExc_ValueError, "string too long");
return NULL;
}
return PyUnicode_FromWideChar(buf, -1);
}
static PyMethodDef TestMethods[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@ -377,6 +400,7 @@ static PyMethodDef TestMethods[] = {
{"set_config", test_set_config, METH_O},
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
{"test_edit_cost", test_edit_cost, METH_NOARGS},
{"normalize_path", normalize_path, METH_O, NULL},
{NULL, NULL} /* sentinel */
};

View file

@ -519,6 +519,42 @@ search_for_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig,
}
static PyStatus
calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
{
// Note that, unlike calculate_set_prefix(), here we allow a negative
// prefix_found. That means the source tree Lib dir gets used.
if (!calculate->prefix_found) {
return _PyStatus_OK();
}
PyStatus status;
wchar_t *prefix = calculate->prefix;
if (!_Py_isabs(prefix)) {
prefix = _PyMem_RawWcsdup(prefix);
if (prefix == NULL) {
return _PyStatus_NO_MEMORY();
}
status = absolutize(&prefix);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}
wchar_t buf[MAXPATHLEN + 1];
int res = _Py_normalize_path(prefix, buf, Py_ARRAY_LENGTH(buf));
if (prefix != calculate->prefix) {
PyMem_RawFree(prefix);
}
if (res < 0) {
return PATHLEN_ERR();
}
pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf);
if (pathconfig->stdlib_dir == NULL) {
return _PyStatus_NO_MEMORY();
}
return _PyStatus_OK();
}
static PyStatus
calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
{
@ -1494,12 +1530,10 @@ calculate_path(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
}
if (pathconfig->stdlib_dir == NULL) {
if (calculate->prefix_found) {
/* This must be done *before* calculate_set_prefix() is called. */
pathconfig->stdlib_dir = _PyMem_RawWcsdup(calculate->prefix);
if (pathconfig->stdlib_dir == NULL) {
return _PyStatus_NO_MEMORY();
}
/* This must be done *before* calculate_set_prefix() is called. */
status = calculate_set_stdlib_dir(calculate, pathconfig);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}

View file

@ -2181,6 +2181,101 @@ _Py_find_basename(const wchar_t *filename)
}
/* Remove navigation elements such as "." and "..".
This is mostly a C implementation of posixpath.normpath().
Return 0 on success. Return -1 if "orig" is too big for the buffer. */
int
_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len)
{
assert(path && *path != L'\0');
assert(*path == SEP); // an absolute path
if (wcslen(path) + 1 >= buf_len) {
return -1;
}
int dots = -1;
int check_leading = 1;
const wchar_t *buf_start = buf;
wchar_t *buf_next = buf;
// The resulting filename will never be longer than path.
for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) {
wchar_t c = *remainder;
buf_next[0] = c;
buf_next++;
if (c == SEP) {
assert(dots <= 2);
if (dots == 2) {
// Turn "/x/y/../z" into "/x/z".
buf_next -= 4; // "/../"
assert(*buf_next == SEP);
// We cap it off at the root, so "/../spam" becomes "/spam".
if (buf_next == buf_start) {
buf_next++;
}
else {
// Move to the previous SEP in the buffer.
while (*(buf_next - 1) != SEP) {
assert(buf_next != buf_start);
buf_next--;
}
}
}
else if (dots == 1) {
// Turn "/./" into "/".
buf_next -= 2; // "./"
assert(*(buf_next - 1) == SEP);
}
else if (dots == 0) {
// Turn "//" into "/".
buf_next--;
assert(*(buf_next - 1) == SEP);
if (check_leading) {
if (buf_next - 1 == buf && *(remainder + 1) != SEP) {
// Leave a leading "//" alone, unless "///...".
buf_next++;
buf_start++;
}
check_leading = 0;
}
}
dots = 0;
}
else {
check_leading = 0;
if (dots >= 0) {
if (c == L'.' && dots < 2) {
dots++;
}
else {
dots = -1;
}
}
}
}
if (dots >= 0) {
// Strip any trailing dots and trailing slash.
buf_next -= dots + 1; // "/" or "/." or "/.."
assert(*buf_next == SEP);
if (buf_next == buf_start) {
// Leave the leading slash for root.
buf_next++;
}
else {
if (dots == 2) {
// Move to the previous SEP in the buffer.
do {
assert(buf_next != buf_start);
buf_next--;
} while (*(buf_next) != SEP);
}
}
}
*buf_next = L'\0';
return 0;
}
/* Get the current directory. buflen is the buffer size in wide characters
including the null character. Decode the path from the locale encoding.