Do "path normalization" when creating a URI.

Path normalization removes '.' and '..' segments from a URI. Such relative references are only intended for URI References, and URI References are only intended for resolving against a full URI.

We do path normalization on all URIs that have a scheme, authority or an absolute path, and partial normalization on what are really just relative paths. The partial normalization can leave ".." at the start of the path.

The URI reference resolution algorithm doesn't work as expected for a URI ending in "..". Resolving "./foo" with respect to a base of "/a/.." results in "/a/foo" - this is avoided when the base is path normalized before it's used.

This also fixes the "normalizePath" function which currently removes leading '..' segments, contrary to its documentation. It also makes the function redundant since all URI paths are normalized automatically.

See discussion on http://dartbug.com/23688

Also fix bug in the removeDotSegments function.

R=floitsch@google.com

Review URL: https://codereview.chromium.org//1224263009.
This commit is contained in:
Lasse R.H. Nielsen 2015-07-17 17:08:36 +02:00
parent 206ffe343b
commit 848f6c978c
5 changed files with 258 additions and 100 deletions

View file

@ -9,6 +9,12 @@
* `dart:convert`
* `LineSplitter` added a `split` static method returning an `Iterable`.
* `dart:core`
* `Uri` class now performs path normalization when a URI is created.
This removes most `..` and `.` sequences from the URI path.
Purely relative paths (no scheme or authority) are allowed to retain
some leading "dot" segments.
* `dart:html`
* `NodeTreeSanitizer` added the `const trusted` field. It can be used
instead of defining a `NullTreeSanitizer` class when calling

View file

@ -381,9 +381,8 @@ class Uri {
}
assert(state == NOT_IN_PATH);
bool isFile = (scheme == "file");
bool ensureLeadingSlash = host != null;
path = _makePath(uri, pathStart, index, null, ensureLeadingSlash, isFile);
bool hasAuthority = (host != null);
path = _makePath(uri, pathStart, index, null, scheme, hasAuthority);
if (char == _QUESTION) {
int numberSignIndex = -1;
@ -511,9 +510,14 @@ class Uri {
(userInfo.isNotEmpty || port != null || isFile)) {
host = "";
}
bool ensureLeadingSlash = host != null;
bool hasAuthority = (host != null);
path = _makePath(path, 0, _stringOrNullLength(path), pathSegments,
ensureLeadingSlash, isFile);
scheme, hasAuthority);
if (scheme.isEmpty && host == null && !path.startsWith('/')) {
path = _normalizeRelativePath(path);
} else {
path = _removeDotSegments(path);
}
return new Uri._internal(scheme, userInfo, host, port,
path, query, fragment);
}
@ -953,15 +957,15 @@ class Uri {
host = "";
}
bool ensureLeadingSlash = (host != null);
bool hasAuthority = host != null;
if (path != null || pathSegments != null) {
path = _makePath(path, 0, _stringOrNullLength(path), pathSegments,
ensureLeadingSlash, isFile);
scheme, hasAuthority);
} else {
path = this.path;
if ((isFile || (ensureLeadingSlash && !path.isEmpty)) &&
if ((isFile || (hasAuthority && !path.isEmpty)) &&
!path.startsWith('/')) {
path = "/$path";
path = "/" + path;
}
}
@ -1025,20 +1029,23 @@ class Uri {
}
/**
* Returns an URI where the path has been normalized.
* Returns a URI where the path has been normalized.
*
* A normalized path does not contain `.` segments or non-leading `..`
* segments.
* Only a relative path may contain leading `..` segments,
* Only a relative path with no scheme or authority may contain
* leading `..` segments,
* a path that starts with `/` will also drop any leading `..` segments.
*
* This uses the same normalization strategy as [resolveUri], as specified by
* RFC 3986.
* This uses the same normalization strategy as `new Uri().resolve(this)`.
*
* Does not change any part of the URI except the path.
*
* The default implementation of `Uri` always normalizes paths, so calling
* this function has no effect.
*/
Uri normalizePath() {
String path = _removeDotSegments(_path);
String path = _normalizePath(_path, scheme, hasAuthority);
if (identical(path, _path)) return this;
return this.replace(path: path);
}
@ -1178,18 +1185,18 @@ class Uri {
if (!_isAlphabeticCharacter(firstCodeUnit)) {
_fail(scheme, start, "Scheme not starting with alphabetic character");
}
bool allLowercase = firstCodeUnit >= _LOWER_CASE_A;
bool containsUpperCase = false;
for (int i = start; i < end; i++) {
final int codeUnit = scheme.codeUnitAt(i);
if (!_isSchemeCharacter(codeUnit)) {
_fail(scheme, i, "Illegal scheme character");
}
if (codeUnit < _LOWER_CASE_A || codeUnit > _LOWER_CASE_Z) {
allLowercase = false;
if (_UPPER_CASE_A <= codeUnit && codeUnit <= _UPPER_CASE_Z) {
containsUpperCase = true;
}
}
scheme = scheme.substring(start, end);
if (!allLowercase) scheme = scheme.toLowerCase();
if (containsUpperCase) scheme = scheme.toLowerCase();
return scheme;
}
@ -1200,8 +1207,10 @@ class Uri {
static String _makePath(String path, int start, int end,
Iterable<String> pathSegments,
bool ensureLeadingSlash,
bool isFile) {
String scheme,
bool hasAuthority) {
bool isFile = (scheme == "file");
bool ensureLeadingSlash = isFile || hasAuthority;
if (path == null && pathSegments == null) return isFile ? "/" : "";
if (path != null && pathSegments != null) {
throw new ArgumentError('Both path and pathSegments specified');
@ -1214,13 +1223,25 @@ class Uri {
}
if (result.isEmpty) {
if (isFile) return "/";
} else if ((isFile || ensureLeadingSlash) &&
result.codeUnitAt(0) != _SLASH) {
return "/$result";
} else if (ensureLeadingSlash && !result.startsWith('/')) {
result = "/" + result;
}
result = _normalizePath(result, scheme, hasAuthority);
return result;
}
/// Normalizes [path] by removing dot segments.
///
/// A path that does not start with "/" and belongs to a URI with an empty
/// [scheme] and no authority is treated as a "pure path": it is passed to
/// [_normalizeRelativePath]. Every other path is passed to
/// [_removeDotSegments].
static String _normalizePath(String path, String scheme, bool hasAuthority) {
  bool isPureRelativePath =
      scheme.isEmpty && !hasAuthority && !path.startsWith('/');
  return isPureRelativePath
      ? _normalizeRelativePath(path)
      : _removeDotSegments(path);
}
static String _makeQuery(String query, int start, int end,
Map<String, String> queryParameters) {
if (query == null && queryParameters == null) return null;
@ -1429,8 +1450,7 @@ class Uri {
*/
bool get isAbsolute => scheme != "" && fragment == "";
String _merge(String base, String reference) {
if (base.isEmpty) return "/$reference";
String _mergePaths(String base, String reference) {
// Optimize for the case: absolute base, reference beginning with "../".
int backCount = 0;
int refStart = 0;
@ -1463,21 +1483,36 @@ class Uri {
reference.substring(refStart - 3 * backCount));
}
bool _hasDotSegments(String path) {
if (path.length > 0 && path.codeUnitAt(0) == _DOT) return true;
/// Cheap pre-check for whether [path] might contain a `.` or `..` segment.
///
/// Answers true when the path begins with "." or contains "/." anywhere,
/// so it can report false positives (for example for "/.hidden").
/// It only exists so callers can skip a more expensive pass when the
/// answer is false.
static bool _mayContainDotSegments(String path) {
  return path.startsWith('.') || path.indexOf("/.") != -1;
}
String _removeDotSegments(String path) {
if (!_hasDotSegments(path)) return path;
/// Removes '.' and '..' segments from a path.
///
/// Follows the RFC 3986 "remove dot segments" algorithm.
/// This algorithm is only used on paths of URIs with a scheme,
/// and it treats the path as if it is absolute (leading '..' are removed).
static String _removeDotSegments(String path) {
if (!_mayContainDotSegments(path)) return path;
assert(path.isNotEmpty); // An empty path would not have dot segments.
List<String> output = [];
bool appendSlash = false;
for (String segment in path.split("/")) {
appendSlash = false;
if (segment == "..") {
if (!output.isEmpty &&
((output.length != 1) || (output[0] != ""))) output.removeLast();
if (output.isNotEmpty) {
output.removeLast();
if (output.isEmpty) {
output.add("");
}
}
appendSlash = true;
} else if ("." == segment) {
appendSlash = true;
@ -1489,6 +1524,42 @@ class Uri {
return output.join("/");
}
/// Normalizes a relative [path]: drops every `.` segment and collapses
/// each `..` segment that follows a real segment.
///
/// A `..` segment with nothing before it to remove (or preceded only by
/// other `..` segments) is kept.
/// Collapsing "bar/foo/.." yields "bar/" (note the trailing "/").
/// If everything is collapsed away, the result is "./" rather than the
/// empty string: an empty string represents "no path", which behaves
/// differently when resolved against a base URI whose path has a non-empty
/// final segment.
static String _normalizeRelativePath(String path) {
  assert(!path.startsWith('/'));  // Only get called for relative paths.
  if (!_mayContainDotSegments(path)) return path;
  assert(path.isNotEmpty);  // An empty path would not have dot segments.
  List<String> segments = [];
  // True when the most recently visited segment was a dot segment that was
  // dropped, in which case the result must end with "/".
  bool endsInDirectory = false;
  for (String part in path.split("/")) {
    if ("." == part) {
      endsInDirectory = true;
      continue;
    }
    if (".." != part) {
      segments.add(part);
      endsInDirectory = false;
      continue;
    }
    // A ".." cancels the previous real segment; otherwise it is retained.
    if (segments.isNotEmpty && segments.last != "..") {
      segments.removeLast();
      endsInDirectory = true;
    } else {
      segments.add("..");
      endsInDirectory = false;
    }
  }
  bool nothingLeft =
      segments.isEmpty || (segments.length == 1 && segments[0].isEmpty);
  if (nothingLeft) return "./";
  if (endsInDirectory || segments.last == '..') segments.add("");
  return segments.join("/");
}
/**
* Resolve [reference] as a URI relative to `this`.
*
@ -1508,9 +1579,15 @@ class Uri {
*
* Returns the resolved URI.
*
* The algorithm for resolving a reference is described in
* [RFC-3986 Section 5]
* The algorithm "Transform Reference" for resolving a reference is
* described in [RFC-3986 Section 5]
* (http://tools.ietf.org/html/rfc3986#section-5 "RFC-3986").
*
* Updated to handle the case where the base URI is just a relative path -
* that is: when it has no scheme or authority and the path does not start
* with a slash.
* In that case, the paths are combined without removing leading "..", and
* an empty path is not converted to "/".
*/
Uri resolveUri(Uri reference) {
// From RFC 3986.
@ -1541,6 +1618,9 @@ class Uri {
targetPath = _removeDotSegments(reference.path);
if (reference.hasQuery) targetQuery = reference.query;
} else {
targetUserInfo = this._userInfo;
targetHost = this._host;
targetPort = this._port;
if (reference.path == "") {
targetPath = this._path;
if (reference.hasQuery) {
@ -1549,16 +1629,32 @@ class Uri {
targetQuery = this._query;
}
} else {
if (reference.path.startsWith("/")) {
if (reference.hasAbsolutePath) {
targetPath = _removeDotSegments(reference.path);
} else {
targetPath = _removeDotSegments(_merge(this._path, reference.path));
// This is the RFC 3986 behavior for merging.
if (this.hasEmptyPath) {
if (!this.hasScheme && !this.hasAuthority) {
// Keep the path relative if no scheme or authority.
targetPath = reference.path;
} else {
// Add path normalization on top of RFC algorithm.
targetPath = _removeDotSegments("/" + reference.path);
}
} else {
var mergedPath = _mergePaths(this._path, reference.path);
if (this.hasScheme || this.hasAuthority || this.hasAbsolutePath) {
targetPath = _removeDotSegments(mergedPath);
} else {
// Non-RFC 3986 behavior. If both base and reference are relative
// paths, allow the merged path to start with "..".
// The RFC only specifies the case where the base has a scheme.
targetPath = _normalizeRelativePath(mergedPath);
}
}
}
if (reference.hasQuery) targetQuery = reference.query;
}
targetUserInfo = this._userInfo;
targetHost = this._host;
targetPort = this._port;
}
}
String fragment = reference.hasFragment ? reference.fragment : null;
@ -1571,6 +1667,11 @@ class Uri {
fragment);
}
/**
 * Whether this URI has a [scheme] component, that is, whether its
 * scheme is a non-empty string.
 */
bool get hasScheme {
  return scheme.isNotEmpty;
}
/**
* Returns whether the URI has an [authority] component.
*/
@ -1596,6 +1697,16 @@ class Uri {
*/
bool get hasFragment => _fragment != null;
/**
 * Whether the path of this URI is the empty string.
 */
bool get hasEmptyPath {
  return _path.isEmpty;
}
/**
 * Whether the path of this URI is absolute, meaning that it
 * starts with '/'.
 */
bool get hasAbsolutePath {
  return _path.startsWith('/');
}
/**
* Returns the origin of the URI in the form scheme://host:port for the
* schemes http and https.

View file

@ -45,7 +45,7 @@ main() {
test("./../foo", "foo");
test("./../", "");
test("./../.", "");
test("foo/bar/baz/../../../../qux", "qux");
test("foo/bar/baz/../../../../qux", "/qux");
test("/foo/bar/baz/../../../../qux", "/qux");
test(".", "");
test("..", "");

View file

@ -5,10 +5,12 @@
import "package:expect/expect.dart";
testNormalizePath() {
test(String expected, String path) {
var uri = new Uri(path: path);
Expect.equals(expected, uri.path);
test(String expected, String path, {String scheme, String host}) {
var uri = new Uri(scheme: scheme, host: host, path: path);
Expect.equals(expected, uri.toString());
if (scheme == null && host == null) {
Expect.equals(expected, uri.path);
}
}
var unreserved = "-._~0123456789"
@ -32,9 +34,42 @@ testNormalizePath() {
x.write(i.toRadixString(16));
}
}
print(x.toString().toUpperCase());
Expect.equals(x.toString().toUpperCase(),
new Uri(path: x.toString()).toString().toUpperCase());
// Normalized paths.
// Full absolute path normalization for absolute paths.
test("/a/b/c/", "/../a/./b/z/../c/d/..");
test("/a/b/c/", "/./a/b/c/");
test("/a/b/c/", "/./../a/b/c/");
test("/a/b/c/", "/./../a/b/c/.");
test("/a/b/c/", "/./../a/b/c/z/./..");
test("/", "/a/..");
// Full absolute path normalization for URIs with scheme.
test("s:a/b/c/", "../a/./b/z/../c/d/..", scheme: "s");
test("s:a/b/c/", "./a/b/c/", scheme: "s");
test("s:a/b/c/", "./../a/b/c/", scheme: "s");
test("s:a/b/c/", "./../a/b/c/.", scheme: "s");
test("s:a/b/c/", "./../a/b/c/z/./..", scheme: "s");
test("s:/", "/a/..", scheme: "s");
test("s:/", "a/..", scheme: "s");
// Full absolute path normalization for URIs with authority.
test("//h/a/b/c/", "../a/./b/z/../c/d/..", host: "h");
test("//h/a/b/c/", "./a/b/c/", host: "h");
test("//h/a/b/c/", "./../a/b/c/", host: "h");
test("//h/a/b/c/", "./../a/b/c/.", host: "h");
test("//h/a/b/c/", "./../a/b/c/z/./..", host: "h");
test("//h/", "/a/..", host: "h");
test("//h/", "a/..", host: "h");
// Partial relative normalization (allowing leading .. or ./ for current dir).
test("../a/b/c/", "../a/./b/z/../c/d/..");
test("a/b/c/", "./a/b/c/");
test("../a/b/c/", "./../a/b/c/");
test("../a/b/c/", "./../a/b/c/.");
test("../a/b/c/", "./../a/b/c/z/./..");
test("/", "/a/..");
test("./", "a/..");
}
main() {

View file

@ -61,64 +61,74 @@ testEncodeDecodeQueryComponent(String orig,
}
}
testUriPerRFCs(Uri base) {
testUriPerRFCs() {
final urisSample = "http://a/b/c/d;p?q";
Uri base = Uri.parse(urisSample);
testResolve(expect, relative) {
Expect.stringEquals(expect, base.resolve(relative).toString());
}
// From RFC 3986.
Expect.stringEquals("g:h", base.resolve("g:h").toString());
Expect.stringEquals("http://a/b/c/g", base.resolve("g").toString());
Expect.stringEquals("http://a/b/c/g", base.resolve("./g").toString());
Expect.stringEquals("http://a/b/c/g/", base.resolve("g/").toString());
Expect.stringEquals("http://a/g", base.resolve("/g").toString());
Expect.stringEquals("http://g", base.resolve("//g").toString());
Expect.stringEquals("http://a/b/c/d;p?y", base.resolve("?y").toString());
Expect.stringEquals("http://a/b/c/g?y", base.resolve("g?y").toString());
Expect.stringEquals("http://a/b/c/d;p?q#s", base.resolve("#s").toString());
Expect.stringEquals("http://a/b/c/g#s", base.resolve("g#s").toString());
Expect.stringEquals("http://a/b/c/g?y#s", base.resolve("g?y#s").toString());
Expect.stringEquals("http://a/b/c/;x", base.resolve(";x").toString());
Expect.stringEquals("http://a/b/c/g;x", base.resolve("g;x").toString());
Expect.stringEquals("http://a/b/c/g;x?y#s",
base.resolve("g;x?y#s").toString());
Expect.stringEquals("http://a/b/c/d;p?q", base.resolve("").toString());
Expect.stringEquals("http://a/b/c/", base.resolve(".").toString());
Expect.stringEquals("http://a/b/c/", base.resolve("./").toString());
Expect.stringEquals("http://a/b/", base.resolve("..").toString());
Expect.stringEquals("http://a/b/", base.resolve("../").toString());
Expect.stringEquals("http://a/b/g", base.resolve("../g").toString());
Expect.stringEquals("http://a/", base.resolve("../..").toString());
Expect.stringEquals("http://a/", base.resolve("../../").toString());
Expect.stringEquals("http://a/g", base.resolve("../../g").toString());
Expect.stringEquals("http://a/g", base.resolve("../../../g").toString());
Expect.stringEquals("http://a/g", base.resolve("../../../../g").toString());
Expect.stringEquals("http://a/g", base.resolve("/./g").toString());
Expect.stringEquals("http://a/g", base.resolve("/../g").toString());
Expect.stringEquals("http://a/b/c/g.", base.resolve("g.").toString());
Expect.stringEquals("http://a/b/c/.g", base.resolve(".g").toString());
Expect.stringEquals("http://a/b/c/g..", base.resolve("g..").toString());
Expect.stringEquals("http://a/b/c/..g", base.resolve("..g").toString());
Expect.stringEquals("http://a/b/g", base.resolve("./../g").toString());
Expect.stringEquals("http://a/b/c/g/", base.resolve("./g/.").toString());
Expect.stringEquals("http://a/b/c/g/h", base.resolve("g/./h").toString());
Expect.stringEquals("http://a/b/c/h", base.resolve("g/../h").toString());
Expect.stringEquals("http://a/b/c/g;x=1/y",
base.resolve("g;x=1/./y").toString());
Expect.stringEquals("http://a/b/c/y", base.resolve("g;x=1/../y").toString());
Expect.stringEquals("http://a/b/c/g?y/./x",
base.resolve("g?y/./x").toString());
Expect.stringEquals("http://a/b/c/g?y/../x",
base.resolve("g?y/../x").toString());
Expect.stringEquals("http://a/b/c/g#s/./x",
base.resolve("g#s/./x").toString());
Expect.stringEquals("http://a/b/c/g#s/../x",
base.resolve("g#s/../x").toString());
Expect.stringEquals("http:g", base.resolve("http:g").toString());
testResolve("g:h", "g:h");
testResolve("http://a/b/c/g", "g");
testResolve("http://a/b/c/g", "./g");
testResolve("http://a/b/c/g/", "g/");
testResolve("http://a/g", "/g");
testResolve("http://g", "//g");
testResolve("http://a/b/c/d;p?y", "?y");
testResolve("http://a/b/c/g?y", "g?y");
testResolve("http://a/b/c/d;p?q#s", "#s");
testResolve("http://a/b/c/g#s", "g#s");
testResolve("http://a/b/c/g?y#s", "g?y#s");
testResolve("http://a/b/c/;x", ";x");
testResolve("http://a/b/c/g;x", "g;x");
testResolve("http://a/b/c/g;x?y#s", "g;x?y#s");
testResolve("http://a/b/c/d;p?q", "");
testResolve("http://a/b/c/", ".");
testResolve("http://a/b/c/", "./");
testResolve("http://a/b/", "..");
testResolve("http://a/b/", "../");
testResolve("http://a/b/g", "../g");
testResolve("http://a/", "../..");
testResolve("http://a/", "../../");
testResolve("http://a/g", "../../g");
testResolve("http://a/g", "../../../g");
testResolve("http://a/g", "../../../../g");
testResolve("http://a/g", "/./g");
testResolve("http://a/g", "/../g");
testResolve("http://a/b/c/g.", "g.");
testResolve("http://a/b/c/.g", ".g");
testResolve("http://a/b/c/g..", "g..");
testResolve("http://a/b/c/..g", "..g");
testResolve("http://a/b/g", "./../g");
testResolve("http://a/b/c/g/", "./g/.");
testResolve("http://a/b/c/g/h", "g/./h");
testResolve("http://a/b/c/h", "g/../h");
testResolve("http://a/b/c/g;x=1/y", "g;x=1/./y");
testResolve("http://a/b/c/y", "g;x=1/../y");
testResolve("http://a/b/c/g?y/./x", "g?y/./x");
testResolve("http://a/b/c/g?y/../x", "g?y/../x");
testResolve("http://a/b/c/g#s/./x", "g#s/./x");
testResolve("http://a/b/c/g#s/../x", "g#s/../x");
testResolve("http:g", "http:g");
// Additional tests (not from RFC 3986).
Expect.stringEquals("http://a/b/g;p/h;s",
base.resolve("../g;p/h;s").toString());
testResolve("http://a/b/g;p/h;s", "../g;p/h;s");
// Test non-URI base (no scheme, no authority, relative path).
base = Uri.parse("a/b/c?_#_");
testResolve("a/b/g?q#f", "g?q#f");
testResolve("../", "../../..");
testResolve("a/b/", ".");
testResolve("c", "../../c");
base = Uri.parse("s:a/b");
testResolve("s:/c", "../c");
}
void testResolvePath(String expected, String path) {
Expect.equals(expected, new Uri().resolveUri(new Uri(path: path)).path);
Expect.equals(expected,
new Uri(path: '/').resolveUri(new Uri(path: path)).path);
Expect.equals(
"http://localhost$expected",
Uri.parse("http://localhost").resolveUri(new Uri(path: path)).toString());
@ -471,11 +481,7 @@ main() {
testResolvePath("/a/b/e/", "./a/b/./c/d/../../e/./.");
testResolvePath("/a/b/e/", "./a/b/./c/d/../../e/././.");
final urisSample = "http://a/b/c/d;p?q";
Uri baseFromString = Uri.parse(urisSample);
testUriPerRFCs(baseFromString);
Uri base = Uri.parse(urisSample);
testUriPerRFCs(base);
testUriPerRFCs();
Expect.stringEquals(
"http://example.com",