fix(URL): Implement spec-compliant host parsing (#6689)

This commit is contained in:
Nayeem Rahman 2020-07-10 20:51:24 +01:00 committed by GitHub
parent 39dba12a06
commit 69e0886362
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 160 additions and 49 deletions

1
Cargo.lock generated
View file

@ -393,6 +393,7 @@ dependencies = [
"futures 0.3.5",
"fwdansi",
"http",
"idna",
"indexmap",
"lazy_static",
"libc",

View file

@ -33,6 +33,7 @@ dlopen = "0.1.8"
dprint-plugin-typescript = "0.19.5"
futures = { version = "0.3.5", features = ["compat", "io-compat"] }
http = "0.2.1"
idna = "0.2.0"
indexmap = "1.4.0"
lazy_static = "1.4.0"
libc = "0.2.71"

12
cli/js/ops/idna.ts Normal file
View file

@ -0,0 +1,12 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
/** https://url.spec.whatwg.org/#idna */
import { sendSync } from "./dispatch_json.ts";
/**
 * Converts `domain` to its ASCII form by synchronously dispatching the
 * `op_domain_to_ascii` op.
 *
 * @param domain The domain to convert.
 * @param options `beStrict` is forwarded to the op unchanged and defaults to
 *   `false` when omitted.
 * @returns Whatever the op returns for the given arguments.
 */
export function domainToAscii(
  domain: string,
  options: { beStrict?: boolean } = {}
): string {
  const { beStrict = false } = options;
  const args = { domain, beStrict };
  return sendSync("op_domain_to_ascii", args);
}

View file

@ -1,11 +1,14 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { build } from "../build.ts";
import { getRandomValues } from "../ops/get_random_values.ts";
import { domainToAscii } from "../ops/idna.ts";
import { customInspect } from "./console.ts";
import { TextEncoder } from "./text_encoding.ts";
import { urls } from "./url_search_params.ts";
interface URLParts {
protocol: string;
slashes: string;
username: string;
password: string;
hostname: string;
@ -57,7 +60,9 @@ function parse(url: string, isBase = true): URLParts | undefined {
if (isBase && parts.protocol == "") {
return undefined;
}
const isSpecial = specialSchemes.includes(parts.protocol);
if (parts.protocol == "file") {
parts.slashes = "//";
parts.username = "";
parts.password = "";
[parts.hostname, restUrl] = takePattern(restUrl, /^[/\\]{2}([^/\\?#]*)/);
@ -68,7 +73,8 @@ function parse(url: string, isBase = true): URLParts | undefined {
// equivalent to: `new URL("file://localhost/foo/bar")`.
[parts.hostname, restUrl] = takePattern(restUrl, /^[/\\]{2,}([^/\\?#]*)/);
}
} else if (specialSchemes.includes(parts.protocol)) {
} else if (isSpecial) {
parts.slashes = "//";
let restAuthority;
[restAuthority, restUrl] = takePattern(restUrl, /^[/\\]{2,}([^/\\?#]+)/);
if (isBase && restAuthority == "") {
@ -92,17 +98,18 @@ function parse(url: string, isBase = true): URLParts | undefined {
return undefined;
}
} else {
[parts.slashes, restUrl] = takePattern(restUrl, /^([/\\]{2})/);
parts.username = "";
parts.password = "";
parts.hostname = "";
if (parts.slashes) {
[parts.hostname, restUrl] = takePattern(restUrl, /^([^/\\?#]*)/);
} else {
parts.hostname = "";
}
parts.port = "";
}
try {
const IPv6re = /^\[[0-9a-fA-F.:]{2,}\]$/;
if (!IPv6re.test(parts.hostname)) {
parts.hostname = encodeHostname(parts.hostname); // Non-IPv6 URLs
}
parts.hostname = parts.hostname.toLowerCase();
parts.hostname = encodeHostname(parts.hostname, isSpecial);
} catch {
return undefined;
}
@ -298,7 +305,8 @@ export class URLImpl implements URL {
/**
 * Replaces the hostname of this URL.
 *
 * The value is stringified and run through `encodeHostname`, which is told
 * whether the URL's current protocol is one of `specialSchemes`. The host is
 * encoded exactly once: an earlier unconditional `encodeHostname(value)` call
 * always took the special-scheme path and could throw before the
 * scheme-aware call ran.
 */
set hostname(value: string) {
  value = String(value);
  try {
    const urlParts = parts.get(this)!;
    const isSpecial = specialSchemes.includes(urlParts.protocol);
    urlParts.hostname = encodeHostname(value, isSpecial);
  } catch {
    // Deliberate best-effort: if the host cannot be encoded, the previously
    // stored hostname is left untouched and no error reaches the caller.
  }
}
@ -307,11 +315,9 @@ export class URLImpl implements URL {
this.username || this.password
? `${this.username}${this.password ? ":" + this.password : ""}@`
: "";
let slash = "";
if (this.host || this.protocol === "file:") {
slash = "//";
}
return `${this.protocol}${slash}${authentication}${this.host}${this.pathname}${this.search}${this.hash}`;
return `${this.protocol}${parts.get(this)!.slashes}${authentication}${
this.host
}${this.pathname}${this.search}${this.hash}`;
}
set href(value: string) {
@ -427,6 +433,7 @@ export class URLImpl implements URL {
} else if (baseParts) {
parts.set(this, {
protocol: baseParts.protocol,
slashes: baseParts.slashes,
username: baseParts.username,
password: baseParts.password,
hostname: baseParts.hostname,
@ -479,7 +486,7 @@ export class URLImpl implements URL {
}
/**
 * Reports whether `c` is in the C0 control percent-encode set: the C0
 * controls (U+0000 through U+001F) plus everything above U+007E (~).
 *
 * https://url.spec.whatwg.org/#c0-control-percent-encode-set
 *
 * The comparison is a plain string comparison, so `c` should be a single
 * character. A stale C0-only `return` used to precede this one and made the
 * upper range unreachable; it has been removed.
 */
function charInC0ControlSet(c: string): boolean {
  return (c >= "\u0000" && c <= "\u001F") || c > "\u007E";
}
function charInSearchSet(c: string): boolean {
@ -503,20 +510,72 @@ function charInUserinfoSet(c: string): boolean {
return charInPathSet(c) || ["\u0027", "\u002F", "\u003A", "\u003B", "\u003D", "\u0040", "\u005B", "\u005C", "\u005D", "\u005E", "\u007C"].includes(c);
}
// Characters that may not appear in a host.
// prettier-ignore
const forbiddenHostCodePoints: ReadonlySet<string> = new Set([
  "\u0000", "\u0009", "\u000A", "\u000D", "\u0020", "\u0023", "\u0025",
  "\u002F", "\u003A", "\u003C", "\u003E", "\u003F", "\u0040", "\u005B",
  "\u005C", "\u005D", "\u005E",
]);

/** Returns true when `c` is exactly one of the forbidden host characters. */
function charIsForbiddenInHost(c: string): boolean {
  return forbiddenHostCodePoints.has(c);
}
const encoder = new TextEncoder();

/**
 * Percent-encodes `c`: each byte of its UTF-8 encoding becomes `%` followed
 * by exactly two uppercase hexadecimal digits.
 *
 * Bytes below 0x10 are zero-padded (`%01`, not `%1`) so the output is always
 * a valid percent-encoded triplet. A stale `charCodeAt`-based `return` used
 * to precede this logic; it emitted UTF-16 code units instead of UTF-8 bytes
 * and has been removed.
 */
function encodeChar(c: string): string {
  return [...encoder.encode(c)]
    .map((byte) => `%${byte.toString(16).padStart(2, "0")}`)
    .join("")
    .toUpperCase();
}
/**
 * Percent-encodes every character of `s` that `charInUserinfoSet` reports as
 * part of the userinfo set, copying all other characters through unchanged.
 */
function encodeUserinfo(s: string): string {
  let encoded = "";
  for (const c of s) {
    encoded += charInUserinfoSet(c) ? encodeChar(c) : c;
  }
  return encoded;
}
/**
 * Validates and normalizes a host string.
 *
 * - A bracketed host (`[...]`) is checked against a coarse IPv6 pattern and
 *   lower-cased.
 * - For a non-special scheme, the host is rejected if it contains a forbidden
 *   host character other than "%"; otherwise characters in the C0 control
 *   set are percent-encoded and the rest is kept verbatim.
 * - For a special scheme, the host is percent-decoded, passed through
 *   `domainToAscii`, and rejected if the result contains a forbidden host
 *   character.
 *
 * @param s The raw host.
 * @param isSpecial Whether the URL's scheme is special. Defaults to `true`.
 * @returns The encoded host.
 * @throws {TypeError} "Invalid hostname." when a check above fails. Errors
 *   thrown by `domainToAscii` propagate unchanged.
 */
function encodeHostname(s: string, isSpecial = true): string {
  // IPv6 parsing.
  if (s.startsWith("[") && s.endsWith("]")) {
    if (!/^\[[0-9A-Fa-f.:]{2,}\]$/.test(s)) {
      throw new TypeError("Invalid hostname.");
    }
    return s.toLowerCase();
  }

  if (!isSpecial) {
    // Check against forbidden host code points except for "%".
    for (const c of s) {
      if (charIsForbiddenInHost(c) && c != "\u0025") {
        throw new TypeError("Invalid hostname.");
      }
    }
    // Percent-encode C0 control set.
    return [...s]
      .map((c) => (charInC0ControlSet(c) ? encodeChar(c) : c))
      .join("");
  }

  // Percent-decode. Every "%" must introduce a two-digit hex escape.
  if (/%(?![0-9A-Fa-f]{2})/.test(s)) {
    throw new TypeError("Invalid hostname.");
  }
  let result: string;
  try {
    // The escaped bytes are decoded together as UTF-8, so "%C3%B1" becomes
    // "ñ" rather than two separate Latin-1 characters.
    result = decodeURIComponent(s);
  } catch {
    // The escapes do not form valid UTF-8.
    throw new TypeError("Invalid hostname.");
  }

  // IDNA domain to ASCII.
  result = domainToAscii(result);

  // Check against forbidden host code points.
  for (const c of result) {
    if (charIsForbiddenInHost(c)) {
      throw new TypeError("Invalid hostname.");
    }
  }

  // TODO(nayeemrmn): IPv4 parsing.
  return result;
}
function encodePathname(s: string): string {

43
cli/ops/idna.rs Normal file
View file

@ -0,0 +1,43 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
//! https://url.spec.whatwg.org/#idna
use super::dispatch_json::{Deserialize, JsonOp, Value};
use crate::op_error::{ErrorKind, OpError};
use crate::state::State;
use deno_core::CoreIsolate;
use deno_core::ZeroCopyBuf;
use idna::{domain_to_ascii, domain_to_ascii_strict};
/// Registers the `op_domain_to_ascii` op on the given isolate, wrapping the
/// handler with the state's stateful JSON op adapter.
pub fn init(i: &mut CoreIsolate, s: &State) {
  let handler = s.stateful_json_op(op_domain_to_ascii);
  i.register_op("op_domain_to_ascii", handler);
}
/// Builds the `TypeError`-kind `OpError` reported when a domain conversion
/// fails.
fn invalid_domain_error() -> OpError {
  let kind = ErrorKind::TypeError;
  let msg = String::from("Invalid domain.");
  OpError { kind, msg }
}
/// Arguments of the `op_domain_to_ascii` op, deserialized from the JSON value
/// passed to the op. Field names are mapped from camelCase (`domain`,
/// `beStrict`).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct DomainToAscii {
/// The domain to convert.
domain: String,
/// When true the op uses `domain_to_ascii_strict`, otherwise
/// `domain_to_ascii`.
be_strict: bool,
}
/// Synchronous op that converts a domain to ASCII.
///
/// Deserializes `args` into `DomainToAscii`, runs the strict or non-strict
/// `idna` conversion depending on `be_strict`, and returns the converted
/// domain as a JSON value. A failed conversion is reported as the error built
/// by `invalid_domain_error`; a deserialization failure is propagated via `?`.
fn op_domain_to_ascii(
  _state: &State,
  args: Value,
  _zero_copy: &mut [ZeroCopyBuf],
) -> Result<JsonOp, OpError> {
  let args: DomainToAscii = serde_json::from_value(args)?;
  let converted = match args.be_strict {
    true => domain_to_ascii_strict(&args.domain),
    false => domain_to_ascii(&args.domain),
  };
  let domain = converted.map_err(|_| invalid_domain_error())?;
  Ok(JsonOp::Sync(json!(domain)))
}

View file

@ -13,6 +13,7 @@ pub mod errors;
pub mod fetch;
pub mod fs;
pub mod fs_events;
pub mod idna;
pub mod io;
pub mod net;
#[cfg(unix)]

View file

@ -25,32 +25,25 @@ unitTest(function urlParsing(): void {
String(url),
"https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat"
);
assertEquals(
JSON.stringify({ key: url }),
`{"key":"https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat"}`
);
});
// IPv6 type hostname.
const urlv6 = new URL(
"https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat"
);
assertEquals(urlv6.origin, "https://[::1]:8000");
assertEquals(urlv6.password, "bar");
assertEquals(urlv6.pathname, "/qux/quux");
assertEquals(urlv6.port, "8000");
assertEquals(urlv6.protocol, "https:");
assertEquals(urlv6.search, "?foo=bar&baz=12");
assertEquals(urlv6.searchParams.getAll("foo"), ["bar"]);
assertEquals(urlv6.searchParams.getAll("baz"), ["12"]);
assertEquals(urlv6.username, "foo");
assertEquals(
String(urlv6),
"https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat"
);
assertEquals(
JSON.stringify({ key: urlv6 }),
`{"key":"https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat"}`
);
// Exercises host parsing for special (https) and non-special (abcde) schemes.
unitTest(function urlHostParsing(): void {
  const hostnameOf = (input: string): string => new URL(input).hostname;
  const assertInvalidUrl = (input: string): void => {
    assertThrows(() => new URL(input), TypeError, "Invalid URL.");
  };

  // IPv6.
  assertEquals(hostnameOf("https://foo:bar@[::1]:8000"), "[::1]");

  // Forbidden host code point.
  assertInvalidUrl("https:// a");
  assertInvalidUrl("abcde:// a");
  assertInvalidUrl("https://%");
  assertEquals(hostnameOf("abcde://%"), "%");

  // Percent-decode.
  assertEquals(hostnameOf("https://%21"), "!");
  assertEquals(hostnameOf("abcde://%21"), "%21");

  // TODO(nayeemrmn): IPv4 parsing.
  // assertEquals(new URL("https://260").hostname, "0.0.1.4");
  assertEquals(hostnameOf("abcde://260"), "260");
});
unitTest(function urlModifications(): void {
@ -208,6 +201,7 @@ unitTest(function urlUncHostname() {
// Upper-case hosts are lower-cased for special schemes and preserved for
// non-special ones.
unitTest(function urlHostnameUpperCase() {
  const expectedHrefs: Array<[string, string]> = [
    ["https://EXAMPLE.COM", "https://example.com/"],
    ["abcde://EXAMPLE.COM", "abcde://EXAMPLE.COM/"],
  ];
  for (const [input, expected] of expectedHrefs) {
    assertEquals(new URL(input).href, expected);
  }
});
unitTest(function urlTrim() {
@ -223,11 +217,9 @@ unitTest(function urlEncoding() {
new URL("https://:a !$&*()=,;+'\"@example.com").password,
"a%20!$&*()%3D,%3B+%27%22"
);
// FIXME: https://url.spec.whatwg.org/#idna
// assertEquals(
// new URL("https://a !$&*()=,+'\"").hostname,
// "a%20%21%24%26%2A%28%29%3D%2C+%27%22"
// );
assertEquals(new URL("abcde://mañana/c?d#e").hostname, "ma%C3%B1ana");
// https://url.spec.whatwg.org/#idna
assertEquals(new URL("https://mañana/c?d#e").hostname, "xn--maana-pta");
assertEquals(
new URL("https://example.com/a ~!@$&*()=:/,;+'\"\\").pathname,
"/a%20~!@$&*()=:/,;+'%22/"

View file

@ -121,6 +121,7 @@ impl WebWorker {
handle,
);
ops::worker_host::init(isolate, &state);
ops::idna::init(isolate, &state);
ops::io::init(isolate, &state);
ops::resources::init(isolate, &state);
ops::errors::init(isolate, &state);

View file

@ -261,6 +261,7 @@ impl MainWorker {
ops::fetch::init(isolate, &state);
ops::fs::init(isolate, &state);
ops::fs_events::init(isolate, &state);
ops::idna::init(isolate, &state);
ops::io::init(isolate, &state);
ops::plugin::init(isolate, &state);
ops::net::init(isolate, &state);