feat(cli): support data urls (#8866)

Closes: #5059

Co-authored-by: Valentin Anger <syrupthinker@gryphno.de>
This commit is contained in:
Kitson Kelly 2021-01-06 13:22:38 +11:00 committed by GitHub
parent 60c9c85758
commit 54240c22af
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 380 additions and 49 deletions

View file

@ -67,7 +67,7 @@ impl DiskCache {
out.push(path_seg);
}
}
"http" | "https" => out = url_to_filename(url),
"http" | "https" | "data" => out = url_to_filename(url),
"file" => {
let path = match url.to_file_path() {
Ok(path) => path,

View file

@ -27,7 +27,7 @@ use std::pin::Pin;
use std::sync::Arc;
use std::sync::Mutex;
pub const SUPPORTED_SCHEMES: [&str; 3] = ["http", "https", "file"];
pub const SUPPORTED_SCHEMES: [&str; 4] = ["data", "file", "http", "https"];
/// A structure representing a source file.
#[derive(Debug, Clone, Eq, PartialEq)]
@ -145,6 +145,41 @@ pub fn get_source_from_bytes(
Ok(source)
}
/// Decode a `data:` URL specifier into its source text.
///
/// On success the returned tuple holds the decoded source (passed through
/// `strip_shebang`), the media type that `map_content_type` derives from the
/// URL's media type segment, and that percent-decoded media type segment
/// itself (for example `application/typescript;base64`).
///
/// # Errors
///
/// Returns a `BadScheme` error when the specifier is not a `data:` URL and a
/// `BadUrl` error when the URL path has no comma separating the media type
/// from the data. Failures while decoding UTF-8, base64, or the source bytes
/// are propagated.
fn get_source_from_data_url(
  specifier: &ModuleSpecifier,
) -> Result<(String, MediaType, String), AnyError> {
  let url = specifier.as_url();
  let scheme = url.scheme();
  if scheme != "data" {
    return Err(custom_error(
      "BadScheme",
      format!("Unexpected scheme of \"{}\"", scheme),
    ));
  }

  // Only the URL path is inspected: everything before the first comma is the
  // media type segment, everything after it is the payload.
  let path = url.path();
  let (raw_content_type, maybe_payload) = match path.find(',') {
    Some(comma) => (&path[..comma], Some(&path[comma + 1..])),
    None => (path, None),
  };

  // The media type segment is decoded before the payload is checked, so a
  // badly encoded media type is reported ahead of a missing comma.
  let content_type =
    percent_encoding::percent_decode_str(raw_content_type).decode_utf8()?;
  let payload = match maybe_payload {
    Some(payload) => payload,
    None => {
      return Err(custom_error(
        "BadUrl",
        "The data URL is badly formed, missing a comma.",
      ));
    }
  };

  let (media_type, maybe_charset) =
    map_content_type(specifier, Some(content_type.to_string()));

  // A `base64` parameter anywhere in the media type segment selects base64
  // decoding; otherwise the payload is treated as percent-encoded bytes.
  let bytes = if content_type.split(';').any(|param| param == "base64") {
    base64::decode(payload)?
  } else {
    percent_encoding::percent_decode_str(payload).collect()
  };

  let decoded = get_source_from_bytes(bytes, maybe_charset)?;
  Ok((strip_shebang(decoded), media_type, content_type.to_string()))
}
/// Return a validated scheme for a given module specifier.
fn get_validated_scheme(
specifier: &ModuleSpecifier,
@ -185,6 +220,8 @@ pub fn map_content_type(
| "application/node" => {
map_js_like_extension(specifier, MediaType::JavaScript)
}
"text/jsx" => MediaType::JSX,
"text/tsx" => MediaType::TSX,
"application/json" | "text/json" => MediaType::Json,
"application/wasm" => MediaType::Wasm,
// Handle plain and possibly webassembly
@ -354,6 +391,47 @@ impl FileFetcher {
Ok(Some(file))
}
/// Turn a `data:` URL specifier into a `File`.
///
/// A cached copy (looked up via `fetch_cached`) is returned when available.
/// Otherwise the URL is decoded with `get_source_from_data_url`, the decoded
/// source is written to the HTTP cache together with a `content-type` header,
/// and a new `File` is built from it.
///
/// # Errors
///
/// Returns a `NotFound` error when nothing is cached and the cache setting is
/// `CacheSetting::Only`. Errors from the cache lookup, from decoding the URL,
/// and from writing to the cache are propagated.
fn fetch_data_url(
  &self,
  specifier: &ModuleSpecifier,
) -> Result<File, AnyError> {
  debug!("FileFetcher::fetch_data_url() - specifier: {}", specifier);

  if let Some(cached) = self.fetch_cached(specifier, 0)? {
    return Ok(cached);
  }

  if self.cache_setting == CacheSetting::Only {
    let message = format!(
      "Specifier not found in cache: \"{}\", --cached-only is specified.",
      specifier
    );
    return Err(custom_error("NotFound", message));
  }

  let (source, media_type, content_type) =
    get_source_from_data_url(specifier)?;

  let url = specifier.as_url();
  let local = self.http_cache.get_cache_filename(url);

  // Store the media type segment as the content type so it travels with the
  // cached source.
  let mut headers = HashMap::new();
  headers.insert("content-type".to_string(), content_type);
  self.http_cache.set(url, headers, source.as_bytes())?;

  Ok(File {
    local,
    maybe_types: None,
    media_type,
    source,
    specifier: specifier.clone(),
  })
}
/// Asynchronously fetch remote source file specified by the URL following
/// redirects.
///
@ -450,26 +528,27 @@ impl FileFetcher {
permissions.check_specifier(specifier)?;
if let Some(file) = self.cache.get(specifier) {
Ok(file)
} else {
let is_local = scheme == "file";
if is_local {
fetch_local(specifier)
} else if !self.allow_remote {
Err(custom_error(
"NoRemote",
format!("A remote specifier was requested: \"{}\", but --no-remote is specified.", specifier),
))
} else {
let result = self.fetch_remote(specifier, permissions, 10).await;
// only cache remote resources, as they are the only things that would
// be "expensive" to fetch multiple times during an invocation, and it
// also allows local file sources to be changed, enabling things like
// dynamic import and workers to be updated while Deno is running.
if let Ok(file) = &result {
self.cache.insert(specifier.clone(), file.clone());
}
result
} else if scheme == "file" {
// we do not cache files in memory, as this would prevent changes to files
// on disk from affecting things like workers and dynamic imports.
fetch_local(specifier)
} else if scheme == "data" {
let result = self.fetch_data_url(specifier);
if let Ok(file) = &result {
self.cache.insert(specifier.clone(), file.clone());
}
result
} else if !self.allow_remote {
Err(custom_error(
"NoRemote",
format!("A remote specifier was requested: \"{}\", but --no-remote is specified.", specifier),
))
} else {
let result = self.fetch_remote(specifier, permissions, 10).await;
if let Ok(file) = &result {
self.cache.insert(specifier.clone(), file.clone());
}
result
}
}
@ -581,6 +660,39 @@ mod tests {
assert_eq!(file.source, expected);
}
// Table-driven test for `get_source_from_data_url`.
#[test]
fn test_get_source_from_data_url() {
// Each fixture is: (data URL, whether decoding is expected to succeed,
// expected media type, expected media type segment, expected decoded source).
let fixtures = vec![
// base64 encoded TypeScript
("data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=", true, MediaType::TypeScript, "application/typescript;base64", "export const a = \"a\";\n\nexport enum A {\n A,\n B,\n C,\n}\n"),
// same payload with a query string appended; the expected source is unchanged
("data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=?a=b&b=c", true, MediaType::TypeScript, "application/typescript;base64", "export const a = \"a\";\n\nexport enum A {\n A,\n B,\n C,\n}\n"),
// percent-encoded plain text
("data:text/plain,Hello%2C%20Deno!", true, MediaType::Unknown, "text/plain", "Hello, Deno!"),
// empty media type segment
("data:,Hello%2C%20Deno!", true, MediaType::Unknown, "", "Hello, Deno!"),
// percent-encoded JavaScript; the payload itself contains a comma
("data:application/javascript,console.log(\"Hello, Deno!\");%0A", true, MediaType::JavaScript, "application/javascript", "console.log(\"Hello, Deno!\");\n"),
// base64 encoded JSX and TSX
("data:text/jsx;base64,ZXhwb3J0IGRlZmF1bHQgZnVuY3Rpb24oKSB7CiAgcmV0dXJuIDxkaXY+SGVsbG8gRGVubyE8L2Rpdj4KfQo=", true, MediaType::JSX, "text/jsx;base64", "export default function() {\n return <div>Hello Deno!</div>\n}\n"),
("data:text/tsx;base64,ZXhwb3J0IGRlZmF1bHQgZnVuY3Rpb24oKSB7CiAgcmV0dXJuIDxkaXY+SGVsbG8gRGVubyE8L2Rpdj4KfQo=", true, MediaType::TSX, "text/tsx;base64", "export default function() {\n return <div>Hello Deno!</div>\n}\n"),
];
for (
url_str,
expected_ok,
expected_media_type,
expected_media_type_str,
expected,
) in fixtures
{
let specifier = ModuleSpecifier::resolve_url(url_str).unwrap();
let actual = get_source_from_data_url(&specifier);
assert_eq!(actual.is_ok(), expected_ok);
// The decoded parts are only compared when success is expected.
if expected_ok {
let (actual, actual_media_type, actual_media_type_str) =
actual.unwrap();
assert_eq!(actual, expected);
assert_eq!(actual_media_type, expected_media_type);
assert_eq!(actual_media_type_str, expected_media_type_str);
}
}
}
#[test]
fn test_get_validated_scheme() {
let fixtures = vec![
@ -588,6 +700,7 @@ mod tests {
("http://deno.land/x/mod.ts", true, "http"),
("file:///a/b/c.ts", true, "file"),
("file:///C:/a/b/c.ts", true, "file"),
("data:,some%20text", true, "data"),
("ftp://a/b/c.ts", false, ""),
("mailto:dino@deno.land", false, ""),
];
@ -690,6 +803,18 @@ mod tests {
MediaType::JavaScript,
None,
),
(
"https://deno.land/x/mod",
Some("text/jsx".to_string()),
MediaType::JSX,
None,
),
(
"https://deno.land/x/mod",
Some("text/tsx".to_string()),
MediaType::TSX,
None,
),
(
"https://deno.land/x/mod",
Some("text/json".to_string()),
@ -827,6 +952,25 @@ mod tests {
assert_eq!(actual, expected);
}
// Fetches a base64 encoded TypeScript data URL through the public
// `FileFetcher::fetch` entry point and checks the resulting file's source,
// media type, types and specifier.
#[tokio::test]
async fn test_fetch_data_url() {
let (file_fetcher, _) = setup(CacheSetting::Use, None);
let specifier = ModuleSpecifier::resolve_url("data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=").unwrap();
let result = file_fetcher
.fetch(&specifier, &Permissions::allow_all())
.await;
assert!(result.is_ok());
let file = result.unwrap();
// The source is expected to be the decoded payload, not the raw base64.
assert_eq!(
file.source,
"export const a = \"a\";\n\nexport enum A {\n A,\n B,\n C,\n}\n"
);
assert_eq!(file.media_type, MediaType::TypeScript);
assert_eq!(file.maybe_types, None);
assert_eq!(file.specifier, specifier);
}
#[tokio::test]
async fn test_fetch_complex() {
let _http_server_guard = test_util::http_server();

View file

@ -38,6 +38,7 @@ fn base_url_to_filename(url: &Url) -> PathBuf {
};
out.push(host_port);
}
"data" => (),
scheme => {
unimplemented!(
"Don't know how to create cache name for scheme: {}",
@ -253,6 +254,14 @@ mod tests {
"https://deno.land/?asdf=qwer#qwer",
"https/deno.land/e4edd1f433165141015db6a823094e6bd8f24dd16fe33f2abd99d34a0a21a3c0",
),
(
"data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=",
"data/c21c7fc382b2b0553dc0864aa81a3acacfb7b3d1285ab5ae76da6abec213fb37",
),
(
"data:text/plain,Hello%2C%20Deno!",
"data/967374e3561d6741234131e342bf5c6848b70b13758adfe23ee1a813a8131818",
)
];
for (url, expected) in test_cases.iter() {

View file

@ -252,7 +252,7 @@ impl ProgramState {
match url.scheme() {
// we should only be looking for emits for schemes that denote external
// modules, which the disk_cache supports
"wasm" | "file" | "http" | "https" => (),
"wasm" | "file" | "http" | "https" | "data" => (),
_ => {
return None;
}

View file

@ -305,7 +305,7 @@ impl SpecifierHandler for FetchHandler {
}
})?;
let url = source_file.specifier.as_url();
let is_remote = url.scheme() != "file";
let is_remote = !(url.scheme() == "file" || url.scheme() == "data");
let filename = disk_cache.get_cache_filename_with_extension(url, "meta");
let maybe_version = if let Some(filename) = filename {
if let Ok(bytes) = disk_cache.get(&filename) {

View file

@ -0,0 +1,12 @@
// export const a = "a";
// export enum A {
// A,
// B,
// C,
// }
import * as a from "data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=";
console.log(a.a);
console.log(a.A);
console.log(a.A.A);

View file

@ -0,0 +1,3 @@
a
{ "0": "A", "1": "B", "2": "C", A: 0, B: 1, C: 2 }
0

View file

@ -0,0 +1,3 @@
import { a } from "data:application/typescript;base64,ZW51bSBBIHsKICBBLAogIEIsCiAgQywKIH0KIAogZXhwb3J0IGZ1bmN0aW9uIGEoKSB7CiAgIHRocm93IG5ldyBFcnJvcihgSGVsbG8gJHtBLkN9YCk7CiB9CiA=";
a();

View file

@ -0,0 +1,5 @@
error: Uncaught Error: Hello 2
throw new Error(`Hello ${A.C}`);
^
at a (72554b3efdc211ba4aa0b62629589f048e7d4afe7b0576f35ff340ce0ea8f9b8.ts:8:10)
at import_data_url_error_stack.ts:3:1

View file

@ -0,0 +1,4 @@
// export { a } from "./a.ts";
import * as a from "data:application/javascript;base64,ZXhwb3J0IHsgYSB9IGZyb20gIi4vYS50cyI7Cg==";
console.log(a);

View file

@ -0,0 +1 @@
error: invalid URL: relative URL with a cannot-be-a-base base

View file

@ -0,0 +1,4 @@
// export { printHello } from "http://localhost:4545/cli/tests/subdir/mod2.ts";
import { printHello } from "data:application/typescript;base64,ZXhwb3J0IHsgcHJpbnRIZWxsbyB9IGZyb20gImh0dHA6Ly9sb2NhbGhvc3Q6NDU0NS9jbGkvdGVzdHMvc3ViZGlyL21vZDIudHMiOwo=";
printHello();

View file

@ -0,0 +1 @@
Hello

View file

@ -0,0 +1,10 @@
// The data URL below is the base64 encoding of this JSX module:
//
// export default function() {
//   return <div>Hello Deno!</div>
// }
import render from "data:text/jsx;base64,ZXhwb3J0IGRlZmF1bHQgZnVuY3Rpb24oKSB7CiAgcmV0dXJuIDxkaXY+SGVsbG8gRGVubyE8L2Rpdj4KfQo=";
// Install a stub `React` global whose `createElement` just logs its
// arguments, then call the imported component.
// deno-lint-ignore no-explicit-any
(globalThis as any).React = {
createElement(...args: unknown[]) {
console.log(...args);
},
};
render();

View file

@ -0,0 +1 @@
div null Hello Deno!

View file

@ -0,0 +1,14 @@
// export const a = "a";
// export enum A {
// A,
// B,
// C,
// }
const a = await import(
"data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo="
);
console.log(a.a);
console.log(a.A);
console.log(a.A.A);

View file

@ -0,0 +1,3 @@
a
{ "0": "A", "1": "B", "2": "C", A: 0, B: 1, C: 2 }
0

View file

@ -3412,6 +3412,39 @@ itest!(deno_doc_import_map {
output: "doc/use_import_map.out",
});
// Integration tests for importing `data:` URLs. Each entry runs the named
// script with `--quiet --reload` and names the file holding its expected
// output.

// Expected to fail with exit code 1.
itest!(import_data_url_error_stack {
args: "run --quiet --reload import_data_url_error_stack.ts",
output: "import_data_url_error_stack.ts.out",
exit_code: 1,
});
// Expected to fail with exit code 1.
itest!(import_data_url_import_relative {
args: "run --quiet --reload import_data_url_import_relative.ts",
output: "import_data_url_import_relative.ts.out",
exit_code: 1,
});
// Needs the test HTTP server to be running.
itest!(import_data_url_imports {
args: "run --quiet --reload import_data_url_imports.ts",
output: "import_data_url_imports.ts.out",
http_server: true,
});
itest!(import_data_url_jsx {
args: "run --quiet --reload import_data_url_jsx.ts",
output: "import_data_url_jsx.ts.out",
});
itest!(import_data_url {
args: "run --quiet --reload import_data_url.ts",
output: "import_data_url.ts.out",
});
itest!(import_dynamic_data_url {
args: "run --quiet --reload import_dynamic_data_url.ts",
output: "import_dynamic_data_url.ts.out",
});
itest!(import_file_with_colon {
args: "run --quiet --reload import_file_with_colon.ts",
output: "import_file_with_colon.ts.out",

View file

@ -1,5 +1,6 @@
error: Uncaught (in promise) TypeError: Unsupported scheme "xxx" for module "xxx:". Supported schemes: [
"data",
"file",
"http",
"https",
"file",
]

View file

@ -46,6 +46,27 @@ Deno.test({
},
});
// Starts a module worker whose script is supplied inline as a base64 encoded
// TypeScript data URL, sends it a message and waits for the reply.
Deno.test({
name: "worker from data url",
async fn() {
// Resolved by the `onmessage` handler once the reply has been checked.
const promise = deferred();
// NOTE(review): the payload appears to be a script that checks
// `self.name` is "tsWorker" and posts received data back; decode the
// base64 to confirm.
const tsWorker = new Worker(
"data:application/typescript;base64,aWYgKHNlbGYubmFtZSAhPT0gInRzV29ya2VyIikgewogIHRocm93IEVycm9yKGBJbnZhbGlkIHdvcmtlciBuYW1lOiAke3NlbGYubmFtZX0sIGV4cGVjdGVkIHRzV29ya2VyYCk7Cn0KCm9ubWVzc2FnZSA9IGZ1bmN0aW9uIChlKTogdm9pZCB7CiAgcG9zdE1lc3NhZ2UoZS5kYXRhKTsKICBjbG9zZSgpOwp9Owo=",
{ type: "module", name: "tsWorker" },
);
// The reply is expected to equal the message that was sent.
tsWorker.onmessage = (e): void => {
assertEquals(e.data, "Hello World");
promise.resolve();
};
tsWorker.postMessage("Hello World");
await promise;
tsWorker.terminate();
},
});
Deno.test({
name: "worker nested",
fn: async function (): Promise<void> {

View file

@ -82,6 +82,19 @@ fn get_maybe_hash(
}
}
/// Build a short stand-in specifier string for a `data:` URL.
///
/// The result has the form `data:///<hash><ext>`, where `<hash>` is the
/// checksum of the URL's path and `<ext>` is the extension reported by
/// `media_type.as_ts_extension()`.
///
/// # Panics
///
/// Panics when `specifier` does not use the `data` scheme.
fn hash_data_url(
  specifier: &ModuleSpecifier,
  media_type: &MediaType,
) -> String {
  let url = specifier.as_url();
  assert_eq!(url.scheme(), "data", "Specifier must be a data: specifier.");
  let digest = crate::checksum::gen(&[url.path().as_bytes()]);
  let extension = media_type.as_ts_extension();
  format!("data:///{}{}", digest, extension)
}
/// tsc only supports `.ts`, `.tsx`, `.d.ts`, `.js`, or `.jsx` as root modules
/// and so we have to detect the apparent media type based on extensions it
/// supports.
@ -152,7 +165,9 @@ pub struct Response {
pub stats: Stats,
}
#[derive(Debug)]
struct State {
data_url_map: HashMap<String, ModuleSpecifier>,
hash_data: Vec<Vec<u8>>,
emitted_files: Vec<EmittedFile>,
graph: Arc<Mutex<Graph>>,
@ -167,10 +182,12 @@ impl State {
hash_data: Vec<Vec<u8>>,
maybe_tsbuildinfo: Option<String>,
root_map: HashMap<String, ModuleSpecifier>,
data_url_map: HashMap<String, ModuleSpecifier>,
) -> Self {
State {
data_url_map,
hash_data,
emitted_files: Vec::new(),
emitted_files: Default::default(),
graph,
maybe_tsbuildinfo,
maybe_response: None,
@ -231,7 +248,9 @@ fn emit(state: &mut State, args: Value) -> Result<Value, AnyError> {
let specifiers = specifiers
.iter()
.map(|s| {
if let Some(remapped_specifier) = state.root_map.get(s) {
if let Some(data_specifier) = state.data_url_map.get(s) {
data_specifier.clone()
} else if let Some(remapped_specifier) = state.root_map.get(s) {
remapped_specifier.clone()
} else {
ModuleSpecifier::resolve_url_or_path(s).unwrap()
@ -278,12 +297,15 @@ fn load(state: &mut State, args: Value) -> Result<Value, AnyError> {
maybe_source
} else {
let graph = state.graph.lock().unwrap();
let specifier =
if let Some(remapped_specifier) = state.root_map.get(&v.specifier) {
remapped_specifier.clone()
} else {
specifier
};
let specifier = if let Some(data_specifier) =
state.data_url_map.get(&v.specifier)
{
data_specifier.clone()
} else if let Some(remapped_specifier) = state.root_map.get(&v.specifier) {
remapped_specifier.clone()
} else {
specifier
};
let maybe_source = graph.get_source(&specifier);
media_type = if let Some(media_type) = graph.get_media_type(&specifier) {
media_type
@ -313,7 +335,9 @@ fn resolve(state: &mut State, args: Value) -> Result<Value, AnyError> {
let v: ResolveArgs = serde_json::from_value(args)
.context("Invalid request from JavaScript for \"op_resolve\".")?;
let mut resolved: Vec<(String, String)> = Vec::new();
let referrer = if let Some(remapped_base) = state.root_map.get(&v.base) {
let referrer = if let Some(data_specifier) = state.data_url_map.get(&v.base) {
data_specifier.clone()
} else if let Some(remapped_base) = state.root_map.get(&v.base) {
remapped_base.clone()
} else {
ModuleSpecifier::resolve_url_or_path(&v.base).context(
@ -340,10 +364,18 @@ fn resolve(state: &mut State, args: Value) -> Result<Value, AnyError> {
resolved_specifier
)
};
resolved.push((
resolved_specifier.to_string(),
media_type.as_ts_extension(),
));
let resolved_specifier_str = if resolved_specifier.as_url().scheme()
== "data"
{
let specifier_str = hash_data_url(&resolved_specifier, &media_type);
state
.data_url_map
.insert(specifier_str.clone(), resolved_specifier);
specifier_str
} else {
resolved_specifier.to_string()
};
resolved.push((resolved_specifier_str, media_type.as_ts_extension()));
}
// in certain situations, like certain dynamic imports, we won't have
// the source file in the graph, so we will return a fake module to
@ -384,17 +416,24 @@ pub fn exec(request: Request) -> Result<Response, AnyError> {
// extensions and remap any that are unacceptable to tsc and add them to the
// op state so when requested, we can remap to the original specifier.
let mut root_map = HashMap::new();
let mut data_url_map = HashMap::new();
let root_names: Vec<String> = request
.root_names
.iter()
.map(|(s, mt)| {
let ext_media_type = get_tsc_media_type(s);
if mt != &ext_media_type {
let new_specifier = format!("{}{}", s, mt.as_ts_extension());
root_map.insert(new_specifier.clone(), s.clone());
new_specifier
if s.as_url().scheme() == "data" {
let specifier_str = hash_data_url(s, mt);
data_url_map.insert(specifier_str.clone(), s.clone());
specifier_str
} else {
s.as_str().to_owned()
let ext_media_type = get_tsc_media_type(s);
if mt != &ext_media_type {
let new_specifier = format!("{}{}", s, mt.as_ts_extension());
root_map.insert(new_specifier.clone(), s.clone());
new_specifier
} else {
s.as_str().to_owned()
}
}
})
.collect();
@ -407,6 +446,7 @@ pub fn exec(request: Request) -> Result<Response, AnyError> {
request.hash_data.clone(),
request.maybe_tsbuildinfo.clone(),
root_map,
data_url_map,
));
}
@ -484,7 +524,13 @@ mod tests {
.await
.expect("module not inserted");
let graph = Arc::new(Mutex::new(builder.get_graph()));
State::new(graph, hash_data, maybe_tsbuildinfo, HashMap::new())
State::new(
graph,
hash_data,
maybe_tsbuildinfo,
HashMap::new(),
HashMap::new(),
)
}
async fn test_exec(
@ -559,6 +605,15 @@ mod tests {
);
}
// Checks that `hash_data_url` turns a JavaScript data URL into a
// `data:///<hash>.js` string.
#[test]
fn test_hash_data_url() {
let specifier = ModuleSpecifier::resolve_url(
"data:application/javascript,console.log(\"Hello%20Deno\");",
)
.unwrap();
assert_eq!(hash_data_url(&specifier, &MediaType::JavaScript), "data:///d300ea0796bd72b08df10348e0b70514c021f2e45bfe59cec24e12e97cd79c58.js");
}
#[test]
fn test_get_tsc_media_type() {
let fixtures = vec![

View file

@ -580,16 +580,16 @@ impl Permissions {
specifier: &ModuleSpecifier,
) -> Result<(), AnyError> {
let url = specifier.as_url();
if url.scheme() == "file" {
match url.to_file_path() {
match url.scheme() {
"file" => match url.to_file_path() {
Ok(path) => self.check_read(&path),
Err(_) => Err(uri_error(format!(
"Invalid file path.\n Specifier: {}",
specifier
))),
}
} else {
self.check_net_url(url)
},
"data" => Ok(()),
_ => self.check_net_url(url),
}
}
@ -920,6 +920,13 @@ mod tests {
.unwrap(),
false,
),
(
ModuleSpecifier::resolve_url_or_path(
"data:text/plain,Hello%2C%20Deno!",
)
.unwrap(),
true,
),
];
if cfg!(target_os = "windows") {