fix(cli): add support for non-UTF8 source files (#6789)

Fixes: #5542
This commit is contained in:
Maayan Hanin 2020-08-04 00:39:48 +03:00 committed by GitHub
parent d615ebefe2
commit 5fc5e7b54a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 422 additions and 99 deletions

View File

@ -14,6 +14,7 @@
"excludes": [
".cargo_home",
"cli/dts",
"cli/tests/encoding",
"cli/tsc/*typescript.js",
"gh-pages",
"std/**/testdata",

2
.gitattributes vendored
View File

@ -2,6 +2,8 @@
* text=auto eol=lf
*.png -text
/cli/tests/encoding/* -text
# Tell git which symlinks point to files, and which ones point to directories.
# This is relevant for Windows only, and requires git >= 2.19.2 to work.
/core/libdeno/* symlink=dir

1
Cargo.lock generated
View File

@ -385,6 +385,7 @@ dependencies = [
"dissimilar",
"dlopen",
"dprint-plugin-typescript",
"encoding_rs",
"futures",
"fwdansi",
"http",

View File

@ -32,6 +32,7 @@ byteorder = "1.3.4"
clap = "2.33.1"
dissimilar = "1.0.2"
dlopen = "0.1.8"
encoding_rs = "0.8.23"
dprint-plugin-typescript = "0.25.0"
futures = "0.3.5"
http = "0.2.1"

View File

@ -7,10 +7,12 @@ use crate::http_util::FetchOnceResult;
use crate::msg;
use crate::op_error::OpError;
use crate::permissions::Permissions;
use crate::text_encoding;
use deno_core::ErrBox;
use deno_core::ModuleSpecifier;
use futures::future::FutureExt;
use log::info;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fs;
use std::future::Future;
@ -24,6 +26,47 @@ use std::sync::Arc;
use std::sync::Mutex;
use url::Url;
/// Structure representing a text document.
///
/// Stores the document's raw bytes next to the charset label that is used to
/// decode them; decoding to UTF-8 happens only when a caller asks for text.
#[derive(Debug, Clone)]
pub struct TextDocument {
/// Raw, undecoded contents of the document.
bytes: Vec<u8>,
/// Charset label handed to `text_encoding::convert_to_utf8` when decoding.
charset: Cow<'static, str>,
}
impl TextDocument {
  /// Builds a document from raw `bytes`.
  ///
  /// An explicit `charset` label is stored as given. When `charset` is
  /// `None`, the label is chosen by `text_encoding::detect_charset` from the
  /// bytes themselves.
  pub fn new(
    bytes: Vec<u8>,
    charset: Option<impl Into<Cow<'static, str>>>,
  ) -> TextDocument {
    let charset: Cow<'static, str> = match charset {
      Some(label) => label.into(),
      None => Cow::from(text_encoding::detect_charset(&bytes)),
    };
    TextDocument { bytes, charset }
  }

  /// Borrows the raw, undecoded bytes of the document.
  pub fn as_bytes(&self) -> &Vec<u8> {
    &self.bytes
  }

  /// Consumes the document and returns its raw, undecoded bytes.
  pub fn into_bytes(self) -> Vec<u8> {
    self.bytes
  }

  /// Decodes the document to UTF-8 using the stored charset label.
  ///
  /// Any error produced by `text_encoding::convert_to_utf8` is returned
  /// unchanged.
  pub fn to_str(&self) -> Result<Cow<str>, std::io::Error> {
    let label: &str = &*self.charset;
    text_encoding::convert_to_utf8(self.bytes.as_slice(), label)
  }

  /// Decodes the document like `to_str`, but always returns an owned
  /// `String`.
  pub fn to_string(&self) -> Result<String, std::io::Error> {
    let decoded = self.to_str()?;
    Ok(decoded.into_owned())
  }
}
impl From<Vec<u8>> for TextDocument {
  /// Wraps raw bytes in a `TextDocument` without an explicit charset, so
  /// `TextDocument::new` falls back to charset detection.
  fn from(bytes: Vec<u8>) -> Self {
    let no_charset: Option<&'static str> = None;
    TextDocument::new(bytes, no_charset)
  }
}
/// Structure representing local or remote file.
///
/// In case of remote file `url` might be different than originally requested URL, if so
@ -34,7 +77,7 @@ pub struct SourceFile {
pub filename: PathBuf,
pub types_header: Option<String>,
pub media_type: msg::MediaType,
pub source_code: Vec<u8>,
pub source_code: TextDocument,
}
/// Simple struct implementing in-process caching to prevent multiple
@ -180,8 +223,9 @@ impl SourceFileFetcher {
match result {
Ok(mut file) => {
// TODO: move somewhere?
if file.source_code.starts_with(b"#!") {
file.source_code = filter_shebang(file.source_code);
if file.source_code.bytes.starts_with(b"#!") {
file.source_code =
filter_shebang(&file.source_code.to_str().unwrap()[..]).into();
}
// Cache in-process for subsequent access.
@ -313,12 +357,12 @@ impl SourceFileFetcher {
Err(e) => return Err(e.into()),
};
let media_type = map_content_type(&filepath, None);
let (media_type, charset) = map_content_type(&filepath, None);
Ok(SourceFile {
url: module_url.clone(),
filename: filepath,
media_type,
source_code,
source_code: TextDocument::new(source_code, charset),
types_header: None,
})
}
@ -380,7 +424,7 @@ impl SourceFileFetcher {
let cache_filename = self.http_cache.get_cache_filename(module_url);
let fake_filepath = PathBuf::from(module_url.path());
let media_type = map_content_type(
let (media_type, charset) = map_content_type(
&fake_filepath,
headers.get("content-type").map(|e| e.as_str()),
);
@ -389,7 +433,7 @@ impl SourceFileFetcher {
url: module_url.clone(),
filename: cache_filename,
media_type,
source_code,
source_code: TextDocument::new(source_code, charset),
types_header,
}))
}
@ -490,7 +534,7 @@ impl SourceFileFetcher {
let cache_filepath = dir.http_cache.get_cache_filename(&module_url);
// Used to sniff out content type from file extension - probably to be removed
let fake_filepath = PathBuf::from(module_url.path());
let media_type = map_content_type(
let (media_type, charset) = map_content_type(
&fake_filepath,
headers.get("content-type").map(String::as_str),
);
@ -502,7 +546,7 @@ impl SourceFileFetcher {
url: module_url.clone(),
filename: cache_filepath,
media_type,
source_code: source,
source_code: TextDocument::new(source, charset),
types_header,
};
@ -532,16 +576,19 @@ pub fn map_file_extension(path: &Path) -> msg::MediaType {
}
}
// convert a ContentType string into a enumerated MediaType
fn map_content_type(path: &Path, content_type: Option<&str>) -> msg::MediaType {
// Convert a ContentType string into an enumerated MediaType + optional charset
fn map_content_type(
path: &Path,
content_type: Option<&str>,
) -> (msg::MediaType, Option<String>) {
match content_type {
Some(content_type) => {
// sometimes there is additional data after the media type in
// Sometimes there is additional data after the media type in
// Content-Type so we have to do a bit of manipulation so we are only
// dealing with the actual media type
let ct_vector: Vec<&str> = content_type.split(';').collect();
let ct: &str = ct_vector.first().unwrap();
match ct.to_lowercase().as_ref() {
// dealing with the actual media type.
let mut ct_iter = content_type.split(';');
let ct = ct_iter.next().unwrap();
let media_type = match ct.to_lowercase().as_ref() {
"application/typescript"
| "text/typescript"
| "video/vnd.dlna.mpeg-tts"
@ -565,9 +612,16 @@ fn map_content_type(path: &Path, content_type: Option<&str>) -> msg::MediaType {
debug!("unknown content type: {}", content_type);
msg::MediaType::Unknown
}
}
};
let charset = ct_iter
.map(str::trim)
.find_map(|s| s.strip_prefix("charset="))
.map(String::from);
(media_type, charset)
}
None => map_file_extension(path),
None => (map_file_extension(path), None),
}
}
@ -586,8 +640,7 @@ fn map_js_like_extension(
}
}
fn filter_shebang(bytes: Vec<u8>) -> Vec<u8> {
let string = str::from_utf8(&bytes).unwrap();
fn filter_shebang(string: &str) -> Vec<u8> {
if let Some(i) = string.find('\n') {
let (_, rest) = string.split_at(i);
rest.as_bytes().to_owned()
@ -767,7 +820,7 @@ mod tests {
assert!(result.is_ok());
let r = result.unwrap();
assert_eq!(
r.source_code,
r.source_code.bytes,
&b"export { printHello } from \"./print_hello.ts\";\n"[..]
);
assert_eq!(&(r.media_type), &msg::MediaType::TypeScript);
@ -794,7 +847,7 @@ mod tests {
assert!(result2.is_ok());
let r2 = result2.unwrap();
assert_eq!(
r2.source_code,
r2.source_code.bytes,
&b"export { printHello } from \"./print_hello.ts\";\n"[..]
);
// If get_source_file does not call remote, this should be JavaScript
@ -823,7 +876,7 @@ mod tests {
assert!(result3.is_ok());
let r3 = result3.unwrap();
assert_eq!(
r3.source_code,
r3.source_code.bytes,
&b"export { printHello } from \"./print_hello.ts\";\n"[..]
);
// If get_source_file does not call remote, this should be JavaScript
@ -850,7 +903,7 @@ mod tests {
assert!(result4.is_ok());
let r4 = result4.unwrap();
let expected4 = &b"export { printHello } from \"./print_hello.ts\";\n"[..];
assert_eq!(r4.source_code, expected4);
assert_eq!(r4.source_code.bytes, expected4);
// Resolved back to TypeScript
assert_eq!(&(r4.media_type), &msg::MediaType::TypeScript);
@ -880,7 +933,7 @@ mod tests {
assert!(result.is_ok());
let r = result.unwrap();
let expected = b"export const loaded = true;\n";
assert_eq!(r.source_code, expected);
assert_eq!(r.source_code.bytes, expected);
assert_eq!(&(r.media_type), &msg::MediaType::JavaScript);
let (_, headers) = fetcher.http_cache.get(&module_url).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/javascript");
@ -906,7 +959,7 @@ mod tests {
assert!(result2.is_ok());
let r2 = result2.unwrap();
let expected2 = b"export const loaded = true;\n";
assert_eq!(r2.source_code, expected2);
assert_eq!(r2.source_code.bytes, expected2);
// If get_source_file does not call remote, this should be TypeScript
// as we modified before! (we do not overwrite .headers.json due to no http
// fetch)
@ -932,7 +985,7 @@ mod tests {
assert!(result3.is_ok());
let r3 = result3.unwrap();
let expected3 = b"export const loaded = true;\n";
assert_eq!(r3.source_code, expected3);
assert_eq!(r3.source_code.bytes, expected3);
// Now the old .headers.json file should be overwritten back to JavaScript!
// (due to http fetch)
assert_eq!(&(r3.media_type), &msg::MediaType::JavaScript);
@ -1352,7 +1405,7 @@ mod tests {
.await;
assert!(result.is_ok());
let r = result.unwrap();
assert_eq!(r.source_code, b"export const loaded = true;\n");
assert_eq!(r.source_code.bytes, b"export const loaded = true;\n");
assert_eq!(&(r.media_type), &msg::MediaType::TypeScript);
// Modify .metadata.json, make sure read from local
@ -1368,7 +1421,7 @@ mod tests {
let result2 = fetcher.fetch_cached_remote_source(&module_url, 1);
assert!(result2.is_ok());
let r2 = result2.unwrap().unwrap();
assert_eq!(r2.source_code, b"export const loaded = true;\n");
assert_eq!(r2.source_code.bytes, b"export const loaded = true;\n");
// Not MediaType::TypeScript due to .headers.json modification
assert_eq!(&(r2.media_type), &msg::MediaType::JavaScript);
@ -1392,7 +1445,7 @@ mod tests {
.await;
assert!(result.is_ok());
let r = result.unwrap();
assert_eq!(r.source_code, b"export const loaded = true;\n");
assert_eq!(r.source_code.bytes, b"export const loaded = true;\n");
assert_eq!(&(r.media_type), &msg::MediaType::TypeScript);
let (_, headers) = fetcher.http_cache.get(module_url).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/typescript");
@ -1417,7 +1470,7 @@ mod tests {
.await;
assert!(result.is_ok());
let r2 = result.unwrap();
assert_eq!(r2.source_code, b"export const loaded = true;\n");
assert_eq!(r2.source_code.bytes, b"export const loaded = true;\n");
assert_eq!(&(r2.media_type), &msg::MediaType::JavaScript);
let (_, headers) = fetcher.http_cache.get(module_url).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/javascript");
@ -1442,7 +1495,7 @@ mod tests {
.await;
assert!(result.is_ok());
let r3 = result.unwrap();
assert_eq!(r3.source_code, b"export const loaded = true;\n");
assert_eq!(r3.source_code.bytes, b"export const loaded = true;\n");
assert_eq!(&(r3.media_type), &msg::MediaType::TypeScript);
let (_, headers) = fetcher.http_cache.get(module_url).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/typescript");
@ -1523,6 +1576,63 @@ mod tests {
}
}
/// Fetches `tests/encoding/<charset>.ts` (relative to the crate manifest
/// directory) through the file fetcher and asserts that its source decodes
/// to `expected_content`.
async fn test_fetch_source_file_from_disk_nonstandard_encoding(
  charset: &str,
  expected_content: String,
) {
  let (_temp_dir, fetcher) = test_setup();

  let fixture_file = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
    .join(format!("tests/encoding/{}.ts", charset));
  let fixture_str = fixture_file.to_str().unwrap();
  let specifier = ModuleSpecifier::resolve_url_or_path(fixture_str).unwrap();

  let fetch_result = fetcher
    .fetch_source_file(&specifier, None, Permissions::allow_all())
    .await;
  assert!(fetch_result.is_ok());
  let fetched_file = fetch_result.unwrap();

  let decoded = fetched_file.source_code.to_str();
  assert!(decoded.is_ok());
  assert_eq!(expected_content, decoded.unwrap());
}
/// The `utf-16be` fixture read from disk must decode to the expected UTF-8
/// text, which starts with a byte order mark.
#[tokio::test]
async fn test_fetch_source_file_from_disk_utf_16_be() {
  let expected_bytes =
    b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec();
  let expected = String::from_utf8(expected_bytes).unwrap();
  test_fetch_source_file_from_disk_nonstandard_encoding("utf-16be", expected)
    .await;
}
/// The `utf-16le` fixture read from disk must decode to the expected UTF-8
/// text, which starts with a byte order mark.
#[tokio::test]
async fn test_fetch_source_file_from_disk_utf_16_le() {
  let expected_bytes =
    b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec();
  let expected = String::from_utf8(expected_bytes).unwrap();
  test_fetch_source_file_from_disk_nonstandard_encoding("utf-16le", expected)
    .await;
}
/// The `utf-8` fixture read from disk must come back unchanged as text,
/// including its leading byte order mark.
#[tokio::test]
async fn test_fetch_source_file_from_disk_utf_8_with_bom() {
  let expected_bytes =
    b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec();
  let expected = String::from_utf8(expected_bytes).unwrap();
  test_fetch_source_file_from_disk_nonstandard_encoding("utf-8", expected)
    .await;
}
#[test]
fn test_map_file_extension() {
assert_eq!(
@ -1571,43 +1681,43 @@ mod tests {
fn test_map_content_type_extension_only() {
// Extension only
assert_eq!(
map_content_type(Path::new("foo/bar.ts"), None),
map_content_type(Path::new("foo/bar.ts"), None).0,
msg::MediaType::TypeScript
);
assert_eq!(
map_content_type(Path::new("foo/bar.tsx"), None),
map_content_type(Path::new("foo/bar.tsx"), None).0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(Path::new("foo/bar.d.ts"), None),
map_content_type(Path::new("foo/bar.d.ts"), None).0,
msg::MediaType::TypeScript
);
assert_eq!(
map_content_type(Path::new("foo/bar.js"), None),
map_content_type(Path::new("foo/bar.js"), None).0,
msg::MediaType::JavaScript
);
assert_eq!(
map_content_type(Path::new("foo/bar.txt"), None),
map_content_type(Path::new("foo/bar.txt"), None).0,
msg::MediaType::Unknown
);
assert_eq!(
map_content_type(Path::new("foo/bar.jsx"), None),
map_content_type(Path::new("foo/bar.jsx"), None).0,
msg::MediaType::JSX
);
assert_eq!(
map_content_type(Path::new("foo/bar.json"), None),
map_content_type(Path::new("foo/bar.json"), None).0,
msg::MediaType::Json
);
assert_eq!(
map_content_type(Path::new("foo/bar.wasm"), None),
map_content_type(Path::new("foo/bar.wasm"), None).0,
msg::MediaType::Wasm
);
assert_eq!(
map_content_type(Path::new("foo/bar.cjs"), None),
map_content_type(Path::new("foo/bar.cjs"), None).0,
msg::MediaType::JavaScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), None),
map_content_type(Path::new("foo/bar"), None).0,
msg::MediaType::Unknown
);
}
@ -1616,140 +1726,154 @@ mod tests {
fn test_map_content_type_media_type_with_no_extension() {
// Media Type
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("application/typescript")),
map_content_type(Path::new("foo/bar"), Some("application/typescript")).0,
msg::MediaType::TypeScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("text/typescript")),
map_content_type(Path::new("foo/bar"), Some("text/typescript")).0,
msg::MediaType::TypeScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("video/vnd.dlna.mpeg-tts")),
map_content_type(Path::new("foo/bar"), Some("video/vnd.dlna.mpeg-tts")).0,
msg::MediaType::TypeScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("video/mp2t")),
map_content_type(Path::new("foo/bar"), Some("video/mp2t")).0,
msg::MediaType::TypeScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("application/x-typescript")),
map_content_type(Path::new("foo/bar"), Some("application/x-typescript"))
.0,
msg::MediaType::TypeScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("application/javascript")),
map_content_type(Path::new("foo/bar"), Some("application/javascript")).0,
msg::MediaType::JavaScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("text/javascript")),
map_content_type(Path::new("foo/bar"), Some("text/javascript")).0,
msg::MediaType::JavaScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("application/ecmascript")),
map_content_type(Path::new("foo/bar"), Some("application/ecmascript")).0,
msg::MediaType::JavaScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("text/ecmascript")),
map_content_type(Path::new("foo/bar"), Some("text/ecmascript")).0,
msg::MediaType::JavaScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("application/x-javascript")),
map_content_type(Path::new("foo/bar"), Some("application/x-javascript"))
.0,
msg::MediaType::JavaScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("application/json")),
map_content_type(Path::new("foo/bar"), Some("application/json")).0,
msg::MediaType::Json
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("application/node")),
map_content_type(Path::new("foo/bar"), Some("application/node")).0,
msg::MediaType::JavaScript
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("text/json")),
map_content_type(Path::new("foo/bar"), Some("text/json")).0,
msg::MediaType::Json
);
assert_eq!(
map_content_type(Path::new("foo/bar"), Some("text/json; charset=utf-8 ")),
(msg::MediaType::Json, Some("utf-8".to_owned()))
);
}
#[test]
fn test_map_file_extension_media_type_with_extension() {
assert_eq!(
map_content_type(Path::new("foo/bar.ts"), Some("text/plain")),
map_content_type(Path::new("foo/bar.ts"), Some("text/plain")).0,
msg::MediaType::TypeScript
);
assert_eq!(
map_content_type(Path::new("foo/bar.ts"), Some("foo/bar")),
map_content_type(Path::new("foo/bar.ts"), Some("foo/bar")).0,
msg::MediaType::Unknown
);
assert_eq!(
map_content_type(
Path::new("foo/bar.tsx"),
Some("application/typescript"),
),
)
.0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.tsx"),
Some("application/javascript"),
),
)
.0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.tsx"),
Some("application/x-typescript"),
),
)
.0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.tsx"),
Some("video/vnd.dlna.mpeg-tts"),
),
)
.0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(Path::new("foo/bar.tsx"), Some("video/mp2t")),
map_content_type(Path::new("foo/bar.tsx"), Some("video/mp2t")).0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.jsx"),
Some("application/javascript"),
),
)
.0,
msg::MediaType::JSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.jsx"),
Some("application/x-typescript"),
),
)
.0,
msg::MediaType::JSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.jsx"),
Some("application/ecmascript"),
),
)
.0,
msg::MediaType::JSX
);
assert_eq!(
map_content_type(Path::new("foo/bar.jsx"), Some("text/ecmascript")),
map_content_type(Path::new("foo/bar.jsx"), Some("text/ecmascript")).0,
msg::MediaType::JSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.jsx"),
Some("application/x-javascript"),
),
)
.0,
msg::MediaType::JSX
);
}
#[test]
fn test_filter_shebang() {
assert_eq!(filter_shebang(b"#!"[..].to_owned()), b"");
assert_eq!(filter_shebang(b"#!\n\n"[..].to_owned()), b"\n\n");
let code = b"#!/usr/bin/env deno\nconsole.log('hello');\n"[..].to_owned();
assert_eq!(filter_shebang("#!"), b"");
assert_eq!(filter_shebang("#!\n\n"), b"\n\n");
let code = "#!/usr/bin/env deno\nconsole.log('hello');\n";
assert_eq!(filter_shebang(code), b"\nconsole.log('hello');\n");
}
@ -1771,7 +1895,7 @@ mod tests {
.await;
assert!(source.is_ok());
let source = source.unwrap();
assert_eq!(source.source_code, b"console.log('etag')");
assert_eq!(source.source_code.bytes, b"console.log('etag')");
assert_eq!(&(source.media_type), &msg::MediaType::TypeScript);
let (_, headers) = fetcher.http_cache.get(&module_url).unwrap();
@ -1798,7 +1922,7 @@ mod tests {
)
.await
.unwrap();
assert_eq!(cached_source.source_code, b"changed content");
assert_eq!(cached_source.source_code.bytes, b"changed content");
let modified2 = metadata_path.metadata().unwrap().modified().unwrap();
@ -1825,7 +1949,7 @@ mod tests {
.await;
assert!(source.is_ok());
let source = source.unwrap();
assert_eq!(source.source_code, b"export const foo = 'foo';");
assert_eq!(source.source_code.bytes, b"export const foo = 'foo';");
assert_eq!(&(source.media_type), &msg::MediaType::JavaScript);
assert_eq!(
source.types_header,
@ -1833,4 +1957,80 @@ mod tests {
);
drop(http_server_guard);
}
/// Fetches `utf-16le.ts` from the test server and expects it to be decoded
/// as `utf-16le` into the given UTF-8 text.
#[tokio::test]
async fn test_fetch_source_file_from_net_utf16_le() {
  let expected_bytes = b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A";
  let content = std::str::from_utf8(expected_bytes).unwrap();
  test_fetch_non_utf8_source_file_from_net("utf-16le", "utf-16le.ts", content)
    .await;
}
/// Fetches `utf-16be.ts` from the test server and expects it to be decoded
/// as `utf-16be` into the given UTF-8 text.
#[tokio::test]
async fn test_fetch_source_file_from_net_utf16_be() {
  let expected_bytes = b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A";
  let content = std::str::from_utf8(expected_bytes).unwrap();
  test_fetch_non_utf8_source_file_from_net("utf-16be", "utf-16be.ts", content)
    .await;
}
/// Fetches the `windows-1255` fixture from the test server and expects the
/// Hebrew string literal inside it to be decoded correctly.
#[tokio::test]
async fn test_fetch_source_file_from_net_windows_1255() {
  let charset = "windows-1255";
  let file_name = "windows-1255";
  let content = "console.log(\"\u{5E9}\u{5DC}\u{5D5}\u{5DD} \
                 \u{5E2}\u{5D5}\u{5DC}\u{5DD}\");\u{A}";
  test_fetch_non_utf8_source_file_from_net(charset, file_name, content).await;
}
/// Downloads `cli/tests/encoding/<file_name>` from the local test server and
/// checks the resulting source file: its charset label (compared
/// lowercased), its decoded text, its media type and the cached
/// `content-type` header.
async fn test_fetch_non_utf8_source_file_from_net(
  charset: &str,
  file_name: &str,
  expected_content: &str,
) {
  let http_server_guard = test_util::http_server();
  let (_temp_dir, fetcher) = test_setup();

  let raw_url =
    format!("http://127.0.0.1:4545/cli/tests/encoding/{}", file_name);
  let module_url = Url::parse(&raw_url).unwrap();

  let permissions = Permissions::allow_all();
  let fetch_result = fetcher
    .fetch_remote_source(&module_url, false, false, 1, &permissions)
    .await;
  assert!(fetch_result.is_ok());
  let source = fetch_result.unwrap();

  let actual_charset = source.source_code.charset.to_lowercase();
  assert_eq!(actual_charset, charset);

  let text = source.source_code.to_str().unwrap();
  assert_eq!(text, expected_content);
  assert_eq!(&(source.media_type), &msg::MediaType::TypeScript);

  // The response headers are stored in the HTTP cache next to the body.
  let (_, headers) = fetcher.http_cache.get(&module_url).unwrap();
  let expected_header = format!("application/typescript;charset={}", charset);
  assert_eq!(headers.get("content-type").unwrap(), &expected_header);

  drop(http_server_guard);
}
}

View File

@ -11,6 +11,7 @@ use crate::colors;
use crate::diff::diff;
use crate::fs::files_in_subtree;
use crate::op_error::OpError;
use crate::text_encoding;
use deno_core::ErrBox;
use dprint_plugin_typescript as dprint;
use std::fs;
@ -247,13 +248,15 @@ struct FileContents {
}
fn read_file_contents(file_path: &PathBuf) -> Result<FileContents, ErrBox> {
let file_text = fs::read_to_string(&file_path)?;
let file_bytes = fs::read(&file_path)?;
let charset = text_encoding::detect_charset(&file_bytes);
let file_text = text_encoding::convert_to_utf8(&file_bytes, charset)?;
let had_bom = file_text.starts_with(BOM_CHAR);
let text = if had_bom {
// remove the BOM
String::from(&file_text[BOM_CHAR.len_utf8()..])
} else {
file_text
String::from(file_text)
};
Ok(FileContents { text, had_bom })

View File

@ -250,7 +250,7 @@ impl GlobalState {
}
} else {
CompiledModule {
code: String::from_utf8(out.source_code.clone())?,
code: out.source_code.to_string()?,
name: out.url.to_string(),
}
};

View File

@ -11,6 +11,7 @@ extern crate futures;
extern crate serde_json;
extern crate clap;
extern crate deno_core;
extern crate encoding_rs;
extern crate indexmap;
#[cfg(unix)]
extern crate nix;
@ -60,6 +61,7 @@ mod startup_data;
pub mod state;
mod swc_util;
mod test_runner;
mod text_encoding;
mod tokio_util;
mod tsc;
mod upgrade;
@ -70,6 +72,7 @@ pub mod worker;
use crate::doc::parser::DocFileLoader;
use crate::file_fetcher::SourceFile;
use crate::file_fetcher::SourceFileFetcher;
use crate::file_fetcher::TextDocument;
use crate::fs as deno_fs;
use crate::global_state::GlobalState;
use crate::msg::MediaType;
@ -412,7 +415,7 @@ async fn eval_command(
} else {
MediaType::JavaScript
},
source_code,
source_code: TextDocument::new(source_code, Some("utf-8")),
};
// Save our fake file into file fetcher cache
// to allow module access by TS compiler (e.g. op_fetch_source_files)
@ -525,8 +528,7 @@ async fn doc_command(
let source_file = fetcher
.fetch_source_file(&specifier, None, Permissions::allow_all())
.await?;
String::from_utf8(source_file.source_code)
.map_err(|_| OpError::other("failed to parse".to_string()))
source_file.source_code.to_string().map_err(OpError::from)
}
.boxed_local()
}
@ -601,7 +603,7 @@ async fn run_command(flags: Flags, script: String) -> Result<(), ErrBox> {
url: main_module_url,
types_header: None,
media_type: MediaType::TypeScript,
source_code: source,
source_code: source.into(),
};
// Save our fake file into file fetcher cache
// to allow module access by TS compiler (e.g. op_fetch_source_files)
@ -657,7 +659,10 @@ async fn test_command(
url: test_file_url,
types_header: None,
media_type: MediaType::TypeScript,
source_code: test_file.clone().into_bytes(),
source_code: TextDocument::new(
test_file.clone().into_bytes(),
Some("utf-8"),
),
};
// Save our fake file into file fetcher cache
// to allow module access by TS compiler (e.g. op_fetch_source_files)

View File

@ -458,7 +458,7 @@ impl ModuleGraphLoader {
redirect: Some(source_file.url.to_string()),
filename: source_file.filename.to_str().unwrap().to_string(),
version_hash: checksum::gen(&[
&source_file.source_code,
&source_file.source_code.as_bytes(),
version::DENO.as_bytes(),
]),
media_type: source_file.media_type,
@ -473,9 +473,11 @@ impl ModuleGraphLoader {
}
let module_specifier = ModuleSpecifier::from(source_file.url.clone());
let version_hash =
checksum::gen(&[&source_file.source_code, version::DENO.as_bytes()]);
let source_code = String::from_utf8(source_file.source_code)?;
let version_hash = checksum::gen(&[
&source_file.source_code.as_bytes(),
version::DENO.as_bytes(),
]);
let source_code = source_file.source_code.to_string()?;
if SUPPORTED_MEDIA_TYPES.contains(&source_file.media_type) {
if let Some(types_specifier) = source_file.types_header {

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
console.log("Hello World");

View File

@ -0,0 +1 @@
console.log("ùìåí òåìí");

94
cli/text_encoding.rs Normal file
View File

@ -0,0 +1,94 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
use encoding_rs::*;
use std::{
borrow::Cow,
io::{Error, ErrorKind},
};
/// Attempts to detect the character encoding of the provided bytes.
///
/// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian. Only a
/// leading UTF-16 byte order mark is inspected: `FF FE` yields `"utf-16le"`
/// and `FE FF` yields `"utf-16be"`. Every other input, including empty
/// input, is reported as `"utf-8"`.
pub fn detect_charset(bytes: &'_ [u8]) -> &'static str {
  match bytes {
    [0xFF, 0xFE, ..] => "utf-16le",
    [0xFE, 0xFF, ..] => "utf-16be",
    // Assume everything else is utf-8
    _ => "utf-8",
  }
}
/// Attempts to convert the provided bytes to a UTF-8 string.
///
/// Supports all encodings supported by the encoding_rs crate, which includes
/// all encodings specified in the WHATWG Encoding Standard, and only those
/// encodings (see: https://encoding.spec.whatwg.org/).
///
/// # Errors
///
/// Returns an `ErrorKind::InvalidInput` error when `charset` is not a label
/// known to encoding_rs, and an `ErrorKind::InvalidData` error when the
/// decoder reports that `bytes` could not be decoded without replacement.
pub fn convert_to_utf8<'a>(
  bytes: &'a [u8],
  charset: &'_ str,
) -> Result<Cow<'a, str>, Error> {
  let encoding = Encoding::for_label(charset.as_bytes()).ok_or_else(|| {
    Error::new(
      ErrorKind::InvalidInput,
      format!("Unsupported charset: {}", charset),
    )
  })?;

  // No BOM sniffing or stripping here: the caller's label is authoritative.
  encoding
    .decode_without_bom_handling_and_without_replacement(bytes)
    .ok_or_else(|| Error::from(ErrorKind::InvalidData))
}
#[cfg(test)]
mod tests {
  use super::*;

  /// Asserts that `detect_charset` labels `test_data` as `expected_charset`,
  /// comparing the two labels case-insensitively.
  fn test_detection(test_data: &[u8], expected_charset: &str) {
    let expected = expected_charset.to_lowercase();
    let detected = detect_charset(test_data).to_lowercase();
    assert_eq!(expected, detected);
  }

  #[test]
  fn test_detection_utf8_no_bom() {
    let text = "Hello UTF-8 it is \u{23F0} for Deno!";
    test_detection(text.as_bytes(), "utf-8");
  }

  #[test]
  fn test_detection_utf16_little_endian() {
    test_detection(b"\xFF\xFEHello UTF-16LE", "utf-16le");
  }

  #[test]
  fn test_detection_utf16_big_endian() {
    test_detection(b"\xFE\xFFHello UTF-16BE", "utf-16be");
  }

  #[test]
  fn test_decoding_unsupported_charset() {
    // The label is rejected before any byte is looked at, so empty input
    // is enough.
    let no_bytes: &[u8] = &[];
    let result = convert_to_utf8(no_bytes, "utf-32le");
    assert!(result.is_err());
    let err = result.expect_err("Err expected");
    assert_eq!(err.kind(), ErrorKind::InvalidInput);
  }

  #[test]
  fn test_decoding_invalid_utf8() {
    let invalid: &[u8] = b"\xFE\xFE\xFF\xFF";
    let result = convert_to_utf8(invalid, "utf-8");
    assert!(result.is_err());
    let err = result.expect_err("Err expected");
    assert_eq!(err.kind(), ErrorKind::InvalidData);
  }
}

View File

@ -471,7 +471,7 @@ impl TsCompiler {
if let Some(metadata) = self.get_metadata(&url) {
// Compare version hashes
let version_hash_to_validate = source_code_version_hash(
&source_file.source_code,
&source_file.source_code.as_bytes(),
version::DENO,
&self.config.hash,
);
@ -512,7 +512,7 @@ impl TsCompiler {
.fetch_cached_source_file(&specifier, Permissions::allow_all())
{
let existing_hash = crate::checksum::gen(&[
&source_file.source_code,
&source_file.source_code.as_bytes(),
version::DENO.as_bytes(),
]);
let expected_hash =
@ -851,9 +851,7 @@ impl TsCompiler {
let compiled_source_file = self.get_compiled_source_file(module_url)?;
let compiled_module = CompiledModule {
code: str::from_utf8(&compiled_source_file.source_code)
.unwrap()
.to_string(),
code: compiled_source_file.source_code.to_string()?,
name: module_url.to_string(),
};
@ -861,8 +859,8 @@ impl TsCompiler {
}
/// Return compiled JS file for given TS module.
// TODO: ideally we shouldn't construct SourceFile by hand, but it should be delegated to
// SourceFileFetcher
// TODO: ideally we shouldn't construct SourceFile by hand, but it should be
// delegated to SourceFileFetcher.
pub fn get_compiled_source_file(
&self,
module_url: &Url,
@ -878,7 +876,7 @@ impl TsCompiler {
url: module_url.clone(),
filename: compiled_code_filename,
media_type: msg::MediaType::JavaScript,
source_code: compiled_code,
source_code: compiled_code.into(),
types_header: None,
};
@ -902,7 +900,7 @@ impl TsCompiler {
self.mark_compiled(module_specifier.as_url());
let version_hash = source_code_version_hash(
&source_file.source_code,
&source_file.source_code.as_bytes(),
version::DENO,
&self.config.hash,
);
@ -935,7 +933,7 @@ impl TsCompiler {
url: module_specifier.as_url().to_owned(),
filename: source_map_filename,
media_type: msg::MediaType::JavaScript,
source_code,
source_code: source_code.into(),
types_header: None,
};
@ -981,7 +979,7 @@ impl SourceMapGetter for TsCompiler {
self
.try_resolve_and_get_source_file(script_name)
.and_then(|out| {
str::from_utf8(&out.source_code).ok().map(|v| {
out.source_code.to_str().ok().map(|v| {
// Do NOT use .lines(): it skips the terminating empty line.
// (due to internally using .split_terminator() instead of .split())
let lines: Vec<&str> = v.split('\n').collect();
@ -1020,7 +1018,7 @@ impl TsCompiler {
) -> Option<Vec<u8>> {
if let Some(module_specifier) = self.try_to_resolve(script_name) {
return match self.get_source_map_file(&module_specifier) {
Ok(out) => Some(out.source_code),
Ok(out) => Some(out.source_code.into_bytes()),
Err(_) => {
// Check if map is inlined
if let Ok(compiled_source) =
@ -1566,7 +1564,7 @@ mod tests {
url: specifier.as_url().clone(),
filename: PathBuf::from(p.to_str().unwrap().to_string()),
media_type: msg::MediaType::TypeScript,
source_code: include_bytes!("./tests/002_hello.ts").to_vec(),
source_code: include_bytes!("./tests/002_hello.ts").to_vec().into(),
types_header: None,
};
let dir =
@ -1642,7 +1640,7 @@ mod tests {
url: specifier.as_url().clone(),
filename: PathBuf::from(p.to_str().unwrap().to_string()),
media_type: msg::MediaType::TypeScript,
source_code: include_bytes!("./tests/002_hello.ts").to_vec(),
source_code: include_bytes!("./tests/002_hello.ts").to_vec().into(),
types_header: None,
};
let dir =

View File

@ -430,6 +430,19 @@ fn custom_headers(path: warp::path::Peek, f: warp::fs::File) -> Box<dyn Reply> {
let f = with_header(f, "Content-Length", "39");
return Box::new(f);
}
if p.contains("cli/tests/encoding/") {
let charset = p
.split_terminator('/')
.last()
.unwrap()
.trim_end_matches(".ts");
let f = with_header(
f,
"Content-Type",
&format!("application/typescript;charset={}", charset)[..],
);
return Box::new(f);
}
let content_type = if p.contains(".t1.") {
Some("text/typescript")

View File

@ -69,6 +69,7 @@ def eslint():
":!:cli/compilers/wasm_wrap.js",
":!:cli/tests/error_syntax.js",
":!:cli/tests/lint/**",
":!:cli/tests/encoding/**",
":!:cli/dts/**",
":!:cli/tsc/*typescript.js",
])