refactor: move some CJS and ESM code analysis to ext/node (#18789)

This commit is contained in:
David Sherret 2023-04-21 16:38:10 -04:00 committed by GitHub
parent 065d8771ad
commit 4a33c349af
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 655 additions and 604 deletions

8
cli/cache/node.rs vendored
View file

@ -42,14 +42,6 @@ pub struct NodeAnalysisCache {
}
impl NodeAnalysisCache {
#[cfg(test)]
pub fn new_in_memory() -> Self {
Self::new(CacheDB::in_memory(
&NODE_ANALYSIS_CACHE_DB,
crate::version::deno(),
))
}
pub fn new(db: CacheDB) -> Self {
Self {
inner: NodeAnalysisCacheInner::new(db),

View file

@ -11,9 +11,10 @@ use crate::graph_util::graph_valid_with_cli_options;
use crate::graph_util::ModuleGraphBuilder;
use crate::graph_util::ModuleGraphContainer;
use crate::node;
use crate::node::CliCjsEsmCodeAnalyzer;
use crate::node::CliNodeResolver;
use crate::node::NodeCodeTranslator;
use crate::node::NodeResolution;
use crate::npm::CliRequireNpmResolver;
use crate::proc_state::CjsResolutionStore;
use crate::proc_state::FileWatcherReporter;
use crate::proc_state::ProcState;
@ -49,7 +50,9 @@ use deno_graph::JsonModule;
use deno_graph::Module;
use deno_graph::Resolution;
use deno_lockfile::Lockfile;
use deno_runtime::deno_node::analyze::NodeCodeTranslator;
use deno_runtime::deno_node::NodeResolutionMode;
use deno_runtime::deno_node::RealFs;
use deno_runtime::permissions::PermissionsContainer;
use deno_semver::npm::NpmPackageReqReference;
use std::borrow::Cow;
@ -241,7 +244,8 @@ pub struct CliModuleLoader {
emitter: Arc<Emitter>,
graph_container: Arc<ModuleGraphContainer>,
module_load_preparer: Arc<ModuleLoadPreparer>,
node_code_translator: Arc<NodeCodeTranslator>,
node_code_translator:
Arc<NodeCodeTranslator<CliCjsEsmCodeAnalyzer, CliRequireNpmResolver>>,
node_resolver: Arc<CliNodeResolver>,
parsed_source_cache: Arc<ParsedSourceCache>,
resolver: Arc<CliGraphResolver>,
@ -385,17 +389,16 @@ impl CliModuleLoader {
self.root_permissions.clone()
};
// translate cjs to esm if it's cjs and inject node globals
self.node_code_translator.translate_cjs_to_esm(
self.node_code_translator.translate_cjs_to_esm::<RealFs>(
specifier,
code,
MediaType::Cjs,
&code,
&mut permissions,
)?
} else {
// only inject node globals for esm
self
.node_code_translator
.esm_code_with_node_globals(specifier, code)?
.esm_code_with_node_globals(specifier, &code)?
};
ModuleCodeSource {
code: code.into(),

View file

@ -1,11 +1,6 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
use std::collections::HashSet;
use std::collections::VecDeque;
use std::fmt::Write;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use deno_ast::swc::common::SyntaxContext;
use deno_ast::view::Node;
@ -15,195 +10,35 @@ use deno_ast::MediaType;
use deno_ast::ModuleSpecifier;
use deno_ast::ParsedSource;
use deno_ast::SourceRanged;
use deno_core::anyhow::anyhow;
use deno_core::error::AnyError;
use deno_runtime::deno_node::package_exports_resolve;
use deno_runtime::deno_node::NodeModuleKind;
use deno_runtime::deno_node::NodePermissions;
use deno_runtime::deno_node::NodeResolutionMode;
use deno_runtime::deno_node::PackageJson;
use deno_runtime::deno_node::PathClean;
use deno_runtime::deno_node::RealFs;
use deno_runtime::deno_node::RequireNpmResolver;
use deno_runtime::deno_node::NODE_GLOBAL_THIS_NAME;
use once_cell::sync::Lazy;
use deno_runtime::deno_node::analyze::CjsAnalysis as ExtNodeCjsAnalysis;
use deno_runtime::deno_node::analyze::CjsEsmCodeAnalyzer;
use crate::cache::NodeAnalysisCache;
use crate::file_fetcher::FileFetcher;
use crate::npm::NpmPackageResolver;
static NODE_GLOBALS: &[&str] = &[
"Buffer",
"clearImmediate",
"clearInterval",
"clearTimeout",
"console",
"global",
"process",
"setImmediate",
"setInterval",
"setTimeout",
];
pub struct NodeCodeTranslator {
analysis_cache: NodeAnalysisCache,
file_fetcher: Arc<FileFetcher>,
npm_resolver: Arc<NpmPackageResolver>,
pub struct CliCjsEsmCodeAnalyzer {
cache: NodeAnalysisCache,
}
impl NodeCodeTranslator {
pub fn new(
analysis_cache: NodeAnalysisCache,
file_fetcher: Arc<FileFetcher>,
npm_resolver: Arc<NpmPackageResolver>,
) -> Self {
Self {
analysis_cache,
file_fetcher,
npm_resolver,
}
impl CliCjsEsmCodeAnalyzer {
pub fn new(cache: NodeAnalysisCache) -> Self {
Self { cache }
}
pub fn esm_code_with_node_globals(
fn inner_cjs_analysis(
&self,
specifier: &ModuleSpecifier,
code: String,
) -> Result<String, AnyError> {
esm_code_with_node_globals(&self.analysis_cache, specifier, code)
}
/// Translates given CJS module into ESM. This function will perform static
/// analysis on the file to find defined exports and reexports.
///
/// For all discovered reexports the analysis will be performed recursively.
///
/// If successful a source code for equivalent ES module is returned.
pub fn translate_cjs_to_esm(
&self,
specifier: &ModuleSpecifier,
code: String,
media_type: MediaType,
permissions: &mut dyn NodePermissions,
) -> Result<String, AnyError> {
let mut temp_var_count = 0;
let mut handled_reexports: HashSet<String> = HashSet::default();
let mut source = vec![
r#"import {createRequire as __internalCreateRequire} from "node:module";
const require = __internalCreateRequire(import.meta.url);"#
.to_string(),
];
let analysis =
self.perform_cjs_analysis(specifier.as_str(), media_type, code)?;
let mut all_exports = analysis
.exports
.iter()
.map(|s| s.to_string())
.collect::<HashSet<_>>();
// (request, referrer)
let mut reexports_to_handle = VecDeque::new();
for reexport in analysis.reexports {
reexports_to_handle.push_back((reexport, specifier.clone()));
}
while let Some((reexport, referrer)) = reexports_to_handle.pop_front() {
if handled_reexports.contains(&reexport) {
continue;
}
handled_reexports.insert(reexport.to_string());
// First, resolve relate reexport specifier
let resolved_reexport = self.resolve(
&reexport,
&referrer,
// FIXME(bartlomieju): check if these conditions are okay, probably
// should be `deno-require`, because `deno` is already used in `esm_resolver.rs`
&["deno", "require", "default"],
NodeResolutionMode::Execution,
permissions,
)?;
let reexport_specifier =
ModuleSpecifier::from_file_path(resolved_reexport).unwrap();
// Second, read the source code from disk
let reexport_file = self
.file_fetcher
.get_source(&reexport_specifier)
.ok_or_else(|| {
anyhow!(
"Could not find '{}' ({}) referenced from {}",
reexport,
reexport_specifier,
referrer
)
})?;
{
let analysis = self.perform_cjs_analysis(
reexport_specifier.as_str(),
reexport_file.media_type,
reexport_file.source.to_string(),
)?;
for reexport in analysis.reexports {
reexports_to_handle.push_back((reexport, reexport_specifier.clone()));
}
all_exports.extend(
analysis
.exports
.into_iter()
.filter(|e| e.as_str() != "default"),
);
}
}
source.push(format!(
"const mod = require(\"{}\");",
specifier
.to_file_path()
.unwrap()
.to_str()
.unwrap()
.replace('\\', "\\\\")
.replace('\'', "\\\'")
.replace('\"', "\\\"")
));
for export in &all_exports {
if export.as_str() != "default" {
add_export(
&mut source,
export,
&format!("mod[\"{export}\"]"),
&mut temp_var_count,
);
}
}
source.push("export default mod;".to_string());
let translated_source = source.join("\n");
Ok(translated_source)
}
fn perform_cjs_analysis(
&self,
specifier: &str,
media_type: MediaType,
code: String,
source: &str,
) -> Result<CjsAnalysis, AnyError> {
let source_hash = NodeAnalysisCache::compute_source_hash(&code);
let source_hash = NodeAnalysisCache::compute_source_hash(source);
if let Some(analysis) = self
.analysis_cache
.get_cjs_analysis(specifier, &source_hash)
.cache
.get_cjs_analysis(specifier.as_str(), &source_hash)
{
return Ok(analysis);
}
let media_type = MediaType::from_specifier(specifier);
if media_type == MediaType::Json {
return Ok(CjsAnalysis {
exports: vec![],
@ -213,7 +48,7 @@ impl NodeCodeTranslator {
let parsed_source = deno_ast::parse_script(deno_ast::ParseParams {
specifier: specifier.to_string(),
text_info: deno_ast::SourceTextInfo::new(code.into()),
text_info: deno_ast::SourceTextInfo::new(source.into()),
media_type,
capture_tokens: true,
scope_analysis: false,
@ -221,175 +56,61 @@ impl NodeCodeTranslator {
})?;
let analysis = parsed_source.analyze_cjs();
self
.analysis_cache
.set_cjs_analysis(specifier, &source_hash, &analysis);
.cache
.set_cjs_analysis(specifier.as_str(), &source_hash, &analysis);
Ok(analysis)
}
}
fn resolve(
impl CjsEsmCodeAnalyzer for CliCjsEsmCodeAnalyzer {
fn analyze_cjs(
&self,
specifier: &str,
referrer: &ModuleSpecifier,
conditions: &[&str],
mode: NodeResolutionMode,
permissions: &mut dyn NodePermissions,
) -> Result<PathBuf, AnyError> {
if specifier.starts_with('/') {
todo!();
}
let referrer_path = referrer.to_file_path().unwrap();
if specifier.starts_with("./") || specifier.starts_with("../") {
if let Some(parent) = referrer_path.parent() {
return file_extension_probe(parent.join(specifier), &referrer_path);
} else {
todo!();
}
}
// We've got a bare specifier or maybe bare_specifier/blah.js"
let (package_specifier, package_subpath) =
parse_specifier(specifier).unwrap();
// todo(dsherret): use not_found error on not found here
let resolver = self.npm_resolver.as_require_npm_resolver();
let module_dir = resolver.resolve_package_folder_from_package(
package_specifier.as_str(),
&referrer_path,
mode,
)?;
let package_json_path = module_dir.join("package.json");
if package_json_path.exists() {
let package_json = PackageJson::load::<RealFs>(
&self.npm_resolver.as_require_npm_resolver(),
permissions,
package_json_path.clone(),
)?;
if let Some(exports) = &package_json.exports {
return package_exports_resolve::<RealFs>(
&package_json_path,
package_subpath,
exports,
referrer,
NodeModuleKind::Esm,
conditions,
mode,
&self.npm_resolver.as_require_npm_resolver(),
permissions,
);
}
// old school
if package_subpath != "." {
let d = module_dir.join(package_subpath);
if let Ok(m) = d.metadata() {
if m.is_dir() {
// subdir might have a package.json that specifies the entrypoint
let package_json_path = d.join("package.json");
if package_json_path.exists() {
let package_json = PackageJson::load::<RealFs>(
&self.npm_resolver.as_require_npm_resolver(),
permissions,
package_json_path,
)?;
if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
return Ok(d.join(main).clean());
}
}
return Ok(d.join("index.js").clean());
}
}
return file_extension_probe(d, &referrer_path);
} else if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
return Ok(module_dir.join(main).clean());
} else {
return Ok(module_dir.join("index.js").clean());
}
}
Err(not_found(specifier, &referrer_path))
}
}
fn esm_code_with_node_globals(
analysis_cache: &NodeAnalysisCache,
specifier: &ModuleSpecifier,
code: String,
) -> Result<String, AnyError> {
// TODO(dsherret): this code is way more inefficient than it needs to be.
//
// In the future, we should disable capturing tokens & scope analysis
// and instead only use swc's APIs to go through the portions of the tree
// that we know will affect the global scope while still ensuring that
// `var` decls are taken into consideration.
let source_hash = NodeAnalysisCache::compute_source_hash(&code);
let text_info = deno_ast::SourceTextInfo::from_string(code);
let top_level_decls = if let Some(decls) =
analysis_cache.get_esm_analysis(specifier.as_str(), &source_hash)
{
HashSet::from_iter(decls)
} else {
let parsed_source = deno_ast::parse_program(deno_ast::ParseParams {
specifier: specifier.to_string(),
text_info: text_info.clone(),
media_type: deno_ast::MediaType::from_specifier(specifier),
capture_tokens: true,
scope_analysis: true,
maybe_syntax: None,
})?;
let top_level_decls = analyze_top_level_decls(&parsed_source)?;
analysis_cache.set_esm_analysis(
specifier.as_str(),
&source_hash,
&top_level_decls.clone().into_iter().collect(),
);
top_level_decls
};
Ok(esm_code_from_top_level_decls(
text_info.text_str(),
&top_level_decls,
))
}
fn esm_code_from_top_level_decls(
file_text: &str,
top_level_decls: &HashSet<String>,
) -> String {
let mut globals = Vec::with_capacity(NODE_GLOBALS.len());
let has_global_this = top_level_decls.contains("globalThis");
for global in NODE_GLOBALS.iter() {
if !top_level_decls.contains(&global.to_string()) {
globals.push(*global);
}
specifier: &ModuleSpecifier,
source: &str,
) -> Result<ExtNodeCjsAnalysis, AnyError> {
let analysis = self.inner_cjs_analysis(specifier, source)?;
Ok(ExtNodeCjsAnalysis {
exports: analysis.exports,
reexports: analysis.reexports,
})
}
let mut result = String::new();
let global_this_expr = NODE_GLOBAL_THIS_NAME.as_str();
let global_this_expr = if has_global_this {
global_this_expr
} else {
write!(result, "var globalThis = {global_this_expr};").unwrap();
"globalThis"
};
for global in globals {
write!(result, "var {global} = {global_this_expr}.{global};").unwrap();
fn analyze_esm_top_level_decls(
&self,
specifier: &ModuleSpecifier,
source: &str,
) -> Result<HashSet<String>, AnyError> {
// TODO(dsherret): this code is way more inefficient than it needs to be.
//
// In the future, we should disable capturing tokens & scope analysis
// and instead only use swc's APIs to go through the portions of the tree
// that we know will affect the global scope while still ensuring that
// `var` decls are taken into consideration.
let source_hash = NodeAnalysisCache::compute_source_hash(source);
if let Some(decls) = self
.cache
.get_esm_analysis(specifier.as_str(), &source_hash)
{
Ok(HashSet::from_iter(decls))
} else {
let parsed_source = deno_ast::parse_program(deno_ast::ParseParams {
specifier: specifier.to_string(),
text_info: deno_ast::SourceTextInfo::from_string(source.to_string()),
media_type: deno_ast::MediaType::from_specifier(specifier),
capture_tokens: true,
scope_analysis: true,
maybe_syntax: None,
})?;
let top_level_decls = analyze_top_level_decls(&parsed_source)?;
self.cache.set_esm_analysis(
specifier.as_str(),
&source_hash,
&top_level_decls.clone().into_iter().collect::<Vec<_>>(),
);
Ok(top_level_decls)
}
}
// strip the shebang
let file_text = if file_text.starts_with("#!/") {
let start_index = file_text.find('\n').unwrap_or(file_text.len());
&file_text[start_index..]
} else {
file_text
};
result.push_str(file_text);
result
}
fn analyze_top_level_decls(
@ -455,236 +176,3 @@ fn is_local_declaration_ident(node: Node) -> bool {
false
}
}
static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| {
HashSet::from([
"break",
"case",
"catch",
"class",
"const",
"continue",
"debugger",
"default",
"delete",
"do",
"else",
"export",
"extends",
"false",
"finally",
"for",
"function",
"if",
"import",
"in",
"instanceof",
"new",
"null",
"return",
"super",
"switch",
"this",
"throw",
"true",
"try",
"typeof",
"var",
"void",
"while",
"with",
"yield",
"let",
"enum",
"implements",
"interface",
"package",
"private",
"protected",
"public",
"static",
])
});
fn add_export(
source: &mut Vec<String>,
name: &str,
initializer: &str,
temp_var_count: &mut usize,
) {
fn is_valid_var_decl(name: &str) -> bool {
// it's ok to be super strict here
name
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$')
}
// TODO(bartlomieju): Node actually checks if a given export exists in `exports` object,
// but it might not be necessary here since our analysis is more detailed?
if RESERVED_WORDS.contains(name) || !is_valid_var_decl(name) {
*temp_var_count += 1;
// we can't create an identifier with a reserved word or invalid identifier name,
// so assign it to a temporary variable that won't have a conflict, then re-export
// it as a string
source.push(format!(
"const __deno_export_{temp_var_count}__ = {initializer};"
));
source.push(format!(
"export {{ __deno_export_{temp_var_count}__ as \"{name}\" }};"
));
} else {
source.push(format!("export const {name} = {initializer};"));
}
}
fn parse_specifier(specifier: &str) -> Option<(String, String)> {
let mut separator_index = specifier.find('/');
let mut valid_package_name = true;
// let mut is_scoped = false;
if specifier.is_empty() {
valid_package_name = false;
} else if specifier.starts_with('@') {
// is_scoped = true;
if let Some(index) = separator_index {
separator_index = specifier[index + 1..].find('/').map(|i| i + index + 1);
} else {
valid_package_name = false;
}
}
let package_name = if let Some(index) = separator_index {
specifier[0..index].to_string()
} else {
specifier.to_string()
};
// Package name cannot have leading . and cannot have percent-encoding or separators.
for ch in package_name.chars() {
if ch == '%' || ch == '\\' {
valid_package_name = false;
break;
}
}
if !valid_package_name {
return None;
}
let package_subpath = if let Some(index) = separator_index {
format!(".{}", specifier.chars().skip(index).collect::<String>())
} else {
".".to_string()
};
Some((package_name, package_subpath))
}
fn file_extension_probe(
p: PathBuf,
referrer: &Path,
) -> Result<PathBuf, AnyError> {
let p = p.clean();
if p.exists() {
let file_name = p.file_name().unwrap();
let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
if p_js.exists() && p_js.is_file() {
return Ok(p_js);
} else if p.is_dir() {
return Ok(p.join("index.js"));
} else {
return Ok(p);
}
} else if let Some(file_name) = p.file_name() {
let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
if p_js.exists() && p_js.is_file() {
return Ok(p_js);
}
}
Err(not_found(&p.to_string_lossy(), referrer))
}
fn not_found(path: &str, referrer: &Path) -> AnyError {
let msg = format!(
"[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"",
path,
referrer.to_string_lossy()
);
std::io::Error::new(std::io::ErrorKind::NotFound, msg).into()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_esm_code_with_node_globals() {
let r = esm_code_with_node_globals(
&NodeAnalysisCache::new_in_memory(),
&ModuleSpecifier::parse("https://example.com/foo/bar.js").unwrap(),
"export const x = 1;".to_string(),
)
.unwrap();
assert!(r.contains(&format!(
"var globalThis = {};",
NODE_GLOBAL_THIS_NAME.as_str()
)));
assert!(r.contains("var process = globalThis.process;"));
assert!(r.contains("export const x = 1;"));
}
#[test]
fn test_esm_code_with_node_globals_with_shebang() {
let r = esm_code_with_node_globals(
&NodeAnalysisCache::new_in_memory(),
&ModuleSpecifier::parse("https://example.com/foo/bar.js").unwrap(),
"#!/usr/bin/env node\nexport const x = 1;".to_string(),
)
.unwrap();
assert_eq!(
r,
format!(
concat!(
"var globalThis = {}",
";var Buffer = globalThis.Buffer;",
"var clearImmediate = globalThis.clearImmediate;var clearInterval = globalThis.clearInterval;",
"var clearTimeout = globalThis.clearTimeout;var console = globalThis.console;",
"var global = globalThis.global;var process = globalThis.process;",
"var setImmediate = globalThis.setImmediate;var setInterval = globalThis.setInterval;",
"var setTimeout = globalThis.setTimeout;\n",
"export const x = 1;"
),
NODE_GLOBAL_THIS_NAME.as_str(),
)
);
}
#[test]
fn test_add_export() {
let mut temp_var_count = 0;
let mut source = vec![];
let exports = vec!["static", "server", "app", "dashed-export"];
for export in exports {
add_export(&mut source, export, "init", &mut temp_var_count);
}
assert_eq!(
source,
vec![
"const __deno_export_1__ = init;".to_string(),
"export { __deno_export_1__ as \"static\" };".to_string(),
"export const server = init;".to_string(),
"export const app = init;".to_string(),
"const __deno_export_2__ = init;".to_string(),
"export { __deno_export_2__ as \"dashed-export\" };".to_string(),
]
)
}
#[test]
fn test_parse_specifier() {
assert_eq!(
parse_specifier("@some-package/core/actions"),
Some(("@some-package/core".to_string(), "./actions".to_string()))
);
}
}

View file

@ -33,14 +33,14 @@ use deno_semver::npm::NpmPackageNv;
use deno_semver::npm::NpmPackageNvReference;
use deno_semver::npm::NpmPackageReqReference;
use crate::npm::CliRequireNpmResolver;
use crate::npm::NpmPackageResolver;
use crate::npm::NpmResolution;
use crate::npm::RequireNpmPackageResolver;
use crate::util::fs::canonicalize_path_maybe_not_exists;
mod analyze;
pub use analyze::NodeCodeTranslator;
pub use analyze::CliCjsEsmCodeAnalyzer;
#[derive(Debug)]
pub enum NodeResolution {
@ -116,7 +116,7 @@ pub fn resolve_builtin_node_module(module_name: &str) -> Result<Url, AnyError> {
pub struct CliNodeResolver {
npm_resolution: Arc<NpmResolution>,
npm_resolver: Arc<NpmPackageResolver>,
require_npm_resolver: RequireNpmPackageResolver,
require_npm_resolver: CliRequireNpmResolver,
}
impl CliNodeResolver {

View file

@ -13,6 +13,6 @@ pub use installer::PackageJsonDepsInstaller;
pub use registry::CliNpmRegistryApi;
pub use resolution::NpmResolution;
pub use resolvers::create_npm_fs_resolver;
pub use resolvers::CliRequireNpmResolver;
pub use resolvers::NpmPackageResolver;
pub use resolvers::NpmProcessState;
pub use resolvers::RequireNpmPackageResolver;

View file

@ -229,17 +229,15 @@ impl NpmPackageResolver {
Ok(())
}
pub fn as_require_npm_resolver(
self: &Arc<Self>,
) -> RequireNpmPackageResolver {
RequireNpmPackageResolver(self.clone())
pub fn as_require_npm_resolver(self: &Arc<Self>) -> CliRequireNpmResolver {
CliRequireNpmResolver(self.clone())
}
}
#[derive(Debug)]
pub struct RequireNpmPackageResolver(Arc<NpmPackageResolver>);
pub struct CliRequireNpmResolver(Arc<NpmPackageResolver>);
impl RequireNpmResolver for RequireNpmPackageResolver {
impl RequireNpmResolver for CliRequireNpmResolver {
fn resolve_package_folder_from_package(
&self,
specifier: &str,

View file

@ -17,10 +17,11 @@ use crate::graph_util::ModuleGraphBuilder;
use crate::graph_util::ModuleGraphContainer;
use crate::http_util::HttpClient;
use crate::module_loader::ModuleLoadPreparer;
use crate::node::CliCjsEsmCodeAnalyzer;
use crate::node::CliNodeResolver;
use crate::node::NodeCodeTranslator;
use crate::npm::create_npm_fs_resolver;
use crate::npm::CliNpmRegistryApi;
use crate::npm::CliRequireNpmResolver;
use crate::npm::NpmCache;
use crate::npm::NpmPackageResolver;
use crate::npm::NpmResolution;
@ -37,6 +38,7 @@ use deno_core::ModuleSpecifier;
use deno_core::SharedArrayBufferStore;
use deno_runtime::deno_broadcast_channel::InMemoryBroadcastChannel;
use deno_runtime::deno_node::analyze::NodeCodeTranslator;
use deno_runtime::deno_tls::rustls::RootCertStore;
use deno_runtime::deno_web::BlobStore;
use deno_runtime::inspector_server::InspectorServer;
@ -75,7 +77,8 @@ pub struct Inner {
maybe_file_watcher_reporter: Option<FileWatcherReporter>,
pub module_graph_builder: Arc<ModuleGraphBuilder>,
pub module_load_preparer: Arc<ModuleLoadPreparer>,
pub node_code_translator: Arc<NodeCodeTranslator>,
pub node_code_translator:
Arc<NodeCodeTranslator<CliCjsEsmCodeAnalyzer, CliRequireNpmResolver>>,
pub node_resolver: Arc<CliNodeResolver>,
pub npm_api: Arc<CliNpmRegistryApi>,
pub npm_cache: Arc<NpmCache>,
@ -304,10 +307,10 @@ impl ProcState {
let file_fetcher = Arc::new(file_fetcher);
let node_analysis_cache =
NodeAnalysisCache::new(caches.node_analysis_db(&dir));
let cjs_esm_analyzer = CliCjsEsmCodeAnalyzer::new(node_analysis_cache);
let node_code_translator = Arc::new(NodeCodeTranslator::new(
node_analysis_cache,
file_fetcher.clone(),
npm_resolver.clone(),
cjs_esm_analyzer,
npm_resolver.as_require_npm_resolver(),
));
let node_resolver = Arc::new(CliNodeResolver::new(
npm_resolution.clone(),

564
ext/node/analyze.rs Normal file
View file

@ -0,0 +1,564 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
use std::collections::HashSet;
use std::collections::VecDeque;
use std::fmt::Write;
use std::path::Path;
use std::path::PathBuf;
use deno_core::anyhow::Context;
use deno_core::ModuleSpecifier;
use once_cell::sync::Lazy;
use deno_core::error::AnyError;
use crate::package_exports_resolve;
use crate::NodeFs;
use crate::NodeModuleKind;
use crate::NodePermissions;
use crate::NodeResolutionMode;
use crate::PackageJson;
use crate::PathClean;
use crate::RequireNpmResolver;
use crate::NODE_GLOBAL_THIS_NAME;
static NODE_GLOBALS: &[&str] = &[
"Buffer",
"clearImmediate",
"clearInterval",
"clearTimeout",
"console",
"global",
"process",
"setImmediate",
"setInterval",
"setTimeout",
];
#[derive(Debug, Clone)]
pub struct CjsAnalysis {
pub exports: Vec<String>,
pub reexports: Vec<String>,
}
/// Code analyzer for CJS and ESM files.
pub trait CjsEsmCodeAnalyzer {
/// Analyzes CommonJs code for exports and reexports, which is
/// then used to determine the wrapper ESM module exports.
fn analyze_cjs(
&self,
specifier: &ModuleSpecifier,
source: &str,
) -> Result<CjsAnalysis, AnyError>;
/// Analyzes ESM code for top level declarations. This is used
/// to help inform injecting node specific globals into Node ESM
/// code. For example, if a top level `setTimeout` function exists
/// then we don't want to inject a `setTimeout` declaration.
///
/// Note: This will go away in the future once we do this all in v8.
fn analyze_esm_top_level_decls(
&self,
specifier: &ModuleSpecifier,
source: &str,
) -> Result<HashSet<String>, AnyError>;
}
pub struct NodeCodeTranslator<
TCjsEsmCodeAnalyzer: CjsEsmCodeAnalyzer,
TRequireNpmResolver: RequireNpmResolver,
> {
cjs_esm_code_analyzer: TCjsEsmCodeAnalyzer,
npm_resolver: TRequireNpmResolver,
}
impl<
TCjsEsmCodeAnalyzer: CjsEsmCodeAnalyzer,
TRequireNpmResolver: RequireNpmResolver,
> NodeCodeTranslator<TCjsEsmCodeAnalyzer, TRequireNpmResolver>
{
pub fn new(
cjs_esm_code_analyzer: TCjsEsmCodeAnalyzer,
npm_resolver: TRequireNpmResolver,
) -> Self {
Self {
cjs_esm_code_analyzer,
npm_resolver,
}
}
/// Resolves the code to be used when executing Node specific ESM code.
///
/// Note: This will go away in the future once we do this all in v8.
pub fn esm_code_with_node_globals(
&self,
specifier: &ModuleSpecifier,
source: &str,
) -> Result<String, AnyError> {
let top_level_decls = self
.cjs_esm_code_analyzer
.analyze_esm_top_level_decls(specifier, source)?;
Ok(esm_code_from_top_level_decls(source, &top_level_decls))
}
/// Translates given CJS module into ESM. This function will perform static
/// analysis on the file to find defined exports and reexports.
///
/// For all discovered reexports the analysis will be performed recursively.
///
/// If successful a source code for equivalent ES module is returned.
pub fn translate_cjs_to_esm<Fs: NodeFs>(
&self,
specifier: &ModuleSpecifier,
source: &str,
permissions: &mut dyn NodePermissions,
) -> Result<String, AnyError> {
let mut temp_var_count = 0;
let mut handled_reexports: HashSet<String> = HashSet::default();
let analysis = self.cjs_esm_code_analyzer.analyze_cjs(specifier, source)?;
let mut source = vec![
r#"import {createRequire as __internalCreateRequire} from "node:module";
const require = __internalCreateRequire(import.meta.url);"#
.to_string(),
];
let mut all_exports = analysis
.exports
.iter()
.map(|s| s.to_string())
.collect::<HashSet<_>>();
// (request, referrer)
let mut reexports_to_handle = VecDeque::new();
for reexport in analysis.reexports {
reexports_to_handle.push_back((reexport, specifier.clone()));
}
while let Some((reexport, referrer)) = reexports_to_handle.pop_front() {
if handled_reexports.contains(&reexport) {
continue;
}
handled_reexports.insert(reexport.to_string());
// First, resolve relate reexport specifier
let resolved_reexport = self.resolve::<Fs>(
&reexport,
&referrer,
// FIXME(bartlomieju): check if these conditions are okay, probably
// should be `deno-require`, because `deno` is already used in `esm_resolver.rs`
&["deno", "require", "default"],
NodeResolutionMode::Execution,
permissions,
)?;
// Second, read the source code from disk
let reexport_specifier =
ModuleSpecifier::from_file_path(&resolved_reexport).unwrap();
let reexport_file_text = Fs::read_to_string(&resolved_reexport)
.with_context(|| {
format!(
"Could not find '{}' ({}) referenced from {}",
reexport, reexport_specifier, referrer
)
})?;
{
let analysis = self
.cjs_esm_code_analyzer
.analyze_cjs(&reexport_specifier, &reexport_file_text)?;
for reexport in analysis.reexports {
reexports_to_handle.push_back((reexport, reexport_specifier.clone()));
}
all_exports.extend(
analysis
.exports
.into_iter()
.filter(|e| e.as_str() != "default"),
);
}
}
source.push(format!(
"const mod = require(\"{}\");",
specifier
.to_file_path()
.unwrap()
.to_str()
.unwrap()
.replace('\\', "\\\\")
.replace('\'', "\\\'")
.replace('\"', "\\\"")
));
for export in &all_exports {
if export.as_str() != "default" {
add_export(
&mut source,
export,
&format!("mod[\"{export}\"]"),
&mut temp_var_count,
);
}
}
source.push("export default mod;".to_string());
let translated_source = source.join("\n");
Ok(translated_source)
}
fn resolve<Fs: NodeFs>(
&self,
specifier: &str,
referrer: &ModuleSpecifier,
conditions: &[&str],
mode: NodeResolutionMode,
permissions: &mut dyn NodePermissions,
) -> Result<PathBuf, AnyError> {
if specifier.starts_with('/') {
todo!();
}
let referrer_path = referrer.to_file_path().unwrap();
if specifier.starts_with("./") || specifier.starts_with("../") {
if let Some(parent) = referrer_path.parent() {
return file_extension_probe::<Fs>(
parent.join(specifier),
&referrer_path,
);
} else {
todo!();
}
}
// We've got a bare specifier or maybe bare_specifier/blah.js"
let (package_specifier, package_subpath) =
parse_specifier(specifier).unwrap();
// todo(dsherret): use not_found error on not found here
let module_dir = self.npm_resolver.resolve_package_folder_from_package(
package_specifier.as_str(),
&referrer_path,
mode,
)?;
let package_json_path = module_dir.join("package.json");
if Fs::exists(&package_json_path) {
let package_json = PackageJson::load::<Fs>(
&self.npm_resolver,
permissions,
package_json_path.clone(),
)?;
if let Some(exports) = &package_json.exports {
return package_exports_resolve::<Fs>(
&package_json_path,
package_subpath,
exports,
referrer,
NodeModuleKind::Esm,
conditions,
mode,
&self.npm_resolver,
permissions,
);
}
// old school
if package_subpath != "." {
let d = module_dir.join(package_subpath);
if Fs::is_dir(&d) {
// subdir might have a package.json that specifies the entrypoint
let package_json_path = d.join("package.json");
if Fs::exists(&package_json_path) {
let package_json = PackageJson::load::<Fs>(
&self.npm_resolver,
permissions,
package_json_path,
)?;
if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
return Ok(d.join(main).clean());
}
}
return Ok(d.join("index.js").clean());
}
return file_extension_probe::<Fs>(d, &referrer_path);
} else if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
return Ok(module_dir.join(main).clean());
} else {
return Ok(module_dir.join("index.js").clean());
}
}
Err(not_found(specifier, &referrer_path))
}
}
fn esm_code_from_top_level_decls(
file_text: &str,
top_level_decls: &HashSet<String>,
) -> String {
let mut globals = Vec::with_capacity(NODE_GLOBALS.len());
let has_global_this = top_level_decls.contains("globalThis");
for global in NODE_GLOBALS.iter() {
if !top_level_decls.contains(&global.to_string()) {
globals.push(*global);
}
}
let mut result = String::new();
let global_this_expr = NODE_GLOBAL_THIS_NAME.as_str();
let global_this_expr = if has_global_this {
global_this_expr
} else {
write!(result, "var globalThis = {global_this_expr};").unwrap();
"globalThis"
};
for global in globals {
write!(result, "var {global} = {global_this_expr}.{global};").unwrap();
}
// strip the shebang
let file_text = if file_text.starts_with("#!/") {
let start_index = file_text.find('\n').unwrap_or(file_text.len());
&file_text[start_index..]
} else {
file_text
};
result.push_str(file_text);
result
}
static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| {
HashSet::from([
"break",
"case",
"catch",
"class",
"const",
"continue",
"debugger",
"default",
"delete",
"do",
"else",
"export",
"extends",
"false",
"finally",
"for",
"function",
"if",
"import",
"in",
"instanceof",
"new",
"null",
"return",
"super",
"switch",
"this",
"throw",
"true",
"try",
"typeof",
"var",
"void",
"while",
"with",
"yield",
"let",
"enum",
"implements",
"interface",
"package",
"private",
"protected",
"public",
"static",
])
});
fn add_export(
source: &mut Vec<String>,
name: &str,
initializer: &str,
temp_var_count: &mut usize,
) {
fn is_valid_var_decl(name: &str) -> bool {
// it's ok to be super strict here
name
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$')
}
// TODO(bartlomieju): Node actually checks if a given export exists in `exports` object,
// but it might not be necessary here since our analysis is more detailed?
if RESERVED_WORDS.contains(name) || !is_valid_var_decl(name) {
*temp_var_count += 1;
// we can't create an identifier with a reserved word or invalid identifier name,
// so assign it to a temporary variable that won't have a conflict, then re-export
// it as a string
source.push(format!(
"const __deno_export_{temp_var_count}__ = {initializer};"
));
source.push(format!(
"export {{ __deno_export_{temp_var_count}__ as \"{name}\" }};"
));
} else {
source.push(format!("export const {name} = {initializer};"));
}
}
fn parse_specifier(specifier: &str) -> Option<(String, String)> {
let mut separator_index = specifier.find('/');
let mut valid_package_name = true;
// let mut is_scoped = false;
if specifier.is_empty() {
valid_package_name = false;
} else if specifier.starts_with('@') {
// is_scoped = true;
if let Some(index) = separator_index {
separator_index = specifier[index + 1..].find('/').map(|i| i + index + 1);
} else {
valid_package_name = false;
}
}
let package_name = if let Some(index) = separator_index {
specifier[0..index].to_string()
} else {
specifier.to_string()
};
// Package name cannot have leading . and cannot have percent-encoding or separators.
for ch in package_name.chars() {
if ch == '%' || ch == '\\' {
valid_package_name = false;
break;
}
}
if !valid_package_name {
return None;
}
let package_subpath = if let Some(index) = separator_index {
format!(".{}", specifier.chars().skip(index).collect::<String>())
} else {
".".to_string()
};
Some((package_name, package_subpath))
}
fn file_extension_probe<Fs: NodeFs>(
p: PathBuf,
referrer: &Path,
) -> Result<PathBuf, AnyError> {
let p = p.clean();
if Fs::exists(&p) {
let file_name = p.file_name().unwrap();
let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
if Fs::is_file(&p_js) {
return Ok(p_js);
} else if Fs::is_dir(&p) {
return Ok(p.join("index.js"));
} else {
return Ok(p);
}
} else if let Some(file_name) = p.file_name() {
let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
if Fs::is_file(&p_js) {
return Ok(p_js);
}
}
Err(not_found(&p.to_string_lossy(), referrer))
}
fn not_found(path: &str, referrer: &Path) -> AnyError {
let msg = format!(
"[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"",
path,
referrer.to_string_lossy()
);
std::io::Error::new(std::io::ErrorKind::NotFound, msg).into()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_esm_code_with_node_globals() {
let r = esm_code_from_top_level_decls(
"export const x = 1;",
&HashSet::from(["x".to_string()]),
);
assert!(r.contains(&format!(
"var globalThis = {};",
NODE_GLOBAL_THIS_NAME.as_str()
)));
assert!(r.contains("var process = globalThis.process;"));
assert!(r.contains("export const x = 1;"));
}
#[test]
fn test_esm_code_with_node_globals_with_shebang() {
let r = esm_code_from_top_level_decls(
"#!/usr/bin/env node\nexport const x = 1;",
&HashSet::from(["x".to_string()]),
);
assert_eq!(
r,
format!(
concat!(
"var globalThis = {}",
";var Buffer = globalThis.Buffer;",
"var clearImmediate = globalThis.clearImmediate;var clearInterval = globalThis.clearInterval;",
"var clearTimeout = globalThis.clearTimeout;var console = globalThis.console;",
"var global = globalThis.global;var process = globalThis.process;",
"var setImmediate = globalThis.setImmediate;var setInterval = globalThis.setInterval;",
"var setTimeout = globalThis.setTimeout;\n",
"export const x = 1;"
),
NODE_GLOBAL_THIS_NAME.as_str(),
)
);
}
#[test]
fn test_add_export() {
let mut temp_var_count = 0;
let mut source = vec![];
let exports = vec!["static", "server", "app", "dashed-export"];
for export in exports {
add_export(&mut source, export, "init", &mut temp_var_count);
}
assert_eq!(
source,
vec![
"const __deno_export_1__ = init;".to_string(),
"export { __deno_export_1__ as \"static\" };".to_string(),
"export const server = init;".to_string(),
"export const app = init;".to_string(),
"const __deno_export_2__ = init;".to_string(),
"export { __deno_export_2__ as \"dashed-export\" };".to_string(),
]
)
}
#[test]
fn test_parse_specifier() {
assert_eq!(
parse_specifier("@some-package/core/actions"),
Some(("@some-package/core".to_string(), "./actions".to_string()))
);
}
}

View file

@ -228,6 +228,8 @@ fn x509name_to_string(
name: &X509Name,
oid_registry: &oid_registry::OidRegistry,
) -> Result<String, x509_parser::error::X509Error> {
// Lifted from https://github.com/rusticata/x509-parser/blob/4d618c2ed6b1fc102df16797545895f7c67ee0fe/src/x509.rs#L543-L566
// since it's a private function (Copyright 2017 Pierre Chifflier)
name.iter_rdn().fold(Ok(String::new()), |acc, rdn| {
acc.and_then(|mut _vec| {
rdn
@ -244,13 +246,13 @@ fn x509name_to_string(
let rdn = format!("{}={}", abbrev, val_str);
match _vec2.len() {
0 => Ok(rdn),
_ => Ok(_vec2 + " + " + &rdn),
_ => Ok(_vec2 + " + " + rdn.as_str()),
}
})
})
.map(|v| match _vec.len() {
0 => v,
_ => _vec + "\n" + &v,
_ => _vec + "\n" + v.as_str(),
})
})
})

View file

@ -12,6 +12,7 @@ use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
pub mod analyze;
mod crypto;
pub mod errors;
mod idna;