perf: node cjs & esm analysis cache (#16097)

This commit adds a cache for CJS and ESM analysis that is backed by an
SQLite file.

The connection to the DB is lazily created on first use, so it shouldn't
have an impact on startup time.

Benchmarked by running Vite:

Deno v1.26:
```
$ deno task dev
Warning deno task is unstable and may drastically change in the future
Task dev deno run -A --unstable --node-modules-dir npm:vite

  VITE v3.1.4  ready in 961 ms

  ➜  Local:   http://localhost:5173/
  ➜  Network: use --host to expose
```

This branch:
```
../deno/target/release/deno task dev
Warning deno task is unstable and may drastically change in the future
Task dev deno run -A --unstable --node-modules-dir npm:vite

  VITE v3.1.4  ready in 330 ms

  ➜  Local:   http://localhost:5173/
  ➜  Network: use --host to expose
```

Co-authored-by: Bartek Iwańczuk <biwanczuk@gmail.com>
This commit is contained in:
David Sherret 2022-10-01 06:15:56 -04:00 committed by GitHub
parent 1058d1868f
commit ecfafda9d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 449 additions and 14 deletions

2
cli/cache/mod.rs vendored
View file

@ -19,6 +19,7 @@ mod common;
mod disk_cache;
mod emit;
mod incremental;
mod node;
mod parsed_source;
pub use check::TypeCheckCache;
@ -26,6 +27,7 @@ pub use common::FastInsecureHasher;
pub use disk_cache::DiskCache;
pub use emit::EmitCache;
pub use incremental::IncrementalCache;
pub use node::NodeAnalysisCache;
pub use parsed_source::ParsedSourceCache;
/// A "wrapper" for the FileFetcher and DiskCache for the Deno CLI that provides

380
cli/cache/node.rs vendored Normal file
View file

@ -0,0 +1,380 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
use std::path::Path;
use deno_ast::CjsAnalysis;
use deno_core::error::AnyError;
use deno_core::parking_lot::Mutex;
use deno_core::serde_json;
use deno_runtime::deno_webstorage::rusqlite::params;
use deno_runtime::deno_webstorage::rusqlite::Connection;
use serde::Deserialize;
use serde::Serialize;
use std::path::PathBuf;
use std::sync::Arc;
use super::common::run_sqlite_pragma;
use super::FastInsecureHasher;
// todo(dsherret): use deno_ast::CjsAnalysisData directly when upgrading deno_ast
// See https://github.com/denoland/deno_ast/pull/117
/// JSON-serializable copy of the export lists of a `CjsAnalysis`. This is the
/// value that gets written to and read from the `data` column of the
/// `cjsanalysiscache` table.
#[derive(Serialize, Deserialize)]
struct CjsAnalysisData {
/// Copied from / into the `exports` field of `CjsAnalysis`.
pub exports: Vec<String>,
/// Copied from / into the `reexports` field of `CjsAnalysis`.
pub reexports: Vec<String>,
}
pub struct NodeAnalysisCache {
db_file_path: Option<PathBuf>,
inner: Arc<Mutex<Option<Option<NodeAnalysisCacheInner>>>>,
}
impl NodeAnalysisCache {
pub fn new(db_file_path: Option<PathBuf>) -> Self {
Self {
db_file_path,
inner: Default::default(),
}
}
pub fn compute_source_hash(text: &str) -> String {
FastInsecureHasher::new()
.write_str(text)
.finish()
.to_string()
}
pub fn get_cjs_analysis(
&self,
specifier: &str,
expected_source_hash: &str,
) -> Option<CjsAnalysis> {
self
.with_inner(|inner| {
inner.get_cjs_analysis(specifier, expected_source_hash)
})
.flatten()
}
pub fn set_cjs_analysis(
&self,
specifier: &str,
source_hash: &str,
cjs_analysis: &CjsAnalysis,
) {
self.with_inner(|inner| {
inner.set_cjs_analysis(specifier, source_hash, cjs_analysis)
});
}
pub fn get_esm_analysis(
&self,
specifier: &str,
expected_source_hash: &str,
) -> Option<Vec<String>> {
self
.with_inner(|inner| {
inner.get_esm_analysis(specifier, expected_source_hash)
})
.flatten()
}
pub fn set_esm_analysis(
&self,
specifier: &str,
source_hash: &str,
top_level_decls: &Vec<String>,
) {
self.with_inner(|inner| {
inner.set_esm_analysis(specifier, source_hash, top_level_decls)
});
}
fn with_inner<TResult>(
&self,
action: impl FnOnce(&NodeAnalysisCacheInner) -> Result<TResult, AnyError>,
) -> Option<TResult> {
// lazily create the cache in order to not
let mut maybe_created = self.inner.lock();
let inner = match maybe_created.as_ref() {
Some(maybe_inner) => maybe_inner.as_ref(),
None => {
let maybe_inner = match NodeAnalysisCacheInner::new(
self.db_file_path.as_deref(),
crate::version::deno(),
) {
Ok(cache) => Some(cache),
Err(err) => {
// should never error here, but if it ever does don't fail
if cfg!(debug_assertions) {
panic!("Error creating node analysis cache: {:#}", err);
} else {
log::debug!("Error creating node analysis cache: {:#}", err);
None
}
}
};
*maybe_created = Some(maybe_inner);
maybe_created.as_ref().and_then(|p| p.as_ref())
}
}?;
match action(inner) {
Ok(result) => Some(result),
Err(err) => {
// should never error here, but if it ever does don't fail
if cfg!(debug_assertions) {
panic!("Error using esm analysis: {:#}", err);
} else {
log::debug!("Error using esm analysis: {:#}", err);
}
None
}
}
}
}
struct NodeAnalysisCacheInner {
conn: Connection,
}
impl NodeAnalysisCacheInner {
pub fn new(
db_file_path: Option<&Path>,
version: String,
) -> Result<Self, AnyError> {
let conn = match db_file_path {
Some(path) => Connection::open(path)?,
None => Connection::open_in_memory()?,
};
Self::from_connection(conn, version)
}
fn from_connection(
conn: Connection,
version: String,
) -> Result<Self, AnyError> {
run_sqlite_pragma(&conn)?;
create_tables(&conn, &version)?;
Ok(Self { conn })
}
pub fn get_cjs_analysis(
&self,
specifier: &str,
expected_source_hash: &str,
) -> Result<Option<CjsAnalysis>, AnyError> {
let query = "
SELECT
data
FROM
cjsanalysiscache
WHERE
specifier=?1
AND source_hash=?2
LIMIT 1";
let mut stmt = self.conn.prepare_cached(query)?;
let mut rows = stmt.query(params![specifier, &expected_source_hash])?;
if let Some(row) = rows.next()? {
let analysis_info: String = row.get(0)?;
let analysis_info: CjsAnalysisData =
serde_json::from_str(&analysis_info)?;
Ok(Some(CjsAnalysis {
exports: analysis_info.exports,
reexports: analysis_info.reexports,
}))
} else {
Ok(None)
}
}
pub fn set_cjs_analysis(
&self,
specifier: &str,
source_hash: &str,
cjs_analysis: &CjsAnalysis,
) -> Result<(), AnyError> {
let sql = "
INSERT OR REPLACE INTO
cjsanalysiscache (specifier, source_hash, data)
VALUES
(?1, ?2, ?3)";
let mut stmt = self.conn.prepare_cached(sql)?;
stmt.execute(params![
specifier,
&source_hash.to_string(),
&serde_json::to_string(&CjsAnalysisData {
// temporary clones until upgrading deno_ast
exports: cjs_analysis.exports.clone(),
reexports: cjs_analysis.reexports.clone(),
})?,
])?;
Ok(())
}
pub fn get_esm_analysis(
&self,
specifier: &str,
expected_source_hash: &str,
) -> Result<Option<Vec<String>>, AnyError> {
let query = "
SELECT
data
FROM
esmglobalscache
WHERE
specifier=?1
AND source_hash=?2
LIMIT 1";
let mut stmt = self.conn.prepare_cached(query)?;
let mut rows = stmt.query(params![specifier, &expected_source_hash])?;
if let Some(row) = rows.next()? {
let top_level_decls: String = row.get(0)?;
let decls: Vec<String> = serde_json::from_str(&top_level_decls)?;
Ok(Some(decls))
} else {
Ok(None)
}
}
pub fn set_esm_analysis(
&self,
specifier: &str,
source_hash: &str,
top_level_decls: &Vec<String>,
) -> Result<(), AnyError> {
let sql = "
INSERT OR REPLACE INTO
esmglobalscache (specifier, source_hash, data)
VALUES
(?1, ?2, ?3)";
let mut stmt = self.conn.prepare_cached(sql)?;
stmt.execute(params![
specifier,
&source_hash.to_string(),
&serde_json::to_string(top_level_decls)?,
])?;
Ok(())
}
}
/// Creates the cache tables (when missing) and clears all cached analysis
/// when the CLI version stored in the `info` table differs from
/// `cli_version`, recording `cli_version` afterwards.
fn create_tables(conn: &Connection, cli_version: &str) -> Result<(), AnyError> {
  // INT doesn't store up to u64, so use TEXT for source_hash
  //
  // NOTE(review): `specifier` is already the PRIMARY KEY of both cache
  // tables, so the explicit unique indexes on it look redundant. They are
  // kept so the schema stays exactly as before; confirm before removing.
  let schema_statements = [
    "CREATE TABLE IF NOT EXISTS cjsanalysiscache (
        specifier TEXT PRIMARY KEY,
        source_hash TEXT NOT NULL,
        data TEXT NOT NULL
      )",
    "CREATE UNIQUE INDEX IF NOT EXISTS cjsanalysiscacheidx
      ON cjsanalysiscache(specifier)",
    "CREATE TABLE IF NOT EXISTS esmglobalscache (
        specifier TEXT PRIMARY KEY,
        source_hash TEXT NOT NULL,
        data TEXT NOT NULL
      )",
    "CREATE UNIQUE INDEX IF NOT EXISTS esmglobalscacheidx
      ON esmglobalscache(specifier)",
    "CREATE TABLE IF NOT EXISTS info (
        key TEXT PRIMARY KEY,
        value TEXT NOT NULL
      )",
  ];
  for statement in schema_statements.iter() {
    conn.execute(statement, [])?;
  }

  // delete the cache when the CLI version changes
  //
  // Any failure of this lookup (including there being no row yet) is treated
  // the same as a version mismatch.
  let data_cli_version: Option<String> = conn
    .query_row(
      "SELECT value FROM info WHERE key='CLI_VERSION' LIMIT 1",
      [],
      |row| row.get(0),
    )
    .ok();
  if data_cli_version.as_deref() != Some(cli_version) {
    conn.execute("DELETE FROM cjsanalysiscache", [])?;
    conn.execute("DELETE FROM esmglobalscache", [])?;
    let mut stmt = conn
      .prepare("INSERT OR REPLACE INTO info (key, value) VALUES (?1, ?2)")?;
    stmt.execute(params!["CLI_VERSION", cli_version])?;
  }

  Ok(())
}
#[cfg(test)]
mod test {
  use super::*;

  /// Wraps `conn` in a cache that is set up for the given CLI version.
  fn cache_for_version(
    conn: Connection,
    cli_version: &str,
  ) -> NodeAnalysisCacheInner {
    NodeAnalysisCacheInner::from_connection(conn, cli_version.to_string())
      .unwrap()
  }

  /// Walks through the whole life cycle of the cache on one in-memory
  /// connection: misses, stores, hash mismatches, overwrites, reopening with
  /// the same CLI version and finally reopening with a different one.
  #[test]
  pub fn node_analysis_cache_general_use() {
    const SPECIFIER: &str = "file.js";
    const HASH: &str = "2";
    const OTHER_HASH: &str = "3";

    let cache =
      cache_for_version(Connection::open_in_memory().unwrap(), "1.0.0");

    // cjs: nothing stored yet
    assert!(cache.get_cjs_analysis(SPECIFIER, HASH).unwrap().is_none());
    let expected_cjs = CjsAnalysis {
      exports: vec!["export1".to_string()],
      reexports: vec!["re-export1".to_string()],
    };
    cache
      .set_cjs_analysis(SPECIFIER, HASH, &expected_cjs)
      .unwrap();
    // a different hash must not match
    assert!(cache
      .get_cjs_analysis(SPECIFIER, OTHER_HASH)
      .unwrap()
      .is_none());
    let stored_cjs = cache.get_cjs_analysis(SPECIFIER, HASH).unwrap().unwrap();
    assert_eq!(stored_cjs.exports, expected_cjs.exports);
    assert_eq!(stored_cjs.reexports, expected_cjs.reexports);

    // esm: nothing stored yet
    assert!(cache.get_esm_analysis(SPECIFIER, HASH).unwrap().is_none());
    let expected_esm = vec!["esm1".to_string()];
    cache
      .set_esm_analysis(SPECIFIER, HASH, &expected_esm)
      .unwrap();
    // a different hash must not match
    assert!(cache
      .get_esm_analysis(SPECIFIER, OTHER_HASH)
      .unwrap()
      .is_none());
    let stored_esm = cache.get_esm_analysis(SPECIFIER, HASH).unwrap().unwrap();
    assert_eq!(stored_esm, expected_esm);

    // adding when already exists should not cause issue
    cache
      .set_cjs_analysis(SPECIFIER, HASH, &expected_cjs)
      .unwrap();
    cache
      .set_esm_analysis(SPECIFIER, HASH, &expected_esm)
      .unwrap();

    // recreating with same cli version should still have it
    let cache = cache_for_version(cache.conn, "1.0.0");
    let stored_cjs = cache.get_cjs_analysis(SPECIFIER, HASH).unwrap().unwrap();
    assert_eq!(stored_cjs.exports, expected_cjs.exports);
    assert_eq!(stored_cjs.reexports, expected_cjs.reexports);
    let stored_esm = cache.get_esm_analysis(SPECIFIER, HASH).unwrap().unwrap();
    assert_eq!(stored_esm, expected_esm);

    // now changing the cli version should clear it
    let cache = cache_for_version(cache.conn, "2.0.0");
    assert!(cache.get_cjs_analysis(SPECIFIER, HASH).unwrap().is_none());
    assert!(cache.get_esm_analysis(SPECIFIER, HASH).unwrap().is_none());
  }
}

View file

@ -64,6 +64,12 @@ impl DenoDir {
self.root.join("dep_analysis_cache_v1")
}
/// Path of the database file used for caching node analysis.
pub fn node_analysis_db_file_path(&self) -> PathBuf {
  // changing the version suffix of this name invalidates the entire cache
  let db_file_name = "node_analysis_cache_v1";
  self.root.join(db_file_name)
}
/// Path for the cache used for type checking.
pub fn type_checking_cache_db_file_path(&self) -> PathBuf {
// bump this version name to invalidate the entire cache

View file

@ -156,10 +156,15 @@ impl CliModuleLoader {
code,
MediaType::Cjs,
&self.ps.npm_resolver,
&self.ps.node_analysis_cache,
)?
} else {
// only inject node globals for esm
node::esm_code_with_node_globals(specifier, code)?
node::esm_code_with_node_globals(
&self.ps.node_analysis_cache,
specifier,
code,
)?
};
ModuleCodeSource {
code,

View file

@ -12,6 +12,8 @@ use deno_core::error::AnyError;
use deno_runtime::deno_node::NODE_GLOBAL_THIS_NAME;
use std::fmt::Write;
use crate::cache::NodeAnalysisCache;
static NODE_GLOBALS: &[&str] = &[
"Buffer",
"clearImmediate",
@ -32,18 +34,34 @@ static NODE_GLOBALS: &[&str] = &[
// `var` decls are taken into consideration.
pub fn esm_code_with_node_globals(
analysis_cache: &NodeAnalysisCache,
specifier: &ModuleSpecifier,
code: String,
) -> Result<String, AnyError> {
let parsed_source = deno_ast::parse_program(deno_ast::ParseParams {
specifier: specifier.to_string(),
text_info: deno_ast::SourceTextInfo::from_string(code),
media_type: deno_ast::MediaType::from(specifier),
capture_tokens: true,
scope_analysis: true,
maybe_syntax: None,
})?;
let top_level_decls = analyze_top_level_decls(&parsed_source)?;
let source_hash = NodeAnalysisCache::compute_source_hash(&code);
let text_info = deno_ast::SourceTextInfo::from_string(code);
let top_level_decls = if let Some(decls) =
analysis_cache.get_esm_analysis(specifier.as_str(), &source_hash)
{
HashSet::from_iter(decls)
} else {
let parsed_source = deno_ast::parse_program(deno_ast::ParseParams {
specifier: specifier.to_string(),
text_info: text_info.clone(),
media_type: deno_ast::MediaType::from(specifier),
capture_tokens: true,
scope_analysis: true,
maybe_syntax: None,
})?;
let top_level_decls = analyze_top_level_decls(&parsed_source)?;
analysis_cache.set_esm_analysis(
specifier.as_str(),
&source_hash,
&top_level_decls.clone().into_iter().collect(),
);
top_level_decls
};
let mut globals = Vec::with_capacity(NODE_GLOBALS.len());
let has_global_this = top_level_decls.contains("globalThis");
for global in NODE_GLOBALS.iter() {
@ -64,7 +82,7 @@ pub fn esm_code_with_node_globals(
write!(result, "var {0} = {1}.{0};", global, global_this_expr).unwrap();
}
let file_text = parsed_source.text_info().text_str();
let file_text = text_info.text_str();
// strip the shebang
let file_text = if file_text.starts_with("#!/") {
let start_index = file_text.find('\n').unwrap_or(file_text.len());
@ -148,6 +166,7 @@ mod tests {
#[test]
fn test_esm_code_with_node_globals() {
let r = esm_code_with_node_globals(
&NodeAnalysisCache::new(None),
&ModuleSpecifier::parse("https://example.com/foo/bar.js").unwrap(),
"export const x = 1;".to_string(),
)
@ -163,6 +182,7 @@ mod tests {
#[test]
fn test_esm_code_with_node_globals_with_shebang() {
let r = esm_code_with_node_globals(
&NodeAnalysisCache::new(None),
&ModuleSpecifier::parse("https://example.com/foo/bar.js").unwrap(),
"#!/usr/bin/env node\nexport const x = 1;".to_string(),
)

View file

@ -5,6 +5,7 @@ use std::collections::VecDeque;
use std::path::Path;
use std::path::PathBuf;
use crate::cache::NodeAnalysisCache;
use crate::deno_std::CURRENT_STD_URL;
use deno_ast::CjsAnalysis;
use deno_ast::MediaType;
@ -734,12 +735,20 @@ pub fn translate_cjs_to_esm(
code: String,
media_type: MediaType,
npm_resolver: &NpmPackageResolver,
node_analysis_cache: &NodeAnalysisCache,
) -> Result<String, AnyError> {
fn perform_cjs_analysis(
analysis_cache: &NodeAnalysisCache,
specifier: &str,
media_type: MediaType,
code: String,
) -> Result<CjsAnalysis, AnyError> {
let source_hash = NodeAnalysisCache::compute_source_hash(&code);
if let Some(analysis) =
analysis_cache.get_cjs_analysis(specifier, &source_hash)
{
return Ok(analysis);
}
let parsed_source = deno_ast::parse_script(deno_ast::ParseParams {
specifier: specifier.to_string(),
text_info: deno_ast::SourceTextInfo::new(code.into()),
@ -748,7 +757,10 @@ pub fn translate_cjs_to_esm(
scope_analysis: false,
maybe_syntax: None,
})?;
Ok(parsed_source.analyze_cjs())
let analysis = parsed_source.analyze_cjs();
analysis_cache.set_cjs_analysis(specifier, &source_hash, &analysis);
Ok(analysis)
}
let mut temp_var_count = 0;
@ -758,7 +770,12 @@ pub fn translate_cjs_to_esm(
r#"const require = Deno[Deno.internal].require.Module.createRequire(import.meta.url);"#.to_string(),
];
let analysis = perform_cjs_analysis(specifier.as_str(), media_type, code)?;
let analysis = perform_cjs_analysis(
node_analysis_cache,
specifier.as_str(),
media_type,
code,
)?;
let mut all_exports = analysis
.exports
@ -804,6 +821,7 @@ pub fn translate_cjs_to_esm(
{
let analysis = perform_cjs_analysis(
node_analysis_cache,
reexport_specifier.as_str(),
reexport_file.media_type,
reexport_file.source.to_string(),

View file

@ -15,7 +15,6 @@ use global::GlobalNpmPackageResolver;
use once_cell::sync::Lazy;
use serde::Deserialize;
use serde::Serialize;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

View file

@ -7,6 +7,7 @@ use crate::args::TypeCheckMode;
use crate::cache;
use crate::cache::EmitCache;
use crate::cache::FastInsecureHasher;
use crate::cache::NodeAnalysisCache;
use crate::cache::ParsedSourceCache;
use crate::cache::TypeCheckCache;
use crate::deno_dir;
@ -88,6 +89,7 @@ pub struct Inner {
pub parsed_source_cache: ParsedSourceCache,
maybe_resolver: Option<Arc<dyn deno_graph::source::Resolver + Send + Sync>>,
maybe_file_watcher_reporter: Option<FileWatcherReporter>,
pub node_analysis_cache: NodeAnalysisCache,
pub npm_cache: NpmCache,
pub npm_resolver: NpmPackageResolver,
pub cjs_resolutions: Mutex<HashSet<ModuleSpecifier>>,
@ -245,6 +247,8 @@ impl ProcState {
.resolve_local_node_modules_folder()
.with_context(|| "Resolving local node_modules folder.")?,
);
let node_analysis_cache =
NodeAnalysisCache::new(Some(dir.node_analysis_db_file_path()));
let emit_options: deno_ast::EmitOptions = ts_config_result.ts_config.into();
Ok(ProcState(Arc::new(Inner {
@ -268,6 +272,7 @@ impl ProcState {
parsed_source_cache,
maybe_resolver,
maybe_file_watcher_reporter,
node_analysis_cache,
npm_cache,
npm_resolver,
cjs_resolutions: Default::default(),