diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs
index 9ba5c1c996..44b98fcee0 100644
--- a/cli/npm/managed/cache.rs
+++ b/cli/npm/managed/cache.rs
@@ -25,6 +25,7 @@ use crate::util::fs::hard_link_dir_recursive;
 use crate::util::progress_bar::ProgressBar;
 
 use super::tarball::verify_and_extract_tarball;
+use super::tarball::TarballExtractionMode;
 
 /// Stores a single copy of npm packages in a cache.
 #[derive(Debug)]
@@ -69,7 +70,7 @@ impl NpmCache {
   /// to ensure a package is only downloaded once per run of the CLI. This
   /// prevents downloads from re-occurring when someone has `--reload` and
   /// imports a dynamic import that imports the same package again, for example.
-  fn should_use_global_cache_for_package(&self, package: &PackageNv) -> bool {
+  fn should_use_cache_for_package(&self, package: &PackageNv) -> bool {
     self.cache_setting.should_use_for_npm_package(&package.name)
       || !self
         .previously_reloaded_packages
@@ -91,26 +92,23 @@ impl NpmCache {
   async fn ensure_package_inner(
     &self,
-    package: &PackageNv,
+    package_nv: &PackageNv,
     dist: &NpmPackageVersionDistInfo,
     registry_url: &Url,
   ) -> Result<(), AnyError> {
     let package_folder = self
       .cache_dir
-      .package_folder_for_name_and_version(package, registry_url);
-    if self.should_use_global_cache_for_package(package)
-      && self.fs.exists_sync(&package_folder)
-      // if this file exists, then the package didn't successfully extract
-      // the first time, or another process is currently extracting the zip file
-      && !self.fs.exists_sync(&package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME))
-    {
+      .package_folder_for_name_and_version(package_nv, registry_url);
+    let should_use_cache = self.should_use_cache_for_package(package_nv);
+    let package_folder_exists = self.fs.exists_sync(&package_folder);
+    if should_use_cache && package_folder_exists {
       return Ok(());
     } else if self.cache_setting == CacheSetting::Only {
       return Err(custom_error(
         "NotCached",
         format!(
           "An npm specifier not found in cache: \"{}\", --cached-only is specified.",
-          &package.name
+          &package_nv.name
         )
       )
       );
@@ -127,7 +125,31 @@ impl NpmCache {
       .await?;
     match maybe_bytes {
       Some(bytes) => {
-        verify_and_extract_tarball(package, &bytes, dist, &package_folder)
+        let extraction_mode = if should_use_cache || !package_folder_exists {
+          TarballExtractionMode::SiblingTempDir
+        } else {
+          // The user ran with `--reload`, so overwrite the package instead of
+          // deleting it, since the package might get corrupted if a user kills
+          // their deno process while it's deleting a package directory.
+          //
+          // We can't rename this folder and delete it because the folder
+          // may be in use by another process or may now contain hardlinks,
+          // which will cause Windows to throw an "AccessDenied" error when
+          // renaming. So we settle for overwriting.
+          TarballExtractionMode::Overwrite
+        };
+        let dist = dist.clone();
+        let package_nv = package_nv.clone();
+        deno_core::unsync::spawn_blocking(move || {
+          verify_and_extract_tarball(
+            &package_nv,
+            &bytes,
+            &dist,
+            &package_folder,
+            extraction_mode,
+          )
+        })
+        .await?
       }
       None => {
         bail!("Could not find npm package tarball at: {}", dist.tarball);
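
Aside (not part of the patch): the extraction-mode selection in the hunk above reduces to a two-input decision. A minimal, self-contained sketch under that reading; the enum mirrors the patch, while `choose_mode` and the `main` assertions are hypothetical and for illustration only:

```rust
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum TarballExtractionMode {
    Overwrite,
    SiblingTempDir,
}

// Hypothetical helper mirroring the `if should_use_cache || !package_folder_exists`
// branch in ensure_package_inner above.
fn choose_mode(
    should_use_cache: bool,
    package_folder_exists: bool,
) -> TarballExtractionMode {
    if should_use_cache || !package_folder_exists {
        // Fresh or cache-eligible: build next to the destination, then
        // rename into place.
        TarballExtractionMode::SiblingTempDir
    } else {
        // `--reload` over an existing folder: overwrite in place so a killed
        // process can't leave a half-deleted package behind.
        TarballExtractionMode::Overwrite
    }
}

fn main() {
    assert_eq!(choose_mode(true, true), TarballExtractionMode::SiblingTempDir);
    assert_eq!(choose_mode(true, false), TarballExtractionMode::SiblingTempDir);
    assert_eq!(choose_mode(false, false), TarballExtractionMode::SiblingTempDir);
    // only a reloaded package whose folder already exists is overwritten
    assert_eq!(choose_mode(false, true), TarballExtractionMode::Overwrite);
}
```
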
@@ -150,7 +172,7 @@ impl NpmCache {
       .package_folder_for_id(folder_id, registry_url);
 
     if package_folder.exists()
-      // if this file exists, then the package didn't successfully extract
+      // if this file exists, then the package didn't successfully initialize
       // the first time, or another process is currently extracting the zip file
       && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
       && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name)
@@ -161,6 +183,9 @@ impl NpmCache {
     let original_package_folder = self
       .cache_dir
       .package_folder_for_name_and_version(&folder_id.nv, registry_url);
+
+    // it seems Windows throws an "AccessDenied" error when moving a
+    // directory with hard links, which is why a sync lock is used here
     with_folder_sync_lock(&folder_id.nv, &package_folder, || {
       hard_link_dir_recursive(&original_package_folder, &package_folder)
     })?;
@@ -206,7 +231,7 @@ impl NpmCache {
 
 const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";
 
-pub fn with_folder_sync_lock(
+fn with_folder_sync_lock(
   package: &PackageNv,
   output_folder: &Path,
   action: impl FnOnce() -> Result<(), AnyError>,
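
For context before the next file: `with_folder_sync_lock` (now private to this module) is the lock-file marker that the comments above refer to. A rough std-only sketch of the pattern, assuming it matches those comments; the real implementation in cache.rs may differ in details:

```rust
use std::fs;
use std::io;
use std::path::Path;

const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";

// Sketch: the lock file marks a folder as "initialization in progress".
// A folder still containing it is treated as incomplete, so a crash
// mid-action can't produce a folder that looks valid.
fn with_folder_sync_lock_sketch(
    output_folder: &Path,
    action: impl FnOnce() -> io::Result<()>,
) -> io::Result<()> {
    fs::create_dir_all(output_folder)?;
    let lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME);
    fs::write(&lock_path, "")?; // mark in-progress
    match action() {
        Ok(()) => fs::remove_file(&lock_path), // mark complete
        Err(err) => {
            // best-effort cleanup; a surviving lock file already marks
            // the folder as invalid for future runs
            let _ = fs::remove_dir_all(output_folder);
            Err(err)
        }
    }
}

fn main() -> io::Result<()> {
    let dir = std::env::temp_dir().join("sync_lock_demo");
    with_folder_sync_lock_sketch(&dir, || fs::write(dir.join("package.json"), b"{}"))?;
    assert!(!dir.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists());
    fs::remove_dir_all(&dir)
}
```
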
diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs
index 1267b13d8c..e2d242e662 100644
--- a/cli/npm/managed/tarball.rs
+++ b/cli/npm/managed/tarball.rs
@@ -2,12 +2,14 @@
 
 use std::collections::HashSet;
 use std::fs;
+use std::io::ErrorKind;
 use std::path::Path;
 use std::path::PathBuf;
 
 use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
 use deno_core::anyhow::bail;
+use deno_core::anyhow::Context;
 use deno_core::error::AnyError;
 use deno_npm::registry::NpmPackageVersionDistInfo;
 use deno_npm::registry::NpmPackageVersionDistInfoIntegrity;
@@ -16,19 +18,76 @@ use flate2::read::GzDecoder;
 use tar::Archive;
 use tar::EntryType;
 
-use super::cache::with_folder_sync_lock;
+use crate::util::path::get_atomic_dir_path;
+
+#[derive(Debug, Copy, Clone)]
+pub enum TarballExtractionMode {
+  /// Overwrites the destination directory without deleting any files.
+  Overwrite,
+  /// Creates and writes to a sibling temporary directory. When done, moves
+  /// it to the final destination.
+  ///
+  /// This is more robust than `Overwrite` as it better handles multiple
+  /// processes writing to the directory at the same time.
+  SiblingTempDir,
+}
 
 pub fn verify_and_extract_tarball(
-  package: &PackageNv,
+  package_nv: &PackageNv,
   data: &[u8],
   dist_info: &NpmPackageVersionDistInfo,
   output_folder: &Path,
+  extraction_mode: TarballExtractionMode,
 ) -> Result<(), AnyError> {
-  verify_tarball_integrity(package, data, &dist_info.integrity())?;
+  verify_tarball_integrity(package_nv, data, &dist_info.integrity())?;
 
-  with_folder_sync_lock(package, output_folder, || {
-    extract_tarball(data, output_folder)
-  })
+  match extraction_mode {
+    TarballExtractionMode::Overwrite => extract_tarball(data, output_folder),
+    TarballExtractionMode::SiblingTempDir => {
+      let temp_dir = get_atomic_dir_path(output_folder);
+      extract_tarball(data, &temp_dir)?;
+      rename_with_retries(&temp_dir, output_folder)
+        .map_err(AnyError::from)
+        .context("Failed moving extracted tarball to final destination.")
+    }
+  }
+}
+
+fn rename_with_retries(
+  temp_dir: &Path,
+  output_folder: &Path,
+) -> Result<(), std::io::Error> {
+  fn already_exists(err: &std::io::Error, output_folder: &Path) -> bool {
+    // Windows reports an "Access is denied" error in this case
+    err.kind() == ErrorKind::AlreadyExists || output_folder.exists()
+  }
+
+  let mut count = 0;
+  // renaming might be flaky if a lot of processes are trying
+  // to do this, so retry a few times
+  loop {
+    match fs::rename(temp_dir, output_folder) {
+      Ok(_) => return Ok(()),
+      Err(err) if already_exists(&err, output_folder) => {
+        // another process copied here, just clean up
+        let _ = fs::remove_dir_all(temp_dir);
+        return Ok(());
+      }
+      Err(err) => {
+        count += 1;
+        if count > 5 {
+          // too many retries, clean up and return the error
+          let _ = fs::remove_dir_all(temp_dir);
+          return Err(err);
+        }
+
+        // wait a bit before retrying... this should be very rare or only
+        // happen in error cases, so it's ok to sleep a bit
+        let sleep_ms = std::cmp::min(100, 20 * count);
+        std::thread::sleep(std::time::Duration::from_millis(sleep_ms));
+      }
+    }
+  }
+}
 
 fn verify_tarball_integrity(
@@ -150,6 +209,7 @@ fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> {
 #[cfg(test)]
 mod test {
   use deno_semver::Version;
+  use test_util::TempDir;
 
   use super::*;
 
@@ -240,4 +300,25 @@ mod test {
     )
     .is_ok());
   }
+
+  #[test]
+  fn rename_with_retries_succeeds_exists() {
+    let temp_dir = TempDir::new();
+    let folder_1 = temp_dir.path().join("folder_1");
+    let folder_2 = temp_dir.path().join("folder_2");
+
+    folder_1.create_dir_all();
+    folder_1.join("a.txt").write("test");
+    folder_2.create_dir_all();
+    // this will not end up in the output as rename_with_retries assumes
+    // the folders ending up at the destination are the same
+    folder_2.join("b.txt").write("test2");
+
+    let dest_folder = temp_dir.path().join("dest_folder");
+
+    rename_with_retries(folder_1.as_path(), dest_folder.as_path()).unwrap();
+    rename_with_retries(folder_2.as_path(), dest_folder.as_path()).unwrap();
+    assert!(dest_folder.join("a.txt").exists());
+    assert!(!dest_folder.join("b.txt").exists());
+  }
 }
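
The `SiblingTempDir` flow above is the classic write-then-rename trick applied to a whole directory: readers either see no destination folder or a fully populated one, never a half-extracted one. A condensed std-only sketch of the idea; `write_dir_atomically` and its temp naming are hypothetical stand-ins for `extract_tarball` plus `get_atomic_dir_path`/`rename_with_retries` (the retry loop is omitted here):

```rust
use std::borrow::Cow;
use std::fs;
use std::io;
use std::path::Path;

// Hypothetical stand-in for the extract-to-temp-then-rename flow.
fn write_dir_atomically(
    dest: &Path,
    fill: impl FnOnce(&Path) -> io::Result<()>,
) -> io::Result<()> {
    // simplistic temp naming; the real code appends a random hex component
    let temp = dest.with_file_name(format!(
        ".{}_tmp",
        dest.file_name()
            .map(|f| f.to_string_lossy())
            .unwrap_or(Cow::Borrowed(""))
    ));
    fs::create_dir_all(&temp)?;
    fill(&temp)?;
    match fs::rename(&temp, dest) {
        Ok(()) => Ok(()),
        // another process already moved an equivalent folder into place,
        // so ours is redundant and can be discarded
        Err(_) if dest.exists() => {
            let _ = fs::remove_dir_all(&temp);
            Ok(())
        }
        Err(err) => {
            let _ = fs::remove_dir_all(&temp);
            Err(err)
        }
    }
}

fn main() -> io::Result<()> {
    let dest = std::env::temp_dir().join("atomic_dir_demo");
    write_dir_atomically(&dest, |dir| fs::write(dir.join("package.json"), b"{}"))?;
    assert!(dest.join("package.json").exists());
    fs::remove_dir_all(&dest)
}
```
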
diff --git a/cli/util/fs.rs b/cli/util/fs.rs
index 92820ebe87..fdc7855e62 100644
--- a/cli/util/fs.rs
+++ b/cli/util/fs.rs
@@ -2,7 +2,6 @@
 
 use std::collections::HashSet;
 use std::env::current_dir;
-use std::fmt::Write as FmtWrite;
 use std::fs::FileType;
 use std::fs::OpenOptions;
 use std::io::Error;
@@ -23,12 +22,12 @@ use deno_core::error::AnyError;
 pub use deno_core::normalize_path;
 use deno_core::unsync::spawn_blocking;
 use deno_core::ModuleSpecifier;
-use deno_runtime::deno_crypto::rand;
 use deno_runtime::deno_fs::FileSystem;
 use deno_runtime::deno_node::PathClean;
 
 use crate::util::gitignore::DirGitIgnores;
 use crate::util::gitignore::GitIgnoreTree;
+use crate::util::path::get_atomic_file_path;
 use crate::util::progress_bar::ProgressBar;
 use crate::util::progress_bar::ProgressBarStyle;
 use crate::util::progress_bar::ProgressMessagePrompt;
@@ -56,14 +55,7 @@ pub fn atomic_write_file<T: AsRef<[u8]>>(
 }
 
 fn inner(file_path: &Path, data: &[u8], mode: u32) -> std::io::Result<()> {
-  let temp_file_path = {
-    let rand: String = (0..4).fold(String::new(), |mut output, _| {
-      let _ = write!(output, "{:02x}", rand::random::<u8>());
-      output
-    });
-    let extension = format!("{rand}.tmp");
-    file_path.with_extension(extension)
-  };
+  let temp_file_path = get_atomic_file_path(file_path);
 
   if let Err(write_err) =
     atomic_write_file_raw(&temp_file_path, file_path, data, mode)
diff --git a/cli/util/path.rs b/cli/util/path.rs
index a3109ad04a..3c848edea7 100644
--- a/cli/util/path.rs
+++ b/cli/util/path.rs
@@ -42,6 +42,32 @@ pub fn get_extension(file_path: &Path) -> Option<String> {
     .map(|e| e.to_lowercase());
 }
 
+pub fn get_atomic_dir_path(file_path: &Path) -> PathBuf {
+  let rand = gen_rand_path_component();
+  let new_file_name = format!(
+    ".{}_{}",
+    file_path
+      .file_name()
+      .map(|f| f.to_string_lossy())
+      .unwrap_or(Cow::Borrowed("")),
+    rand
+  );
+  file_path.with_file_name(new_file_name)
+}
+
+pub fn get_atomic_file_path(file_path: &Path) -> PathBuf {
+  let rand = gen_rand_path_component();
+  let extension = format!("{rand}.tmp");
+  file_path.with_extension(extension)
+}
+
+fn gen_rand_path_component() -> String {
+  (0..4).fold(String::new(), |mut output, _| {
+    output.push_str(&format!("{:02x}", rand::random::<u8>()));
+    output
+  })
+}
+
 /// TypeScript figures out the type of file based on the extension, but we take
 /// other factors into account like the file headers. The hack here is to map the
 /// specifier passed to TypeScript to a new specifier with the file extension.
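
To make the naming scheme concrete: `gen_rand_path_component` yields eight hex characters, `get_atomic_file_path` appends them plus `.tmp` as the extension, and `get_atomic_dir_path` produces a hidden, dot-prefixed sibling. A small illustration with a fixed component instead of a random one; the `demo_*` helpers and paths are hypothetical:

```rust
use std::borrow::Cow;
use std::path::{Path, PathBuf};

// Same shapes as get_atomic_file_path / get_atomic_dir_path, but with the
// random component passed in so the output is predictable.
fn demo_atomic_file_path(file_path: &Path, rand: &str) -> PathBuf {
    file_path.with_extension(format!("{rand}.tmp"))
}

fn demo_atomic_dir_path(dir_path: &Path, rand: &str) -> PathBuf {
    let name = dir_path
        .file_name()
        .map(|f| f.to_string_lossy())
        .unwrap_or(Cow::Borrowed(""));
    dir_path.with_file_name(format!(".{name}_{rand}"))
}

fn main() {
    // the temp entries land next to their targets, which keeps the final
    // rename on the same filesystem (a cross-device rename would fail)
    assert_eq!(
        demo_atomic_file_path(Path::new("/cache/registry.json"), "a1b2c3d4"),
        PathBuf::from("/cache/registry.a1b2c3d4.tmp")
    );
    assert_eq!(
        demo_atomic_dir_path(Path::new("/cache/chalk/5.0.1"), "a1b2c3d4"),
        PathBuf::from("/cache/chalk/.5.0.1_a1b2c3d4")
    );
}
```
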