mirror of
https://github.com/rust-lang/cargo
synced 2024-10-30 03:28:38 +00:00
Auto merge of #13818 - osiewicz:clean-package-perf-improvements, r=weihanglo
Clean package perf improvements ### What does this PR try to resolve? I've noticed that `cargo clean -p` execution time scales poorly with size of target directory; in my case (~250GB target directory on M1 Mac) running `cargo clean -p` takes circa 35 seconds. Notably, it's the file listing that takes that time, not deleting the package itself. That is, when running `cargo clean -p SOME_PACKAGE` twice in a row, both executions take roughly the same time. I've tracked it down to the fact that we seem quite happy to use `glob::glob` function, which iterates over contents of target dir. It also was a bit sub-optimal when it came to doing that, for which I've already filled a PR in https://github.com/rust-lang/glob/pull/144 - that PR alone takes down cleaning time down to ~14 seconds. While it is a good improvement for a relatively straightforward change, this PR tries to take it even further. With glob PR applied + changes from this PR, my test case goes down to ~6 seconds. I'm pretty sure that we could squeeze this further, but I'd rather do so in a follow-up PR. Notably, this PR doesn't help with *just* super-large target directories. `cargo clean -p serde` on cargo repo (with ~7Gb target directory size) went down from ~380ms to ~100ms for me. Not too shabby. ### How should we test and review this PR? I've mostly tested it manually, running `cargo clean` against multiple different repos. ### Additional information TODO: - [x] [c770700
](c770700885
) is not quite correct; we need to consider that it changes how progress reporting works; as is, we're gonna report all progress relatively quickly and stall at the end (when we're actually iterating over directories, globbing, removing files, that kind of jazz). I'll address that.
This commit is contained in:
commit
27d3e3d603
1 changed files with 59 additions and 16 deletions
|
@ -8,8 +8,10 @@ use crate::util::interning::InternedString;
|
|||
use crate::util::{human_readable_bytes, GlobalContext, Progress, ProgressStyle};
|
||||
use anyhow::bail;
|
||||
use cargo_util::paths;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::rc::Rc;
|
||||
|
||||
pub struct CleanOptions<'gctx> {
|
||||
pub gctx: &'gctx GlobalContext,
|
||||
|
@ -169,6 +171,9 @@ fn clean_specs(
|
|||
|
||||
clean_ctx.progress = Box::new(CleaningPackagesBar::new(clean_ctx.gctx, packages.len()));
|
||||
|
||||
// Try to reduce the amount of times we iterate over the same target directory by storing away
|
||||
// the directories we've iterated over (and cleaned for a given package).
|
||||
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
|
||||
for pkg in packages {
|
||||
let pkg_dir = format!("{}-*", pkg.name());
|
||||
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
|
||||
|
@ -192,7 +197,9 @@ fn clean_specs(
|
|||
}
|
||||
continue;
|
||||
}
|
||||
let crate_name = target.crate_name();
|
||||
let crate_name: Rc<str> = target.crate_name().into();
|
||||
let path_dot: &str = &format!("{crate_name}.");
|
||||
let path_dash: &str = &format!("{crate_name}-");
|
||||
for &mode in &[
|
||||
CompileMode::Build,
|
||||
CompileMode::Test,
|
||||
|
@ -212,28 +219,15 @@ fn clean_specs(
|
|||
TargetKind::Test | TargetKind::Bench => (layout.deps(), None),
|
||||
_ => (layout.deps(), Some(layout.dest())),
|
||||
};
|
||||
let mut dir_glob_str = escape_glob_path(dir)?;
|
||||
let dir_glob = Path::new(&dir_glob_str);
|
||||
for file_type in file_types {
|
||||
// Some files include a hash in the filename, some don't.
|
||||
let hashed_name = file_type.output_filename(target, Some("*"));
|
||||
let unhashed_name = file_type.output_filename(target, None);
|
||||
let dir_glob = escape_glob_path(dir)?;
|
||||
let dir_glob = Path::new(&dir_glob);
|
||||
|
||||
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
|
||||
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
|
||||
// Remove dep-info file generated by rustc. It is not tracked in
|
||||
// file_types. It does not have a prefix.
|
||||
let hashed_dep_info = dir_glob.join(format!("{}-*.d", crate_name));
|
||||
clean_ctx.rm_rf_glob(&hashed_dep_info)?;
|
||||
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
|
||||
clean_ctx.rm_rf(&unhashed_dep_info)?;
|
||||
// Remove split-debuginfo files generated by rustc.
|
||||
let split_debuginfo_obj = dir_glob.join(format!("{}.*.o", crate_name));
|
||||
clean_ctx.rm_rf_glob(&split_debuginfo_obj)?;
|
||||
let split_debuginfo_dwo = dir_glob.join(format!("{}.*.dwo", crate_name));
|
||||
clean_ctx.rm_rf_glob(&split_debuginfo_dwo)?;
|
||||
let split_debuginfo_dwp = dir_glob.join(format!("{}.*.dwp", crate_name));
|
||||
clean_ctx.rm_rf_glob(&split_debuginfo_dwp)?;
|
||||
|
||||
// Remove the uplifted copy.
|
||||
if let Some(uplift_dir) = uplift_dir {
|
||||
|
@ -244,6 +238,31 @@ fn clean_specs(
|
|||
clean_ctx.rm_rf(&dep_info)?;
|
||||
}
|
||||
}
|
||||
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
|
||||
clean_ctx.rm_rf(&unhashed_dep_info)?;
|
||||
|
||||
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
|
||||
dir_glob_str.push(std::path::MAIN_SEPARATOR);
|
||||
}
|
||||
dir_glob_str.push('*');
|
||||
let dir_glob_str: Rc<str> = dir_glob_str.into();
|
||||
if cleaned_packages
|
||||
.entry(dir_glob_str.clone())
|
||||
.or_default()
|
||||
.insert(crate_name.clone())
|
||||
{
|
||||
let paths = [
|
||||
// Remove dep-info file generated by rustc. It is not tracked in
|
||||
// file_types. It does not have a prefix.
|
||||
(path_dash, ".d"),
|
||||
// Remove split-debuginfo files generated by rustc.
|
||||
(path_dot, ".o"),
|
||||
(path_dot, ".dwo"),
|
||||
(path_dot, ".dwp"),
|
||||
];
|
||||
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
|
||||
}
|
||||
|
||||
// TODO: what to do about build_script_build?
|
||||
let dir = escape_glob_path(layout.incremental())?;
|
||||
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
|
||||
|
@ -322,6 +341,30 @@ impl<'gctx> CleanContext<'gctx> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
|
||||
///
|
||||
/// This function iterates over files matching a glob (`pattern`) and removes those whose
|
||||
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
|
||||
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
|
||||
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
|
||||
fn rm_rf_prefix_list(
|
||||
&mut self,
|
||||
pattern: &str,
|
||||
path_matchers: &[(&str, &str)],
|
||||
) -> CargoResult<()> {
|
||||
for path in glob::glob(pattern)? {
|
||||
let path = path?;
|
||||
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
|
||||
if path_matchers
|
||||
.iter()
|
||||
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
|
||||
{
|
||||
self.rm_rf(&path)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> {
|
||||
let meta = match fs::symlink_metadata(path) {
|
||||
Ok(meta) => meta,
|
||||
|
|
Loading…
Reference in a new issue