Auto merge of #8095 - ehuss:paths-walking, r=alexcrichton

Updates to path source walking.

This is a collection of loosely related changes to path source walking:

* Add more context to error messages.
* Allow `package.exclude` patterns to match directories. Previously, the walker would recurse into the directory, and skip every file. Instead, just skip the whole directory. This can be helpful if the directory is not readable, or otherwise want to avoid walking.
* Don't require `Cargo.toml` to be in root of a git repo in order to use git to guide the selection. I'm not sure I understand the original reasoning that (any) `Cargo.toml` had to reside next to the `.git` directory.

The last is a moderately risky change, since it's hard to predict how this might affect more complex project layouts or new interactions with `.gitignore` that didn't exist before. Also, I'm wondering if it should just ignore if it fails to open the repo instead of emitting an error?

Closes #1729
Closes #6188
Closes #8092
This commit is contained in:
bors 2020-04-24 18:35:25 +00:00
commit ef3cbfcb5d
6 changed files with 195 additions and 70 deletions

View file

@ -32,7 +32,7 @@ pretty_env_logger = { version = "0.4", optional = true }
anyhow = "1.0"
filetime = "0.2.9"
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
git2 = "0.13.1"
git2 = "0.13.5"
git2-curl = "0.14.0"
glob = "0.3.0"
hex = "0.4"
@ -44,7 +44,7 @@ jobserver = "0.1.21"
lazycell = "1.2.0"
libc = "0.2"
log = "0.4.6"
libgit2-sys = "0.12.1"
libgit2-sys = "0.12.5"
memchr = "2.1.3"
num_cpus = "1.0"
opener = "0.4"

View file

@ -1209,7 +1209,12 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Finger
let target_root = target_root(cx);
let local = if unit.mode.is_doc() {
// rustdoc does not have dep-info files.
let fingerprint = pkg_fingerprint(cx.bcx, &unit.pkg)?;
let fingerprint = pkg_fingerprint(cx.bcx, &unit.pkg).chain_err(|| {
format!(
"failed to determine package fingerprint for documenting {}",
unit.pkg
)
})?;
vec![LocalFingerprint::Precalculated(fingerprint)]
} else {
let dep_info = dep_info_loc(cx, unit);
@ -1270,7 +1275,18 @@ fn calculate_run_custom_build(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoRes
// the whole crate.
let (gen_local, overridden) = build_script_local_fingerprints(cx, unit);
let deps = &cx.build_explicit_deps[unit];
let local = (gen_local)(deps, Some(&|| pkg_fingerprint(cx.bcx, &unit.pkg)))?.unwrap();
let local = (gen_local)(
deps,
Some(&|| {
pkg_fingerprint(cx.bcx, &unit.pkg).chain_err(|| {
format!(
"failed to determine package fingerprint for build script for {}",
unit.pkg
)
})
}),
)?
.unwrap();
let output = deps.build_script_output.clone();
// Include any dependencies of our execution, which is typically just the

View file

@ -96,6 +96,15 @@ impl<'cfg> PathSource<'cfg> {
/// are relevant for building this package, but it also contains logic to
/// use other methods like .gitignore to filter the list of files.
pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathBuf>> {
self._list_files(pkg).chain_err(|| {
format!(
"failed to determine list of files in {}",
pkg.root().display()
)
})
}
fn _list_files(&self, pkg: &Package) -> CargoResult<Vec<PathBuf>> {
let root = pkg.root();
let no_include_option = pkg.manifest().include().is_empty();
@ -111,17 +120,21 @@ impl<'cfg> PathSource<'cfg> {
}
let ignore_include = include_builder.build()?;
let ignore_should_package = |relative_path: &Path| -> CargoResult<bool> {
let ignore_should_package = |relative_path: &Path, is_dir: bool| -> CargoResult<bool> {
// "Include" and "exclude" options are mutually exclusive.
if no_include_option {
match ignore_exclude
.matched_path_or_any_parents(relative_path, /* is_dir */ false)
{
match ignore_exclude.matched_path_or_any_parents(relative_path, is_dir) {
Match::None => Ok(true),
Match::Ignore(_) => Ok(false),
Match::Whitelist(_) => Ok(true),
}
} else {
if is_dir {
// Generally, include directives don't list every
// directory (nor should they!). Just skip all directory
// checks, and only check files.
return Ok(true);
}
match ignore_include
.matched_path_or_any_parents(relative_path, /* is_dir */ false)
{
@ -132,7 +145,7 @@ impl<'cfg> PathSource<'cfg> {
}
};
let mut filter = |path: &Path| -> CargoResult<bool> {
let mut filter = |path: &Path, is_dir: bool| -> CargoResult<bool> {
let relative_path = path.strip_prefix(root)?;
let rel = relative_path.as_os_str();
@ -142,13 +155,13 @@ impl<'cfg> PathSource<'cfg> {
return Ok(true);
}
ignore_should_package(relative_path)
ignore_should_package(relative_path, is_dir)
};
// Attempt Git-prepopulate only if no `include` (see rust-lang/cargo#4135).
if no_include_option {
if let Some(result) = self.discover_git_and_list_files(pkg, root, &mut filter) {
return result;
if let Some(result) = self.discover_git_and_list_files(pkg, root, &mut filter)? {
return Ok(result);
}
// no include option and not git repo discovered (see rust-lang/cargo#7183).
return self.list_files_walk_except_dot_files_and_dirs(pkg, &mut filter);
@ -162,50 +175,48 @@ impl<'cfg> PathSource<'cfg> {
&self,
pkg: &Package,
root: &Path,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
) -> Option<CargoResult<Vec<PathBuf>>> {
// If this package is in a Git repository, then we really do want to
// query the Git repository as it takes into account items such as
// `.gitignore`. We're not quite sure where the Git repository is,
// however, so we do a bit of a probe.
//
// We walk this package's path upwards and look for a sibling
// `Cargo.toml` and `.git` directory. If we find one then we assume that
// we're part of that repository.
let mut cur = root;
loop {
if cur.join("Cargo.toml").is_file() {
// If we find a Git repository next to this `Cargo.toml`, we still
// check to see if we are indeed part of the index. If not, then
// this is likely an unrelated Git repo, so keep going.
if let Ok(repo) = git2::Repository::open(cur) {
let index = match repo.index() {
Ok(index) => index,
Err(err) => return Some(Err(err.into())),
};
let path = root.strip_prefix(cur).unwrap().join("Cargo.toml");
if index.get_path(&path, 0).is_some() {
return Some(self.list_files_git(pkg, &repo, filter));
}
}
}
// Don't cross submodule boundaries.
if cur.join(".git").is_dir() {
break;
}
match cur.parent() {
Some(parent) => cur = parent,
None => break,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<Option<Vec<PathBuf>>> {
let repo = match git2::Repository::discover(root) {
Ok(repo) => repo,
Err(e) => {
log::debug!(
"could not discover git repo at or above {}: {}",
root.display(),
e
);
return Ok(None);
}
};
let index = repo
.index()
.chain_err(|| format!("failed to open git index at {}", repo.path().display()))?;
let repo_root = repo.workdir().ok_or_else(|| {
anyhow::format_err!(
"did not expect repo at {} to be bare",
repo.path().display()
)
})?;
let repo_relative_path = root.strip_prefix(repo_root).chain_err(|| {
format!(
"expected git repo {} to be parent of package {}",
repo.path().display(),
root.display()
)
})?;
let manifest_path = repo_relative_path.join("Cargo.toml");
if index.get_path(&manifest_path, 0).is_some() {
return Ok(Some(self.list_files_git(pkg, &repo, filter)?));
}
None
// Package Cargo.toml is not in git, don't use git to guide our selection.
Ok(None)
}
fn list_files_git(
&self,
pkg: &Package,
repo: &git2::Repository,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<Vec<PathBuf>> {
warn!("list_files_git {}", pkg.package_id());
let index = repo.index()?;
@ -289,7 +300,10 @@ impl<'cfg> PathSource<'cfg> {
continue;
}
if is_dir.unwrap_or_else(|| file_path.is_dir()) {
// `is_dir` is None for symlinks. The `unwrap` checks if the
// symlink points to a directory.
let is_dir = is_dir.unwrap_or_else(|| file_path.is_dir());
if is_dir {
warn!(" found submodule {}", file_path.display());
let rel = file_path.strip_prefix(root)?;
let rel = rel.to_str().ok_or_else(|| {
@ -307,7 +321,8 @@ impl<'cfg> PathSource<'cfg> {
PathSource::walk(&file_path, &mut ret, false, filter)?;
}
}
} else if (*filter)(&file_path)? {
} else if (*filter)(&file_path, is_dir)? {
assert!(!is_dir);
// We found a file!
warn!(" found {}", file_path.display());
ret.push(file_path);
@ -338,29 +353,28 @@ impl<'cfg> PathSource<'cfg> {
fn list_files_walk_except_dot_files_and_dirs(
&self,
pkg: &Package,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<Vec<PathBuf>> {
let root = pkg.root();
let mut exclude_dot_files_dir_builder = GitignoreBuilder::new(root);
exclude_dot_files_dir_builder.add_line(None, ".*")?;
let ignore_dot_files_and_dirs = exclude_dot_files_dir_builder.build()?;
let mut filter_ignore_dot_files_and_dirs = |path: &Path| -> CargoResult<bool> {
let relative_path = path.strip_prefix(root)?;
match ignore_dot_files_and_dirs
.matched_path_or_any_parents(relative_path, /* is_dir */ false)
{
Match::Ignore(_) => Ok(false),
_ => filter(path),
}
};
let mut filter_ignore_dot_files_and_dirs =
|path: &Path, is_dir: bool| -> CargoResult<bool> {
let relative_path = path.strip_prefix(root)?;
match ignore_dot_files_and_dirs.matched_path_or_any_parents(relative_path, is_dir) {
Match::Ignore(_) => Ok(false),
_ => filter(path, is_dir),
}
};
self.list_files_walk(pkg, &mut filter_ignore_dot_files_and_dirs)
}
fn list_files_walk(
&self,
pkg: &Package,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<Vec<PathBuf>> {
let mut ret = Vec::new();
PathSource::walk(pkg.root(), &mut ret, true, filter)?;
@ -371,12 +385,14 @@ impl<'cfg> PathSource<'cfg> {
path: &Path,
ret: &mut Vec<PathBuf>,
is_root: bool,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<()> {
if !path.is_dir() {
if (*filter)(path)? {
ret.push(path.to_path_buf());
}
let is_dir = path.is_dir();
if !is_root && !(*filter)(path, is_dir)? {
return Ok(());
}
if !is_dir {
ret.push(path.to_path_buf());
return Ok(());
}
// Don't recurse into any sub-packages that we have.
@ -415,7 +431,12 @@ impl<'cfg> PathSource<'cfg> {
let mut max = FileTime::zero();
let mut max_path = PathBuf::new();
for file in self.list_files(pkg)? {
for file in self.list_files(pkg).chain_err(|| {
format!(
"failed to determine the most recently modified file in {}",
pkg.root().display()
)
})? {
// An `fs::stat` error here is either because path is a
// broken symlink, a permissions error, or a race
// condition where this path was `rm`-ed -- either way,

View file

@ -3977,7 +3977,9 @@ fn links_interrupted_can_restart() {
fn build_script_scan_eacces() {
// build.rs causes a scan of the whole project, which can be a problem if
// a directory is not accessible.
use cargo_test_support::git;
use std::os::unix::fs::PermissionsExt;
let p = project()
.file("src/lib.rs", "")
.file("build.rs", "fn main() {}")
@ -3985,12 +3987,21 @@ fn build_script_scan_eacces() {
.build();
let path = p.root().join("secrets");
fs::set_permissions(&path, fs::Permissions::from_mode(0)).unwrap();
// "Caused by" is a string from libc such as the following:
// The last "Caused by" is a string from libc such as the following:
// Permission denied (os error 13)
p.cargo("build")
.with_stderr(
"\
[ERROR] cannot read \"[..]/foo/secrets\"
[ERROR] failed to determine package fingerprint for build script for foo v0.0.1 ([..]/foo)
Caused by:
failed to determine the most recently modified file in [..]/foo
Caused by:
failed to determine list of files in [..]/foo
Caused by:
cannot read \"[..]/foo/secrets\"
Caused by:
[..]
@ -3998,5 +4009,28 @@ Caused by:
)
.with_status(101)
.run();
// Try `package.exclude` to skip a directory.
p.change_file(
"Cargo.toml",
r#"
[package]
name = "foo"
version = "0.0.1"
exclude = ["secrets"]
"#,
);
p.cargo("build").run();
// Try with git. This succeeds because the git status walker ignores
// directories it can't access.
p.change_file("Cargo.toml", &basic_manifest("foo", "0.0.1"));
p.build_dir().rm_rf();
let repo = git::init(&p.root());
git::add(&repo);
git::commit(&repo);
p.cargo("build").run();
// Restore permissions so that the directory can be deleted.
fs::set_permissions(&path, fs::Permissions::from_mode(0o755)).unwrap();
}

View file

@ -6,7 +6,7 @@ use cargo_test_support::{
basic_manifest, cargo_process, git, path2url, paths, project, publish::validate_crate_contents,
registry, symlink_supported, t,
};
use std::fs::{read_to_string, File};
use std::fs::{self, read_to_string, File};
use std::path::Path;
#[cargo_test]
@ -1691,3 +1691,56 @@ fn package_restricted_windows() {
)
.run();
}
#[cargo_test]
fn finds_git_in_parent() {
// Test where `Cargo.toml` is not in the root of the git repo.
let repo_path = paths::root().join("repo");
fs::create_dir(&repo_path).unwrap();
let p = project()
.at("repo/foo")
.file("Cargo.toml", &basic_manifest("foo", "0.1.0"))
.file("src/lib.rs", "")
.build();
let repo = git::init(&repo_path);
git::add(&repo);
git::commit(&repo);
p.change_file("ignoreme", "");
p.change_file("ignoreme2", "");
p.cargo("package --list --allow-dirty")
.with_stdout(
"\
Cargo.toml
Cargo.toml.orig
ignoreme
ignoreme2
src/lib.rs
",
)
.run();
p.change_file(".gitignore", "ignoreme");
p.cargo("package --list --allow-dirty")
.with_stdout(
"\
.gitignore
Cargo.toml
Cargo.toml.orig
ignoreme2
src/lib.rs
",
)
.run();
fs::write(repo_path.join(".gitignore"), "ignoreme2").unwrap();
p.cargo("package --list --allow-dirty")
.with_stdout(
"\
.gitignore
Cargo.toml
Cargo.toml.orig
src/lib.rs
",
)
.run();
}

View file

@ -469,6 +469,7 @@ fn ignore_lockfile_inner() {
"\
[PACKAGING] bar v0.0.1 ([..])
[ARCHIVING] .cargo_vcs_info.json
[ARCHIVING] .gitignore
[ARCHIVING] Cargo.lock
[ARCHIVING] Cargo.toml
[ARCHIVING] Cargo.toml.orig