Auto merge of #9383 - alexcrichton:revert-git-head, r=ehuss

[beta] Revert #9133, moving to git HEAD dependencies by default

This PR reverts #9133 on the beta branch. The reason for this is that I would like to take some more time to investigate fixing https://github.com/rust-lang/cargo/issues/9352 and https://github.com/rust-lang/cargo/issues/9334. I think those should be relatively easy to fix but I would prefer to not be too rushed with the next release happening soon.

We realized today in the cargo meeting that this revert is likely to not have a ton of impact. For any projects using the v3 lock file format they'll continue to use it successfully. For projects on stable still using v2 they remain unaffected. This will ideally simply give some more time to fix these issues on nightly.
This commit is contained in:
bors 2021-04-20 22:27:05 +00:00
commit bf397482f2
11 changed files with 309 additions and 66 deletions

View File

@ -14,8 +14,10 @@ use crate::core::resolver::errors::describe_path;
use crate::core::resolver::types::{ConflictReason, DepInfo, FeaturesSet};
use crate::core::resolver::{ActivateError, ActivateResult, ResolveOpts};
use crate::core::{Dependency, FeatureValue, PackageId, PackageIdSpec, Registry, Summary};
use crate::core::{GitReference, SourceId};
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::interning::InternedString;
use crate::util::Config;
use log::debug;
use std::cmp::Ordering;
use std::collections::{BTreeSet, HashMap, HashSet};
@ -38,6 +40,10 @@ pub struct RegistryQueryer<'a> {
>,
/// all the cases we ended up using a supplied replacement
used_replacements: HashMap<PackageId, Summary>,
/// Where to print warnings, if configured.
config: Option<&'a Config>,
/// Sources that we've already wared about possibly colliding in the future.
warned_git_collisions: HashSet<SourceId>,
}
impl<'a> RegistryQueryer<'a> {
@ -46,6 +52,7 @@ impl<'a> RegistryQueryer<'a> {
replacements: &'a [(PackageIdSpec, Dependency)],
try_to_use: &'a HashSet<PackageId>,
minimal_versions: bool,
config: Option<&'a Config>,
) -> Self {
RegistryQueryer {
registry,
@ -55,6 +62,8 @@ impl<'a> RegistryQueryer<'a> {
registry_cache: HashMap::new(),
summary_cache: HashMap::new(),
used_replacements: HashMap::new(),
config,
warned_git_collisions: HashSet::new(),
}
}
@ -66,6 +75,44 @@ impl<'a> RegistryQueryer<'a> {
self.used_replacements.get(&p)
}
/// Issues a future-compatible warning targeted at removing reliance on
/// unifying behavior between these two dependency directives:
///
/// ```toml
/// [dependencies]
/// a = { git = 'https://example.org/foo' }
/// a = { git = 'https://example.org/foo', branch = 'master }
/// ```
///
/// Historical versions of Cargo considered these equivalent but going
/// forward we'd like to fix this. For more details see the comments in
/// src/cargo/sources/git/utils.rs
fn warn_colliding_git_sources(&mut self, id: SourceId) -> CargoResult<()> {
let config = match self.config {
Some(config) => config,
None => return Ok(()),
};
let prev = match self.warned_git_collisions.replace(id) {
Some(prev) => prev,
None => return Ok(()),
};
match (id.git_reference(), prev.git_reference()) {
(Some(GitReference::DefaultBranch), Some(GitReference::Branch(b)))
| (Some(GitReference::Branch(b)), Some(GitReference::DefaultBranch))
if b == "master" => {}
_ => return Ok(()),
}
config.shell().warn(&format!(
"two git dependencies found for `{}` \
where one uses `branch = \"master\"` and the other doesn't; \
this will break in a future version of Cargo, so please \
ensure the dependency forms are consistent",
id.url(),
))?;
Ok(())
}
/// Queries the `registry` to return a list of candidates for `dep`.
///
/// This method is the location where overrides are taken into account. If
@ -73,6 +120,7 @@ impl<'a> RegistryQueryer<'a> {
/// applied by performing a second query for what the override should
/// return.
pub fn query(&mut self, dep: &Dependency) -> CargoResult<Rc<Vec<Summary>>> {
self.warn_colliding_git_sources(dep.source_id())?;
if let Some(out) = self.registry_cache.get(dep).cloned() {
return Ok(out);
}

View File

@ -133,7 +133,8 @@ pub fn resolve(
Some(config) => config.cli_unstable().minimal_versions,
None => false,
};
let mut registry = RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions);
let mut registry =
RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions, config);
let cx = activate_deps_loop(cx, &mut registry, summaries, config)?;
let mut cksums = HashMap::new();

View File

@ -409,6 +409,6 @@ impl Default for ResolveVersion {
/// file anyway so it takes the opportunity to bump the lock file version
/// forward.
fn default() -> ResolveVersion {
ResolveVersion::V3
ResolveVersion::V2
}
}

View File

@ -394,9 +394,45 @@ impl Ord for SourceId {
// Sort first based on `kind`, deferring to the URL comparison below if
// the kinds are equal.
match self.inner.kind.cmp(&other.inner.kind) {
Ordering::Equal => {}
other => return other,
match (&self.inner.kind, &other.inner.kind) {
(SourceKind::Path, SourceKind::Path) => {}
(SourceKind::Path, _) => return Ordering::Less,
(_, SourceKind::Path) => return Ordering::Greater,
(SourceKind::Registry, SourceKind::Registry) => {}
(SourceKind::Registry, _) => return Ordering::Less,
(_, SourceKind::Registry) => return Ordering::Greater,
(SourceKind::LocalRegistry, SourceKind::LocalRegistry) => {}
(SourceKind::LocalRegistry, _) => return Ordering::Less,
(_, SourceKind::LocalRegistry) => return Ordering::Greater,
(SourceKind::Directory, SourceKind::Directory) => {}
(SourceKind::Directory, _) => return Ordering::Less,
(_, SourceKind::Directory) => return Ordering::Greater,
(SourceKind::Git(a), SourceKind::Git(b)) => {
use GitReference::*;
let ord = match (a, b) {
(Tag(a), Tag(b)) => a.cmp(b),
(Tag(_), _) => Ordering::Less,
(_, Tag(_)) => Ordering::Greater,
(Rev(a), Rev(b)) => a.cmp(b),
(Rev(_), _) => Ordering::Less,
(_, Rev(_)) => Ordering::Greater,
// See module comments in src/cargo/sources/git/utils.rs
// for why `DefaultBranch` is treated specially here.
(Branch(a), DefaultBranch) => a.as_str().cmp("master"),
(DefaultBranch, Branch(b)) => "master".cmp(b),
(Branch(a), Branch(b)) => a.cmp(b),
(DefaultBranch, DefaultBranch) => Ordering::Equal,
};
if ord != Ordering::Equal {
return ord;
}
}
}
// If the `kind` and the `url` are equal, then for git sources we also
@ -473,9 +509,43 @@ impl fmt::Display for SourceId {
// The hash of SourceId is used in the name of some Cargo folders, so shouldn't
// vary. `as_str` gives the serialisation of a url (which has a spec) and so
// insulates against possible changes in how the url crate does hashing.
//
// Note that the semi-funky hashing here is done to handle `DefaultBranch`
// hashing the same as `"master"`, and also to hash the same as previous
// versions of Cargo while it's somewhat convenient to do so (that way all
// versions of Cargo use the same checkout).
impl Hash for SourceId {
fn hash<S: hash::Hasher>(&self, into: &mut S) {
self.inner.kind.hash(into);
match &self.inner.kind {
SourceKind::Git(GitReference::Tag(a)) => {
0usize.hash(into);
0usize.hash(into);
a.hash(into);
}
SourceKind::Git(GitReference::Branch(a)) => {
0usize.hash(into);
1usize.hash(into);
a.hash(into);
}
// For now hash `DefaultBranch` the same way as `Branch("master")`,
// and for more details see module comments in
// src/cargo/sources/git/utils.rs for why `DefaultBranch`
SourceKind::Git(GitReference::DefaultBranch) => {
0usize.hash(into);
1usize.hash(into);
"master".hash(into);
}
SourceKind::Git(GitReference::Rev(a)) => {
0usize.hash(into);
2usize.hash(into);
a.hash(into);
}
SourceKind::Path => 1usize.hash(into),
SourceKind::Registry => 2usize.hash(into),
SourceKind::LocalRegistry => 3usize.hash(into),
SourceKind::Directory => 4usize.hash(into),
}
match self.inner.kind {
SourceKind::Git(_) => self.inner.canonical_url.hash(into),
_ => self.inner.url.as_str().hash(into),

View File

@ -15,12 +15,10 @@ use crate::core::registry::PackageRegistry;
use crate::core::resolver::features::{
FeatureOpts, FeatureResolver, ForceAllTargets, ResolvedFeatures,
};
use crate::core::resolver::{self, HasDevUnits, Resolve, ResolveOpts, ResolveVersion};
use crate::core::resolver::{self, HasDevUnits, Resolve, ResolveOpts};
use crate::core::summary::Summary;
use crate::core::Feature;
use crate::core::{
GitReference, PackageId, PackageIdSpec, PackageSet, Source, SourceId, Workspace,
};
use crate::core::{PackageId, PackageIdSpec, PackageSet, Source, SourceId, Workspace};
use crate::ops;
use crate::sources::PathSource;
use crate::util::errors::{CargoResult, CargoResultExt};
@ -601,31 +599,7 @@ fn register_previous_locks(
.deps_not_replaced(node)
.map(|p| p.0)
.filter(keep)
.collect::<Vec<_>>();
// In the v2 lockfile format and prior the `branch=master` dependency
// directive was serialized the same way as the no-branch-listed
// directive. Nowadays in Cargo, however, these two directives are
// considered distinct and are no longer represented the same way. To
// maintain compatibility with older lock files we register locked nodes
// for *both* the master branch and the default branch.
//
// Note that this is only applicable for loading older resolves now at
// this point. All new lock files are encoded as v3-or-later, so this is
// just compat for loading an old lock file successfully.
if resolve.version() <= ResolveVersion::V2 {
let source = node.source_id();
if let Some(GitReference::DefaultBranch) = source.git_reference() {
let new_source =
SourceId::for_git(source.url(), GitReference::Branch("master".to_string()))
.unwrap()
.with_precise(source.precise().map(|s| s.to_string()));
let node = node.with_source_id(new_source);
registry.register_lock(node, deps.clone());
}
}
.collect();
registry.register_lock(node, deps);
}

View File

@ -126,10 +126,12 @@ impl<'cfg> Source for GitSource<'cfg> {
// database, then try to resolve our reference with the preexisting
// repository.
(None, Some(db)) if self.config.offline() => {
let rev = db.resolve(&self.manifest_reference).with_context(|| {
"failed to lookup reference in preexisting repository, and \
let rev = db
.resolve(&self.manifest_reference, None)
.with_context(|| {
"failed to lookup reference in preexisting repository, and \
can't check for updates in offline mode (--offline)"
})?;
})?;
(db, rev)
}

View File

@ -1,5 +1,110 @@
//! Utilities for handling git repositories, mainly around
//! authentication/cloning.
//!
//! # `DefaultBranch` vs `Branch("master")`
//!
//! Long ago in a repository not so far away, an author (*cough* me *cough*)
//! didn't understand how branches work in Git. This led the author to
//! interpret these two dependency declarations the exact same way with the
//! former literally internally desugaring to the latter:
//!
//! ```toml
//! [dependencies]
//! foo = { git = "https://example.org/foo" }
//! foo = { git = "https://example.org/foo", branch = "master" }
//! ```
//!
//! It turns out there's this things called `HEAD` in git remotes which points
//! to the "main branch" of a repository, and the main branch is not always
//! literally called master. What Cargo would like to do is to differentiate
//! these two dependency directives, with the first meaning "depend on `HEAD`".
//!
//! Unfortunately implementing this is a breaking change. This was first
//! attempted in #8364 but resulted in #8468 which has two independent bugs
//! listed on that issue. Despite this breakage we would still like to roll out
//! this change in Cargo, but we're now going to take it very slow and try to
//! break as few people as possible along the way. These comments are intended
//! to log the current progress and what wonkiness you might see within Cargo
//! when handling `DefaultBranch` vs `Branch("master")`
//!
//! ### Repositories with `master` and a default branch
//!
//! This is one of the most obvious sources of breakage. If our `foo` example
//! in above had two branches, one called `master` and another which was
//! actually the main branch, then Cargo's change will always be a breaking
//! change. This is because what's downloaded is an entirely different branch
//! if we change the meaning of the dependency directive.
//!
//! It's expected this is quite rare, but to handle this case nonetheless when
//! Cargo fetches from a git remote and the dependency specification is
//! `DefaultBranch` then it will issue a warning if the `HEAD` reference
//! doesn't match `master`. It's expected in this situation that authors will
//! fix builds locally by specifying `branch = 'master'`.
//!
//! ### Differences in `cargo vendor` configuration
//!
//! When executing `cargo vendor` it will print out configuration which can
//! then be used to configure Cargo to use the `vendor` directory. Historically
//! this configuration looked like:
//!
//! ```toml
//! [source."https://example.org/foo"]
//! git = "https://example.org/foo"
//! branch = "master"
//! replace-with = "vendored-sources"
//! ```
//!
//! We would like to, however, transition this to not include the `branch =
//! "master"` unless the dependency directive actually mentions a branch.
//! Conveniently older Cargo implementations all interpret a missing `branch`
//! as `branch = "master"` so it's a backwards-compatible change to remove the
//! `branch = "master"` directive. As a result, `cargo vendor` will no longer
//! emit a `branch` if the git reference is `DefaultBranch`
//!
//! ### Differences in lock file formats
//!
//! Another issue pointed out in #8364 was that `Cargo.lock` files were no
//! longer compatible on stable and nightly with each other. The underlying
//! issue is that Cargo was serializing `branch = "master"` *differently* on
//! nightly than it was on stable. Historical implementations of Cargo would
//! encode `DefaultBranch` and `Branch("master")` the same way in `Cargo.lock`,
//! so when reading a lock file we have no way of differentiating between the
//! two.
//!
//! To handle this difference in encoding of `Cargo.lock` we'll be employing
//! the standard scheme to change `Cargo.lock`:
//!
//! * Add support in Cargo for a future format, don't turn it on.
//! * Wait a long time
//! * Turn on the future format
//!
//! Here the "future format" is `branch=master` shows up if you have a `branch`
//! in `Cargo.toml`, and otherwise nothing shows up in URLs. Due to the effect
//! on crate graph resolution, however, this flows into the next point..
//!
//! ### Unification in the Cargo dependency graph
//!
//! Today dependencies with `branch = "master"` will unify with dependencies
//! that say nothing. (that's because the latter simply desugars). This means
//! the two `foo` directives above will resolve to the same dependency.
//!
//! The best idea I've got to fix this is to basically get everyone (if anyone)
//! to stop doing this today. The crate graph resolver will start to warn if it
//! detects that multiple `Cargo.toml` directives are detected and mixed. The
//! thinking is that when we turn on the new lock file format it'll also be
//! hard breaking change for any project which still has dependencies to
//! both the `master` branch and not.
//!
//! ### What we're doing today
//!
//! The general goal of Cargo today is to internally distinguish
//! `DefaultBranch` and `Branch("master")`, but for the time being they should
//! be functionally equivalent in terms of builds. The hope is that we'll let
//! all these warnings and such bake for a good long time, and eventually we'll
//! flip some switches if your build has no warnings it'll work before and
//! after.
//!
//! That's the dream at least, we'll see how this plays out.
use crate::core::GitReference;
use crate::util::errors::{CargoResult, CargoResultExt};
@ -77,7 +182,7 @@ impl GitRemote {
}
pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult<git2::Oid> {
reference.resolve(&self.db_at(path)?.repo)
reference.resolve(&self.db_at(path)?.repo, None)
}
pub fn checkout(
@ -102,7 +207,7 @@ impl GitRemote {
}
}
None => {
if let Ok(rev) = reference.resolve(&db.repo) {
if let Ok(rev) = reference.resolve(&db.repo, Some((&self.url, cargo_config))) {
return Ok((db, rev));
}
}
@ -121,7 +226,7 @@ impl GitRemote {
.context(format!("failed to clone into: {}", into.display()))?;
let rev = match locked_rev {
Some(rev) => rev,
None => reference.resolve(&repo)?,
None => reference.resolve(&repo, Some((&self.url, cargo_config)))?,
};
Ok((
@ -190,13 +295,21 @@ impl GitDatabase {
self.repo.revparse_single(&oid.to_string()).is_ok()
}
pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
r.resolve(&self.repo)
pub fn resolve(
&self,
r: &GitReference,
remote_and_config: Option<(&Url, &Config)>,
) -> CargoResult<git2::Oid> {
r.resolve(&self.repo, remote_and_config)
}
}
impl GitReference {
pub fn resolve(&self, repo: &git2::Repository) -> CargoResult<git2::Oid> {
pub fn resolve(
&self,
repo: &git2::Repository,
remote_and_config: Option<(&Url, &Config)>,
) -> CargoResult<git2::Oid> {
let id = match self {
// Note that we resolve the named tag here in sync with where it's
// fetched into via `fetch` below.
@ -221,11 +334,38 @@ impl GitReference {
.ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
}
// We'll be using the HEAD commit
// See the module docs for why we're using `master` here.
GitReference::DefaultBranch => {
let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
let head = repo.find_object(head_id, None)?;
head.peel(ObjectType::Commit)?.id()
let master = repo
.find_branch("origin/master", git2::BranchType::Remote)
.chain_err(|| "failed to find branch `master`")?;
let master = master
.get()
.target()
.ok_or_else(|| anyhow::format_err!("branch `master` did not have a target"))?;
if let Some((remote, config)) = remote_and_config {
let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
let head = repo.find_object(head_id, None)?;
let head = head.peel(ObjectType::Commit)?.id();
if head != master {
config.shell().warn(&format!(
"\
fetching `master` branch from `{}` but the `HEAD` \
reference for this repository is not the \
`master` branch. This behavior will change \
in Cargo in the future and your build may \
break, so it's recommended to place \
`branch = \"master\"` in Cargo.toml when \
depending on this git repository to ensure \
that your build will continue to work.\
",
remote,
))?;
}
}
master
}
GitReference::Rev(s) => {
@ -759,6 +899,8 @@ pub fn fetch(
}
GitReference::DefaultBranch => {
// See the module docs for why we're fetching `master` here.
refspecs.push(String::from("refs/heads/master:refs/remotes/origin/master"));
refspecs.push(String::from("HEAD:refs/remotes/origin/HEAD"));
}
@ -1024,7 +1166,10 @@ fn github_up_to_date(
handle.useragent("cargo")?;
let mut headers = List::new();
headers.append("Accept: application/vnd.github.3.sha")?;
headers.append(&format!("If-None-Match: \"{}\"", reference.resolve(repo)?))?;
headers.append(&format!(
"If-None-Match: \"{}\"",
reference.resolve(repo, None)?
))?;
handle.http_headers(headers)?;
handle.perform()?;
Ok(handle.response_code()? == 304)

View File

@ -106,7 +106,7 @@ impl<'cfg> RemoteRegistry<'cfg> {
fn head(&self) -> CargoResult<git2::Oid> {
if self.head.get().is_none() {
let repo = self.repo()?;
let oid = self.index_git_ref.resolve(repo)?;
let oid = self.index_git_ref.resolve(repo, None)?;
self.head.set(Some(oid));
}
Ok(self.head.get().unwrap())

View File

@ -2805,7 +2805,7 @@ fn default_not_master() {
// Then create a commit on the new `main` branch so `master` and `main`
// differ.
git_project.change_file("src/lib.rs", "pub fn bar() {}");
git_project.change_file("src/lib.rs", "");
git::add(&repo);
git::commit(&repo);
@ -2817,13 +2817,14 @@ fn default_not_master() {
[project]
name = "foo"
version = "0.5.0"
[dependencies]
dep1 = {{ git = '{}' }}
"#,
git_project.url()
),
)
.file("src/lib.rs", "pub fn foo() { dep1::bar() }")
.file("src/lib.rs", "pub fn foo() { dep1::foo() }")
.build();
project
@ -2831,6 +2832,14 @@ fn default_not_master() {
.with_stderr(
"\
[UPDATING] git repository `[..]`
warning: fetching `master` branch from `[..]` but the `HEAD` \
reference for this repository is not the \
`master` branch. This behavior will change \
in Cargo in the future and your build may \
break, so it's recommended to place \
`branch = \"master\"` in Cargo.toml when \
depending on this git repository to ensure \
that your build will continue to work.
[COMPILING] dep1 v0.5.0 ([..])
[COMPILING] foo v0.5.0 ([..])
[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]",
@ -2968,6 +2977,7 @@ fn two_dep_forms() {
[project]
name = "foo"
version = "0.5.0"
[dependencies]
dep1 = {{ git = '{}', branch = 'master' }}
a = {{ path = 'a' }}
@ -2983,6 +2993,7 @@ fn two_dep_forms() {
[project]
name = "a"
version = "0.5.0"
[dependencies]
dep1 = {{ git = '{}' }}
"#,
@ -2992,16 +3003,15 @@ fn two_dep_forms() {
.file("a/src/lib.rs", "")
.build();
// This'll download the git repository twice, one with HEAD and once with
// the master branch. Then it'll compile 4 crates, the 2 git deps, then
// the two local deps.
project
.cargo("build")
.with_stderr(
"\
[UPDATING] [..]
[UPDATING] [..]
[COMPILING] [..]
warning: two git dependencies found for `[..]` where one uses `branch = \"master\"` \
and the other doesn't; this will break in a future version of Cargo, so please \
ensure the dependency forms are consistent
warning: [..]
[COMPILING] [..]
[COMPILING] [..]
[COMPILING] [..]

View File

@ -26,8 +26,6 @@ fn oldest_lockfile_still_works_with_command(cargo_command: &str) {
let expected_lockfile = r#"# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "bar"
version = "0.1.0"
@ -172,8 +170,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
assert_lockfiles_eq(
r#"# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "bar"
version = "0.1.0"
@ -412,8 +408,6 @@ fn current_lockfile_format() {
let expected = "\
# This file is automatically @generated by Cargo.\n# It is not intended for manual editing.
version = 3
[[package]]
name = \"bar\"
version = \"0.1.0\"
@ -474,8 +468,6 @@ dependencies = [
assert_lockfiles_eq(
r#"# [..]
# [..]
version = 3
[[package]]
name = "bar"
version = "0.1.0"

View File

@ -584,7 +584,8 @@ fn preserve_top_comment() {
let mut lines = lockfile.lines().collect::<Vec<_>>();
lines.insert(2, "# some other comment");
let mut lockfile = lines.join("\n");
lockfile.push('\n'); // .lines/.join loses the last newline
lockfile.push_str("\n\n"); // .lines/.join loses the last newline
// >>>>>>> parent of 7dd9872c1... Change git dependencies to use `HEAD` by default
println!("saving Cargo.lock contents:\n{}", lockfile);
p.change_file("Cargo.lock", &lockfile);