fix(git): represent deferred and resolved revision with an enum

`cargo update --precise` might pass in any arbitrary Git reference,
and `git2::Oid::from_str` would always zero-pad the given str if it is
not a full SHA hex string.

This introduces an enum `Revision` to represent `locked_rev`
that is either deferred or resolved to an actual object ID.
When `locked_rev` is a short ID or any other Git reference,
Cargo first performs a Git fetch to resolve it (without --offline),
and then locks it to an actual commit object ID.
This commit is contained in:
Weihang Lo 2024-01-04 11:00:09 -05:00
parent 4be3614264
commit 2734097f12
No known key found for this signature in database
GPG key ID: D7DBF189825E82E7
5 changed files with 107 additions and 66 deletions

View file

@ -460,21 +460,14 @@ impl SourceId {
}
pub fn precise_git_fragment(self) -> Option<&'static str> {
match &self.inner.precise {
Some(Precise::GitUrlFragment(s)) => Some(&s[..8]),
_ => None,
}
self.precise_full_git_fragment().map(|s| &s[..8])
}
pub fn precise_git_oid(self) -> CargoResult<Option<git2::Oid>> {
Ok(match self.inner.precise.as_ref() {
Some(Precise::GitUrlFragment(s)) => {
Some(git2::Oid::from_str(s).with_context(|| {
format!("precise value for git is not a git revision: {}", s)
})?)
}
pub fn precise_full_git_fragment(self) -> Option<&'static str> {
match &self.inner.precise {
Some(Precise::GitUrlFragment(s)) => Some(&s),
_ => None,
})
}
}
/// Creates a new `SourceId` from this source with the given `precise`.

View file

@ -71,8 +71,9 @@ pub struct GitSource<'cfg> {
/// The Git reference from the manifest file.
manifest_reference: GitReference,
/// The revision which a git source is locked to.
/// This is expected to be set after the Git repository is fetched.
locked_rev: Option<git2::Oid>,
///
/// Expected to always be [`Revision::Locked`] after the Git repository is fetched.
locked_rev: Revision,
/// The unique identifier of this source.
source_id: SourceId,
/// The underlying path source to discover packages inside the Git repository.
@ -103,7 +104,11 @@ impl<'cfg> GitSource<'cfg> {
let remote = GitRemote::new(source_id.url());
let manifest_reference = source_id.git_reference().unwrap().clone();
let locked_rev = source_id.precise_git_oid()?;
let locked_rev = source_id
.precise_full_git_fragment()
.map(|s| Revision::new(s.into()))
.unwrap_or_else(|| source_id.git_reference().unwrap().clone().into());
let ident = ident_shallow(
&source_id,
config
@ -155,6 +160,49 @@ impl<'cfg> GitSource<'cfg> {
}
}
/// Indicates a [Git revision] that might be locked or deferred to be resolved.
///
/// [Git revision]: https://git-scm.com/docs/revisions
#[derive(Clone, Debug)]
enum Revision {
/// A [Git reference] that would trigger extra fetches when being resolved.
///
/// [Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References
Deferred(GitReference),
/// A locked revision of the actual Git commit object ID.
Locked(git2::Oid),
}
impl Revision {
fn new(rev: &str) -> Revision {
let oid = git2::Oid::from_str(rev).ok();
match oid {
// Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes.
// Its length must be double to the underlying bytes (40 or 64),
// otherwise libgit2 would happily zero-pad the returned oid.
// See rust-lang/cargo#13188
Some(oid) if oid.as_bytes().len() * 2 == rev.len() => Revision::Locked(oid),
_ => Revision::Deferred(GitReference::Rev(rev.to_string())),
}
}
}
impl From<GitReference> for Revision {
fn from(value: GitReference) -> Self {
Revision::Deferred(value)
}
}
impl From<Revision> for GitReference {
fn from(value: Revision) -> Self {
match value {
Revision::Deferred(git_ref) => git_ref,
Revision::Locked(oid) => GitReference::Rev(oid.to_string()),
}
}
}
/// Create an identifier from a URL,
/// essentially turning `proto://host/path/repo` into `repo-<hash-of-url>`.
fn ident(id: &SourceId) -> String {
@ -252,16 +300,17 @@ impl<'cfg> Source for GitSource<'cfg> {
let db_path = db_path.into_path_unlocked();
let db = self.remote.db_at(&db_path).ok();
let (db, actual_rev) = match (self.locked_rev, db) {
let (db, actual_rev) = match (&self.locked_rev, db) {
// If we have a locked revision, and we have a preexisting database
// which has that revision, then no update needs to happen.
(Some(rev), Some(db)) if db.contains(rev) => (db, rev),
(Revision::Locked(oid), Some(db)) if db.contains(*oid) => (db, *oid),
// If we're in offline mode, we're not locked, and we have a
// database, then try to resolve our reference with the preexisting
// repository.
(None, Some(db)) if self.config.offline() => {
let rev = db.resolve(&self.manifest_reference).with_context(|| {
(Revision::Deferred(git_ref), Some(db)) if self.config.offline() => {
let rev = db.resolve(&git_ref).with_context(|| {
"failed to lookup reference in preexisting repository, and \
can't check for updates in offline mode (--offline)"
})?;
@ -279,6 +328,7 @@ impl<'cfg> Source for GitSource<'cfg> {
self.remote.url()
);
}
if !self.quiet {
self.config.shell().status(
"Updating",
@ -288,13 +338,9 @@ impl<'cfg> Source for GitSource<'cfg> {
trace!("updating git source `{:?}`", self.remote);
self.remote.checkout(
&db_path,
db,
&self.manifest_reference,
locked_rev,
self.config,
)?
let locked_rev = locked_rev.clone().into();
self.remote
.checkout(&db_path, db, &locked_rev, self.config)?
}
};
@ -321,7 +367,7 @@ impl<'cfg> Source for GitSource<'cfg> {
self.path_source = Some(path_source);
self.short_id = Some(short_id.as_str().into());
self.locked_rev = Some(actual_rev);
self.locked_rev = Revision::Locked(actual_rev);
self.path_source.as_mut().unwrap().update()?;
// Hopefully this shouldn't incur too much of a performance hit since
@ -350,7 +396,10 @@ impl<'cfg> Source for GitSource<'cfg> {
}
fn fingerprint(&self, _pkg: &Package) -> CargoResult<String> {
Ok(self.locked_rev.as_ref().unwrap().to_string())
match &self.locked_rev {
Revision::Locked(oid) => Ok(oid.to_string()),
_ => unreachable!("locked_rev must be resolved when computing fingerprint"),
}
}
fn describe(&self) -> String {

View file

@ -95,8 +95,6 @@ impl GitRemote {
/// This ensures that it gets the up-to-date commit when a named reference
/// is given (tag, branch, refs/*). Thus, network connection is involved.
///
/// When `locked_rev` is provided, it takes precedence over `reference`.
///
/// If we have a previous instance of [`GitDatabase`] then fetch into that
/// if we can. If that can successfully load our revision then we've
/// populated the database with the latest version of `reference`, so
@ -106,11 +104,8 @@ impl GitRemote {
into: &Path,
db: Option<GitDatabase>,
reference: &GitReference,
locked_rev: Option<git2::Oid>,
cargo_config: &Config,
) -> CargoResult<(GitDatabase, git2::Oid)> {
let locked_ref = locked_rev.map(|oid| GitReference::Rev(oid.to_string()));
let reference = locked_ref.as_ref().unwrap_or(reference);
if let Some(mut db) = db {
fetch(
&mut db.repo,
@ -121,11 +116,7 @@ impl GitRemote {
)
.with_context(|| format!("failed to fetch into: {}", into.display()))?;
let resolved_commit_hash = match locked_rev {
Some(rev) => db.contains(rev).then_some(rev),
None => resolve_ref(reference, &db.repo).ok(),
};
if let Some(rev) = resolved_commit_hash {
if let Some(rev) = resolve_ref(reference, &db.repo).ok() {
return Ok((db, rev));
}
}
@ -146,10 +137,7 @@ impl GitRemote {
RemoteKind::GitDependency,
)
.with_context(|| format!("failed to clone into: {}", into.display()))?;
let rev = match locked_rev {
Some(rev) => rev,
None => resolve_ref(reference, &repo)?,
};
let rev = resolve_ref(reference, &repo)?;
Ok((
GitDatabase {

View file

@ -757,13 +757,11 @@ fn update_with_shared_deps() {
.with_status(101)
.with_stderr(
"\
[UPDATING] git repository [..]
[ERROR] Unable to update [..]
Caused by:
precise value for git is not a git revision: 0.1.2
Caused by:
unable to parse OID - contains invalid characters; class=Invalid (3)
revspec '0.1.2' not found; class=Reference (4); code=NotFound (-3)
",
)
.run();

View file

@ -1281,6 +1281,7 @@ fn update_precise_git_revisions() {
});
let tag_name = "Nazgûl";
git::tag(&git_repo, tag_name);
let tag_commit_id = git_repo.head().unwrap().target().unwrap().to_string();
git_project.change_file("src/lib.rs", "fn f() {}");
git::add(&git_repo);
@ -1313,29 +1314,41 @@ fn update_precise_git_revisions() {
p.cargo("update git --precise")
.arg(tag_name)
.with_status(101)
.with_stderr(format!(
"\
[ERROR] Unable to update {url}#{tag_name}
Caused by:
precise value for git is not a git revision: {tag_name}
Caused by:
[..]"
))
.run();
p.cargo("update git --precise")
.arg(short_id)
.with_status(101)
.with_stderr(format!(
"\
[UPDATING] git repository `{url}`
[ERROR] Unable to update {url}#{short_id}
Caused by:
object not found - no match for id ({short_id}00000000000000[..]); [..]",
[UPDATING] git v0.5.0 ([..]) -> #{}",
&tag_commit_id[..8],
))
.run();
assert!(p.read_lockfile().contains(&tag_commit_id));
assert!(!p.read_lockfile().contains(&head_id));
p.cargo("update git --precise")
.arg(short_id)
.with_stderr(format!(
"\
[UPDATING] git repository `{url}`
[UPDATING] git v0.5.0 ([..]) -> #{short_id}",
))
.run();
assert!(p.read_lockfile().contains(&head_id));
assert!(!p.read_lockfile().contains(&tag_commit_id));
// updating back to tag still requires a git fetch,
// as the ref may change over time.
p.cargo("update git --precise")
.arg(tag_name)
.with_stderr(format!(
"\
[UPDATING] git repository `{url}`
[UPDATING] git v0.5.0 ([..]) -> #{}",
&tag_commit_id[..8],
))
.run();
assert!(p.read_lockfile().contains(&tag_commit_id));
assert!(!p.read_lockfile().contains(&head_id));
}