Auto merge of #6880 - alexcrichton:cache, r=Eh2406

Parse less JSON on null builds

This commit fixes a performance pathology in Cargo today. Whenever Cargo
generates a lock file (which happens on all invocations of `cargo build`,
for example) Cargo will parse the crates.io index to learn about
dependencies. Currently, however, when it parses a crate it parses the
JSON blob for every single version of that crate. With a lock file or an
incremental build, though, only one of those JSON lines is relevant.
Measured today, Cargo building Cargo parses 3700 JSON dependency lines
from the registry.

This commit implements an optimization that brings the number of parsed
JSON lines in the registry down to precisely the number necessary to
build a project. For example, Cargo has 150 crates in its lock file, so
now it only parses 150 JSON lines (a 20x reduction from 3700). This in
turn can greatly improve Cargo's null build time: Cargo building Cargo
dropped from 120ms to 60ms on a Linux machine and from 400ms to 200ms on
a Mac.

The commit internally has a lot more details about how this is done, but
the general idea is a cache, maintained automatically by Cargo, that is
optimized for Cargo to read.
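As a rough illustration of that idea (an editor's hypothetical sketch, not code from this commit), the cache can be thought of as NUL-separated (version, raw JSON line) pairs behind a small header, so a later run can find the blob for a given version without parsing any JSON:

// Hypothetical sketch of the cache idea, using only the standard library.
fn serialize_cache(index_version: &str, entries: &[(String, String)]) -> Vec<u8> {
    let mut out = vec![1u8]; // leading cache-format version byte
    out.extend_from_slice(index_version.as_bytes());
    out.push(0);
    for (vers, json_line) in entries {
        out.extend_from_slice(vers.as_bytes());
        out.push(0);
        out.extend_from_slice(json_line.as_bytes());
        out.push(0);
    }
    out
}

fn lookup<'a>(cache: &'a [u8], version: &str) -> Option<&'a [u8]> {
    let mut fields = cache.get(1..)?.split(|&b| b == 0);
    let _index_version = fields.next()?;
    while let (Some(vers), Some(json)) = (fields.next(), fields.next()) {
        if vers == version.as_bytes() {
            return Some(json); // only this blob ever needs a JSON parse
        }
    }
    None
}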

Closes #6866
Merged by bors on 2019-05-03 14:37:34 +00:00 as commit 22e2f23d5c.
20 changed files with 753 additions and 278 deletions.


@ -44,6 +44,7 @@ lazycell = "1.2.0"
libc = "0.2"
log = "0.4.6"
libgit2-sys = "0.7.9"
memchr = "2.1.3"
num_cpus = "1.0"
opener = "0.4"
rustfix = "0.4.4"


@ -160,10 +160,9 @@ impl Layout {
pub fn prepare(&mut self) -> io::Result<()> {
if fs::metadata(&self.root).is_err() {
fs::create_dir_all(&self.root)?;
self.exclude_from_backups(&self.root);
}
self.exclude_from_backups(&self.root);
mkdir(&self.deps)?;
mkdir(&self.native)?;
mkdir(&self.incremental)?;


@ -14,6 +14,7 @@ pub use self::shell::{Shell, Verbosity};
pub use self::source::{GitReference, Source, SourceId, SourceMap};
pub use self::summary::{FeatureMap, FeatureValue, Summary};
pub use self::workspace::{Members, Workspace, WorkspaceConfig, WorkspaceRootConfig};
pub use self::interning::InternedString;
pub mod compiler;
pub mod dependency;


@ -25,6 +25,7 @@ use crate::ops;
use crate::util::errors::{CargoResult, CargoResultExt, HttpNot200};
use crate::util::network::Retry;
use crate::util::{self, internal, lev_distance, Config, Progress, ProgressStyle};
use crate::util::config::PackageCacheLock;
/// Information about a package that is available somewhere in the file system.
///
@ -339,6 +340,9 @@ pub struct Downloads<'a, 'cfg: 'a> {
/// trigger a timeout; reset `next_speed_check` and set this back to the
/// configured threshold.
next_speed_check_bytes_threshold: Cell<u64>,
/// Global filesystem lock to ensure only one Cargo is downloading at a
/// time.
_lock: PackageCacheLock<'cfg>,
}
struct Download<'cfg> {
@ -437,6 +441,7 @@ impl<'cfg> PackageSet<'cfg> {
timeout,
next_speed_check: Cell::new(Instant::now()),
next_speed_check_bytes_threshold: Cell::new(0),
_lock: self.config.acquire_package_cache_lock()?,
})
}


@ -337,7 +337,7 @@ impl SourceId {
Kind::Registry => {}
_ => return false,
}
self.inner.url.to_string() == CRATES_IO_INDEX
self.inner.url.as_str() == CRATES_IO_INDEX
}
/// Hashes `self`.


@ -40,6 +40,10 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes
failure::bail!("you can't update in the offline mode");
}
// Updates often require a lot of modifications to the registry, so ensure
// that we're synchronized against other Cargos.
let _lock = ws.config().acquire_package_cache_lock()?;
let previous_resolve = match ops::load_pkg_lockfile(ws)? {
Some(resolve) => resolve,
None => return generate_lockfile(ws),
@ -73,6 +77,7 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes
});
}
}
registry.add_sources(sources)?;
}


@ -488,6 +488,11 @@ fn check_yanked_install(ws: &Workspace<'_>) -> CargoResult<()> {
// wouldn't be available for `compile_ws`.
let (pkg_set, resolve) = ops::resolve_ws_with_method(ws, Method::Everything, &specs)?;
let mut sources = pkg_set.sources_mut();
// Checking the yanked status involves taking a look at the registry and
// maybe updating files, so be sure to lock it here.
let _lock = ws.config().acquire_package_cache_lock()?;
for pkg_id in resolve.iter() {
if let Some(source) = sources.get_mut(pkg_id.source_id()) {
if source.is_yanked(pkg_id)? {


@ -548,6 +548,10 @@ fn compare_resolve(
}
fn check_yanked(config: &Config, pkg_set: &PackageSet<'_>, resolve: &Resolve) -> CargoResult<()> {
// Checking the yanked status involves taking a look at the registry and
// maybe updating files, so be sure to lock it here.
let _lock = config.acquire_package_cache_lock()?;
let mut sources = pkg_set.sources_mut();
for pkg_id in resolve.iter() {
if let Some(source) = sources.get_mut(pkg_id.source_id()) {


@ -564,6 +564,11 @@ pub fn select_pkg<'a, T>(
where
T: Source + 'a,
{
// This operation may involve updating some sources or making a few
// queries, which may involve frobbing caches; as a result, make sure we
// synchronize with other global Cargos.
let _lock = config.acquire_package_cache_lock()?;
if needs_update {
source.update()?;
}


@ -353,6 +353,7 @@ fn registry(
let token = token.or(token_config);
let sid = get_source_id(config, index_config.or(index), registry)?;
let api_host = {
let _lock = config.acquire_package_cache_lock()?;
let mut src = RegistrySource::remote(sid, &HashSet::new(), config);
// Only update the index if the config is not available or `force` is set.
let cfg = src.config();


@ -146,6 +146,10 @@ pub fn resolve_with_previous<'cfg>(
specs: &[PackageIdSpec],
register_patches: bool,
) -> CargoResult<Resolve> {
// We only want one Cargo at a time resolving a crate graph since this can
// involve a lot of frobbing of the global caches.
let _lock = ws.config().acquire_package_cache_lock()?;
// Here we place an artificial limitation that all non-registry sources
// cannot be locked at more than one revision. This means that if a Git
// repository provides more than one package, they must all be updated in


@ -154,12 +154,9 @@ impl<'cfg> Source for GitSource<'cfg> {
}
fn update(&mut self) -> CargoResult<()> {
let lock =
self.config
.git_path()
.open_rw(".cargo-lock-git", self.config, "the git checkouts")?;
let db_path = lock.parent().join("db").join(&self.ident);
let git_path = self.config.git_path();
let git_path = self.config.assert_package_cache_locked(&git_path);
let db_path = git_path.join("db").join(&self.ident);
if self.config.cli_unstable().offline && !db_path.exists() {
failure::bail!(
@ -189,21 +186,17 @@ impl<'cfg> Source for GitSource<'cfg> {
(self.remote.db_at(&db_path)?, actual_rev.unwrap())
};
// Don't use the full hash, in order to contribute less to reaching the path length limit
// on Windows. See <https://github.com/servo/servo/pull/14397>.
// Don't use the full hash, in order to contribute less to reaching the
// path length limit on Windows. See
// <https://github.com/servo/servo/pull/14397>.
let short_id = db.to_short_id(&actual_rev).unwrap();
let checkout_path = lock
.parent()
let checkout_path = git_path
.join("checkouts")
.join(&self.ident)
.join(short_id.as_str());
// Copy the database to the checkout location. After this we could drop
// the lock on the database as we no longer needed it, but we leave it
// in scope so the destructors here won't tamper with too much.
// Checkout is immutable, so we don't need to protect it with a lock once
// it is created.
// Copy the database to the checkout location.
db.copy_to(actual_rev.clone(), &checkout_path, self.config)?;
let source_id = self.source_id.with_precise(Some(actual_rev.to_string()));


@ -1,14 +1,82 @@
//! Management of the index of a registry source
//!
//! This module contains management of the index and various operations, such as
//! actually parsing the index, looking for crates, etc. This is intended to be
//! abstract over remote indices (downloaded via git) and local registry indices
//! (which are all just present on the filesystem).
//!
//! ## Index Performance
//!
//! One important aspect of the index is that we want to optimize the "happy
//! path" as much as possible. Whenever you type `cargo build` Cargo will
//! *always* reparse the registry and learn about dependency information. This
//! is done because Cargo needs to learn about the upstream crates.io crates
//! that you're using and ensure that the preexisting `Cargo.lock` still matches
//! the current state of the world.
//!
//! Consequently, Cargo "null builds" (the index that Cargo adds to each build
//! itself) need to be fast when accessing the index. The primary performance
//! optimization here is to avoid parsing JSON blobs from the registry if we
//! don't need them. Most secondary optimizations are centered around removing
//! allocations and such, but avoiding parsing JSON is the #1 optimization.
//!
//! When we get queries from the resolver we're given a `Dependency`. This
//! dependency in turn has a version requirement, and with lock files that
//! already exist these version requirements are exact version requirements
//! `=a.b.c`. This means that we in theory only need to parse one line of JSON
//! per query in the registry, the one that matches version `a.b.c`.
//!
//! The crates.io index, however, is not amenable to this form of query. Instead,
//! the crates.io index is simply a file where each line is a JSON blob. To
//! learn about the versions in each JSON blob we would need to parse the JSON,
//! defeating the purpose of trying to parse as little as possible.
//!
//! > Note that as a small aside even *loading* the JSON from the registry is
//! > actually pretty slow. For crates.io and remote registries we don't
//! > actually check out the git index on disk because that takes quite some
//! > time and is quite large. Instead we use `libgit2` to read the JSON from
//! > the raw git objects. This in turn can be slow (aka show up high in
//! > profiles) because libgit2 has to do deflate decompression and such.
//!
//! To solve all these issues a strategy is employed here where Cargo basically
//! creates an index into the index. The first time a package is queried
//! (the first time ever on a given machine) Cargo will load the contents
//! (slowly via libgit2) from the registry. It will then (slowly) parse every
//! single line to learn about its versions. Afterwards, however, Cargo will
//! emit a new file (a cache) which is amenable for speedily parsing in future
//! invocations.
//!
//! This cache file is currently organized by extracting the semver version
//! from each JSON blob. That way Cargo can quickly and easily learn all the
//! versions contained and which JSON blob each is associated with. The JSON
//! blob then doesn't actually need to get parsed unless that version is
//! needed.
//!
//! Altogether the initial measurements of this shows a massive improvement for
//! Cargo null build performance. It's expected that the improvements earned
//! here will continue to grow over time in the sense that the previous
//! implementation (parse all lines each time) actually continues to slow down
//! over time as new versions of a crate are published. In any case when first
//! implemented a null build of Cargo itself would parse 3700 JSON blobs from
//! the registry and load 150 blobs from git. Afterwards it parses 150 JSON
//! blobs and loads 0 files from git. Removing 200ms or more from Cargo's startup
//! time is certainly nothing to sneeze at!
//!
//! Note that this is just a high-level overview; there are of course lots of
//! details, like invalidating caches and whatnot, which are handled below, but
//! hopefully those are more obvious inline in the code itself.
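// Editor's sketch (hypothetical, not part of this commit): a lock file pins
// an exact version, which becomes a `VersionReq::exact` requirement; of all
// the published versions of a crate at most one can match, so only one JSON
// blob per locked dependency actually needs parsing.
fn exact_requirement_sketch() {
    use semver::{Version, VersionReq};
    let locked = Version::parse("1.2.3").unwrap();
    let req = VersionReq::exact(&locked);
    let published = ["1.0.0", "1.2.3", "2.0.0"];
    let to_parse: Vec<&str> = published
        .iter()
        .copied()
        .filter(|v| req.matches(&Version::parse(v).unwrap()))
        .collect();
    assert_eq!(to_parse, ["1.2.3"]); // only 1 of 3 index lines gets parsed
}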
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::Path;
use std::str;
use log::{info, trace};
use semver::Version;
use log::info;
use semver::{Version, VersionReq};
use crate::core::dependency::Dependency;
use crate::core::{PackageId, SourceId, Summary};
use crate::sources::registry::RegistryData;
use crate::sources::registry::{RegistryPackage, INDEX_LOCK};
use crate::core::{InternedString, PackageId, SourceId, Summary};
use crate::sources::registry::{RegistryData, RegistryPackage};
use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver};
/// Crates.io treats hyphen and underscores as interchangeable, but the index and old Cargo do not.
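// Editor's sketch (hypothetical, not from this commit): enumerate the
// hyphen/underscore spellings of a name (2^k variants for k separators), so a
// lookup for `foo-bar` can also try `foo_bar`; the real `UncanonicalizedIter`
// below does this lazily and is capped at 1024 variants.
fn separator_variants(name: &str) -> Vec<String> {
    let seps: Vec<usize> = name
        .char_indices()
        .filter(|&(_, c)| c == '-' || c == '_')
        .map(|(i, _)| i)
        .collect();
    assert!(seps.len() < 10, "sketch only: keep the explosion small");
    (0..(1u32 << seps.len()))
        .map(|mask| {
            let mut bytes = name.as_bytes().to_vec();
            for (bit, &i) in seps.iter().enumerate() {
                bytes[i] = if (mask >> bit) & 1 == 1 { b'_' } else { b'-' };
            }
            String::from_utf8(bytes).unwrap()
        })
        .collect()
}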
@ -99,11 +167,65 @@ fn overflow_hyphen() {
pub struct RegistryIndex<'cfg> {
source_id: SourceId,
path: Filesystem,
cache: HashMap<&'static str, Vec<(Summary, bool)>>,
// `(name, vers)` -> `checksum`
hashes: HashMap<&'static str, HashMap<Version, String>>,
summaries_cache: HashMap<InternedString, Summaries>,
config: &'cfg Config,
locked: bool,
}
/// An internal cache of summaries for a particular package.
///
/// A list of summaries are loaded from disk via one of two methods:
///
/// 1. Primarily Cargo will parse the corresponding file for a crate in the
/// upstream crates.io registry. That's just a JSON blob per line which we
/// can parse, extract the version, and then store here.
///
/// 2. Alternatively, if Cargo has previously run, we'll have a cached index of
/// dependencies for the upstream index. This is a file that Cargo maintains
/// lazily on the local filesystem and is much faster to parse since it
/// doesn't involve parsing all of the JSON.
///
/// To the outward-facing interface it doesn't matter too much where this is
/// loaded from, but when reading the implementation it's important to note
/// that we try to parse as little as possible!
#[derive(Default)]
struct Summaries {
/// A raw vector of uninterpreted bytes. This is what `Unparsed` start/end
/// fields are indexes into. If a `Summaries` is loaded from the crates.io
/// index then this field will be empty since nothing is `Unparsed`.
raw_data: Vec<u8>,
/// All known versions of a crate, keyed by their `Version` and mapping to
/// the possibly parsed or unparsed form of the full summary.
versions: HashMap<Version, MaybeIndexSummary>,
}
/// A lazily parsed `IndexSummary`.
enum MaybeIndexSummary {
/// A summary which has not been parsed. The `start` and `end` are indexes
/// into the `Summaries::raw_data` that this is an entry of.
Unparsed { start: usize, end: usize },
/// An actually parsed summary.
Parsed(IndexSummary),
}
/// A parsed representation of a summary from the index.
///
/// In addition to a full `Summary`, we have a few auxiliary pieces of
/// information like `yanked` and what the checksum hash is.
pub struct IndexSummary {
pub summary: Summary,
pub yanked: bool,
pub hash: String,
}
/// A representation of the cache on disk that Cargo maintains of summaries.
/// Cargo will initially parse all summaries in the registry and will then
/// serialize that into this form and place it in a new location on disk,
/// ensuring that access in the future is much speedier.
#[derive(Default)]
struct SummariesCache<'a> {
versions: Vec<(Version, &'a [u8])>,
}
impl<'cfg> RegistryIndex<'cfg> {
@ -111,155 +233,127 @@ impl<'cfg> RegistryIndex<'cfg> {
source_id: SourceId,
path: &Filesystem,
config: &'cfg Config,
locked: bool,
) -> RegistryIndex<'cfg> {
RegistryIndex {
source_id,
path: path.clone(),
cache: HashMap::new(),
hashes: HashMap::new(),
summaries_cache: HashMap::new(),
config,
locked,
}
}
/// Returns the hash listed for a specified `PackageId`.
pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> CargoResult<String> {
let name = pkg.name().as_str();
let version = pkg.version();
if let Some(s) = self.hashes.get(name).and_then(|v| v.get(version)) {
return Ok(s.clone());
}
// Ok, we're missing the key, so parse the index file to load it.
self.summaries(name, load)?;
self.hashes
.get(name)
.and_then(|v| v.get(version))
.ok_or_else(|| internal(format!("no hash listed for {}", pkg)))
.map(|s| s.clone())
let req = VersionReq::exact(pkg.version());
let summary = self
.summaries(pkg.name(), &req, load)?
.next()
.ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?;
Ok(summary.hash.clone())
}
/// Parses the on-disk metadata for the package provided.
/// Loads a list of summaries for the `name` package in this registry which
/// match `req`.
///
/// Returns a list of pairs of `(summary, yanked)` for the package name specified.
pub fn summaries(
&mut self,
name: &'static str,
/// This function will semantically parse the on-disk index, match all
/// versions, and then return an iterator over all summaries which matched.
/// Internally there are quite a few layers of caching to amortize this cost
/// though, since this method is called quite a lot on null builds in Cargo.
pub fn summaries<'a, 'b>(
&'a mut self,
name: InternedString,
req: &'b VersionReq,
load: &mut dyn RegistryData,
) -> CargoResult<&Vec<(Summary, bool)>> {
if self.cache.contains_key(name) {
return Ok(&self.cache[name]);
}
) -> CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>
where
'a: 'b,
{
let source_id = self.source_id.clone();
// First up actually parse what summaries we have available. If Cargo
// has run previously this will parse a Cargo-specific cache file rather
// than the registry itself. In effect this is intended to be a quite
// cheap operation.
let summaries = self.load_summaries(name, load)?;
self.cache.insert(name, summaries);
Ok(&self.cache[name])
// Iterate over our summaries, extract all relevant ones which match our
// version requirement, and then parse all corresponding rows in the
// registry. As a reminder this `summaries` method is called for each
// entry in a lock file on every build, so we want to absolutely
// minimize the amount of work being done here and parse as little as
// necessary.
let raw_data = &summaries.raw_data;
Ok(summaries
.versions
.iter_mut()
.filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None })
.filter_map(move |maybe| match maybe.parse(raw_data, source_id) {
Ok(summary) => Some(summary),
Err(e) => {
info!("failed to parse `{}` registry package: {}", name, e);
None
}
}))
}
fn load_summaries(
&mut self,
name: &str,
name: InternedString,
load: &mut dyn RegistryData,
) -> CargoResult<Vec<(Summary, bool)>> {
// Prepare the `RegistryData` which will lazily initialize internal data
// structures. Note that this is also importantly needed to initialize
// to avoid deadlocks where we acquire a lock below but the `load`
// function inside *also* wants to acquire a lock. See an instance of
// this on #5551.
load.prepare()?;
let (root, _lock) = if self.locked {
let lock = self
.path
.open_ro(Path::new(INDEX_LOCK), self.config, "the registry index");
match lock {
Ok(lock) => (lock.path().parent().unwrap().to_path_buf(), Some(lock)),
Err(_) => return Ok(Vec::new()),
}
} else {
(self.path.clone().into_path_unlocked(), None)
};
) -> CargoResult<&mut Summaries> {
// If we've previously loaded what versions are present for `name`, just
// return that since our cache should still be valid.
if self.summaries_cache.contains_key(&name) {
return Ok(self.summaries_cache.get_mut(&name).unwrap());
}
// Prepare the `RegistryData` which will lazily initialize internal data
// structures.
load.prepare()?;
let root = load.assert_index_locked(&self.path);
let cache_root = root.join(".cache");
let index_version = load.current_version();
// See module comment in `registry/mod.rs` for why this is structured
// the way it is.
let fs_name = name
.chars()
.flat_map(|c| c.to_lowercase())
.collect::<String>();
// See module comment for why this is structured the way it is.
let raw_path = match fs_name.len() {
1 => format!("1/{}", fs_name),
2 => format!("2/{}", fs_name),
3 => format!("3/{}/{}", &fs_name[..1], fs_name),
_ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
};
let mut ret = Vec::new();
// Attempt to handle misspellings by searching for a chain of names related
// to the original `raw_path` name. Only return summaries associated with
// the first hit, however. The resolver will later reject any candidates
// that have the wrong name, and along the way this produces helpful
// "did you mean?" suggestions.
for path in UncanonicalizedIter::new(&raw_path).take(1024) {
let mut hit_closure = false;
let err = load.load(&root, Path::new(&path), &mut |contents| {
hit_closure = true;
let contents = str::from_utf8(contents)
.map_err(|_| failure::format_err!("registry index file was not valid utf-8"))?;
ret.reserve(contents.lines().count());
let lines = contents.lines().map(|s| s.trim()).filter(|l| !l.is_empty());
// Attempt forwards-compatibility on the index by ignoring
// everything that we ourselves don't understand, that should
// allow future cargo implementations to break the
// interpretation of each line here and older cargo will simply
// ignore the new lines.
ret.extend(lines.filter_map(|line| {
let (summary, locked) = match self.parse_registry_package(line) {
Ok(p) => p,
Err(e) => {
info!("failed to parse `{}` registry package: {}", name, e);
trace!("line: {}", line);
return None;
}
};
Some((summary, locked))
}));
Ok(())
});
// We ignore lookup failures as those are just crates which don't exist
// or we haven't updated the registry yet. If we actually ran the
// closure though then we care about those errors.
if hit_closure {
err?;
// Crates.io ensures that there is only one hyphen and underscore equivalent
// result in the index so return when we find it.
return Ok(ret);
let summaries = Summaries::parse(
index_version.as_ref().map(|s| &**s),
&root,
&cache_root,
path.as_ref(),
self.source_id,
load,
self.config,
)?;
if let Some(summaries) = summaries {
self.summaries_cache.insert(name, summaries);
return Ok(self.summaries_cache.get_mut(&name).unwrap());
}
}
Ok(ret)
}
/// Parses a line from the registry's index file into a `Summary` for a package.
///
/// The returned boolean is whether or not the summary has been yanked.
fn parse_registry_package(&mut self, line: &str) -> CargoResult<(Summary, bool)> {
let RegistryPackage {
name,
vers,
cksum,
deps,
features,
yanked,
links,
} = serde_json::from_str(line)?;
let pkgid = PackageId::new(&name, &vers, self.source_id)?;
let name = pkgid.name();
let deps = deps
.into_iter()
.map(|dep| dep.into_dep(self.source_id))
.collect::<CargoResult<Vec<_>>>()?;
let mut summary = Summary::new(pkgid, deps, &features, links, false)?;
summary.set_checksum(cksum.clone());
self.hashes
.entry(name.as_str())
.or_insert_with(HashMap::new)
.insert(vers, cksum);
Ok((summary, yanked.unwrap_or(false)))
// If nothing was found then this crate doesn't exist, so just use an
// empty `Summaries` list.
self.summaries_cache.insert(name, Summaries::default());
Ok(self.summaries_cache.get_mut(&name).unwrap())
}
pub fn query_inner(
@ -295,31 +389,31 @@ impl<'cfg> RegistryIndex<'cfg> {
online: bool,
) -> CargoResult<usize> {
let source_id = self.source_id;
let name = dep.package_name().as_str();
let summaries = self.summaries(name, load)?;
let summaries = summaries
.iter()
.filter(|&(summary, yanked)| {
// Note: This particular logic can cause problems with
// optional dependencies when offline. If at least 1 version
// of an optional dependency is downloaded, but that version
// does not satisfy the requirements, then resolution will
// fail. Unfortunately, whether or not something is optional
// is not known here.
(online || load.is_crate_downloaded(summary.package_id()))
&& (!yanked || {
log::debug!("{:?}", yanked_whitelist);
log::debug!("{:?}", summary.package_id());
yanked_whitelist.contains(&summary.package_id())
})
})
.map(|s| s.0.clone());
let summaries = self
.summaries(dep.package_name(), dep.version_req(), load)?
// First filter summaries for `--offline`. If we're online then
// everything is a candidate, otherwise if we're offline we're only
// going to consider candidates which are actually present on disk.
//
// Note: This particular logic can cause problems with
// optional dependencies when offline. If at least 1 version
// of an optional dependency is downloaded, but that version
// does not satisfy the requirements, then resolution will
// fail. Unfortunately, whether or not something is optional
// is not known here.
.filter(|s| (online || load.is_crate_downloaded(s.summary.package_id())))
// Next filter out all yanked packages. Some yanked packages may
// leak through if they're in a whitelist (aka if they were
// previously in `Cargo.lock`).
.filter(|s| !s.yanked || yanked_whitelist.contains(&s.summary.package_id()))
.map(|s| s.summary.clone());
// Handle `cargo update --precise` here. If specified, our own source
// will have a precise version listed of the form
// `<pkg>=<p_req>-><f_req>` where `<pkg>` is the name of a crate on
// this source, `<p_req>` is the version installed, and `<f_req>` is the
// version requested (the argument to `--precise`).
let name = dep.package_name().as_str();
let summaries = summaries.filter(|s| match source_id.precise() {
Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => {
let mut vers = p[name.len() + 1..].splitn(2, "->");
@ -344,11 +438,305 @@ impl<'cfg> RegistryIndex<'cfg> {
}
pub fn is_yanked(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> CargoResult<bool> {
let summaries = self.summaries(pkg.name().as_str(), load)?;
let found = summaries
.iter()
.filter(|&(summary, _yanked)| summary.version() == pkg.version())
.any(|(_summary, yanked)| *yanked);
let req = VersionReq::exact(pkg.version());
let found = self
.summaries(pkg.name(), &req, load)?
.any(|summary| summary.yanked);
Ok(found)
}
}
impl Summaries {
/// Parses out a `Summaries` instance from on-disk state.
///
/// This will attempt to prefer parsing a previous cache file that already
/// exists from a previous invocation of Cargo (aka you're typing `cargo
/// build` again after typing it previously). If parsing fails or the cache
/// isn't found, then we take a slower path which loads the full descriptor
/// for `relative` from the underlying index (aka typically libgit2 with
/// crates.io) and then parse everything in there.
///
/// * `index_version` - a version string to describe the current state of
/// the index which for remote registries is the current git sha and
/// for local registries is not available.
/// * `root` - this is the root argument passed to `load`
/// * `cache_root` - this is the root on the filesystem itself of where to
/// store cache files.
/// * `relative` - this is the file we're loading from cache or the index
/// data
/// * `source_id` - the registry's SourceId used when parsing JSON blobs to
/// create summaries.
/// * `load` - the actual index implementation which may be very slow to
/// call. We avoid this if we can.
pub fn parse(
index_version: Option<&str>,
root: &Path,
cache_root: &Path,
relative: &Path,
source_id: SourceId,
load: &mut dyn RegistryData,
config: &Config,
) -> CargoResult<Option<Summaries>> {
// First up, attempt to load the cache. This could fail for all manner
// of reasons, but consider all of them non-fatal and just log their
// occurrence in case anyone is debugging anything.
let cache_path = cache_root.join(relative);
let mut cache_contents = None;
if let Some(index_version) = index_version {
match fs::read(&cache_path) {
Ok(contents) => match Summaries::parse_cache(contents, index_version) {
Ok(s) => {
log::debug!("fast path for registry cache of {:?}", relative);
if cfg!(debug_assertions) {
cache_contents = Some(s.raw_data);
} else {
return Ok(Some(s))
}
}
Err(e) => {
log::debug!("failed to parse {:?} cache: {}", relative, e);
}
},
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
}
}
// This is the fallback path where we actually talk to libgit2 to load
// information. Here we parse every single line in the index (as we need
// to find the versions).
log::debug!("slow path for {:?}", relative);
let mut ret = Summaries::default();
let mut hit_closure = false;
let mut cache_bytes = None;
let err = load.load(root, relative, &mut |contents| {
ret.raw_data = contents.to_vec();
let mut cache = SummariesCache::default();
hit_closure = true;
let mut start = 0;
for end in memchr::Memchr::new(b'\n', contents) {
// Attempt forwards-compatibility on the index by ignoring
// everything that we ourselves don't understand, that should
// allow future cargo implementations to break the
// interpretation of each line here and older cargo will simply
// ignore the new lines.
let line = &contents[start..end];
let summary = match IndexSummary::parse(line, source_id) {
Ok(summary) => summary,
Err(e) => {
log::info!("failed to parse {:?} registry package: {}", relative, e);
continue;
}
};
let version = summary.summary.package_id().version().clone();
cache.versions.push((version.clone(), line));
ret.versions.insert(version, summary.into());
start = end + 1;
}
if let Some(index_version) = index_version {
cache_bytes = Some(cache.serialize(index_version));
}
Ok(())
});
// We ignore lookup failures as those are just crates which don't exist
// or we haven't updated the registry yet. If we actually ran the
// closure though then we care about those errors.
if !hit_closure {
debug_assert!(cache_contents.is_none());
return Ok(None);
}
err?;
// If we've got debug assertions enabled and the cache was previously
// present and considered fresh, this is where the debug assertion
// actually happens: verify that our cache is indeed fresh and computes
// exactly the same value as before.
if cfg!(debug_assertions) && cache_contents.is_some() {
assert_eq!(cache_bytes, cache_contents);
}
// Once we have our `cache_bytes` which represents the `Summaries` we're
// about to return, write that back out to disk so future Cargo
// invocations can use it.
//
// This is opportunistic so we ignore failure here but are sure to log
// something in case of error.
if let Some(cache_bytes) = cache_bytes {
if fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
let path = Filesystem::new(cache_path.clone());
config.assert_package_cache_locked(&path);
if let Err(e) = fs::write(cache_path, cache_bytes) {
log::info!("failed to write cache: {}", e);
}
}
}
Ok(Some(ret))
}
/// Parses the contents of a cache file previously written by Cargo.
pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> {
let cache = SummariesCache::parse(&contents, last_index_update)?;
let mut ret = Summaries::default();
for (version, summary) in cache.versions {
let (start, end) = subslice_bounds(&contents, summary);
ret.versions
.insert(version, MaybeIndexSummary::Unparsed { start, end });
}
ret.raw_data = contents;
return Ok(ret);
// Returns the start/end offsets of `inner` within `outer`. Asserts that
// `inner` is a subslice of `outer`.
fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) {
let outer_start = outer.as_ptr() as usize;
let outer_end = outer_start + outer.len();
let inner_start = inner.as_ptr() as usize;
let inner_end = inner_start + inner.len();
assert!(inner_start >= outer_start);
assert!(inner_end <= outer_end);
(inner_start - outer_start, inner_end - outer_start)
}
}
}
// Implementation of serializing/deserializing the cache of summaries on disk.
// Currently the format looks like:
//
// +--------------+-------------+---+
// | version byte | git sha rev | 0 |
// +--------------+-------------+---+
//
// followed by...
//
// +----------------+---+------------+---+
// | semver version | 0 | JSON blob | 0 | ...
// +----------------+---+------------+---+
//
// The idea is that this is a very easy file for Cargo to parse in future
// invocations. The read from disk should be quite fast and then afterwards all
// we need to know is what versions correspond to which JSON blob.
//
// The leading version byte is intended to ensure that there's some level of
// future compatibility against changes to this cache format so if different
// versions of Cargo share the same cache they don't get too confused. The git
// sha lets us know when the file needs to be regenerated (it needs regeneration
// whenever the index itself updates).
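// For instance (an editor's illustration, not from this commit), a cache
// recording version 1.0.0 of some crate at index sha `abc123` would be the
// byte sequence:
//
//   0x01 "abc123" 0x00 "1.0.0" 0x00 <that version's JSON line> 0x00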
const CURRENT_CACHE_VERSION: u8 = 1;
impl<'a> SummariesCache<'a> {
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
// NB: keep this method in sync with `serialize` below
let (first_byte, rest) = data
.split_first()
.ok_or_else(|| failure::format_err!("malformed cache"))?;
if *first_byte != CURRENT_CACHE_VERSION {
failure::bail!("looks like a different Cargo's cache, bailing out");
}
let mut iter = memchr::Memchr::new(0, rest);
let mut start = 0;
if let Some(end) = iter.next() {
let update = &rest[start..end];
if update != last_index_update.as_bytes() {
failure::bail!(
"cache out of date: current index ({}) != cache ({})",
last_index_update,
str::from_utf8(update)?,
)
}
start = end + 1;
} else {
failure::bail!("malformed file");
}
let mut ret = SummariesCache::default();
while let Some(version_end) = iter.next() {
let version = &rest[start..version_end];
let version = str::from_utf8(version)?;
let version = Version::parse(version)?;
let summary_end = iter.next().unwrap();
let summary = &rest[version_end + 1..summary_end];
ret.versions.push((version, summary));
start = summary_end + 1;
}
Ok(ret)
}
fn serialize(&self, index_version: &str) -> Vec<u8> {
// NB: keep this method in sync with `parse` above
let size = self
.versions
.iter()
.map(|(_version, data)| (10 + data.len()))
.sum();
let mut contents = Vec::with_capacity(size);
contents.push(CURRENT_CACHE_VERSION);
contents.extend_from_slice(index_version.as_bytes());
contents.push(0);
for (version, data) in self.versions.iter() {
contents.extend_from_slice(version.to_string().as_bytes());
contents.push(0);
contents.extend_from_slice(data);
contents.push(0);
}
return contents;
}
}
impl MaybeIndexSummary {
/// Parses this "maybe a summary" into a `Parsed` for sure variant.
///
/// Does nothing if this is already `Parsed`, and otherwise the `raw_data`
/// passed in is sliced with the bounds in `Unparsed` and then actually
/// parsed.
fn parse(&mut self, raw_data: &[u8], source_id: SourceId) -> CargoResult<&IndexSummary> {
let (start, end) = match self {
MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
MaybeIndexSummary::Parsed(summary) => return Ok(summary),
};
let summary = IndexSummary::parse(&raw_data[start..end], source_id)?;
*self = MaybeIndexSummary::Parsed(summary);
match self {
MaybeIndexSummary::Unparsed { .. } => unreachable!(),
MaybeIndexSummary::Parsed(summary) => Ok(summary),
}
}
}
impl From<IndexSummary> for MaybeIndexSummary {
fn from(summary: IndexSummary) -> MaybeIndexSummary {
MaybeIndexSummary::Parsed(summary)
}
}
impl IndexSummary {
/// Parses a line from the registry's index file into an `IndexSummary` for
/// a package.
///
/// The `line` provided is expected to be valid JSON.
fn parse(line: &[u8], source_id: SourceId) -> CargoResult<IndexSummary> {
let RegistryPackage {
name,
vers,
cksum,
deps,
features,
yanked,
links,
} = serde_json::from_slice(line)?;
log::trace!("json parsed registry {}/{}", name, vers);
let pkgid = PackageId::new(&name, &vers, source_id)?;
let deps = deps
.into_iter()
.map(|dep| dep.into_dep(source_id))
.collect::<CargoResult<Vec<_>>>()?;
let mut summary = Summary::new(pkgid, deps, &features, links, false)?;
summary.set_checksum(cksum.clone());
Ok(IndexSummary {
summary,
yanked: yanked.unwrap_or(false),
hash: cksum,
})
}
}


@ -1,13 +1,13 @@
use std::io::prelude::*;
use std::io::SeekFrom;
use std::path::Path;
use crate::core::PackageId;
use crate::core::{PackageId, InternedString};
use crate::sources::registry::{MaybeLock, RegistryConfig, RegistryData};
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::paths;
use crate::util::{Config, FileLock, Filesystem, Sha256};
use crate::util::{Config, Filesystem, Sha256};
use hex;
use std::fs::File;
use std::io::prelude::*;
use std::io::SeekFrom;
use std::path::Path;
pub struct LocalRegistry<'cfg> {
index_path: Filesystem,
@ -36,6 +36,16 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
&self.index_path
}
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path {
// Note that the `*_unlocked` variant is used here since we're not
// modifying the index and it's required to be externally synchronized.
path.as_path_unlocked()
}
fn current_version(&self) -> Option<InternedString> {
None
}
fn load(
&self,
root: &Path,
@ -71,7 +81,12 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> {
let crate_file = format!("{}-{}.crate", pkg.name(), pkg.version());
let mut crate_file = self.root.open_ro(&crate_file, self.config, "crate file")?;
// Note that `into_path_unlocked` is used here because the local crate
// files never change; we're not the ones writing them, so it's not our
// responsibility to synchronize access to them.
let path = self.root.join(&crate_file).into_path_unlocked();
let mut crate_file = File::open(&path)?;
// If we've already got an unpacked version of this crate, then skip the
// checksum below as it is in theory already verified.
@ -89,7 +104,7 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
loop {
let n = crate_file
.read(&mut buf)
.chain_err(|| format!("failed to read `{}`", crate_file.path().display()))?;
.chain_err(|| format!("failed to read `{}`", path.display()))?;
if n == 0 {
break;
}
@ -109,7 +124,7 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
_pkg: PackageId,
_checksum: &str,
_data: &[u8],
) -> CargoResult<FileLock> {
) -> CargoResult<File> {
panic!("this source doesn't download")
}
}


@ -161,25 +161,25 @@
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::HashSet;
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::{Path, PathBuf};
use flate2::read::GzDecoder;
use log::debug;
use semver::Version;
use semver::{Version, VersionReq};
use serde::Deserialize;
use tar::Archive;
use crate::core::dependency::{Dependency, Kind};
use crate::core::source::MaybePackage;
use crate::core::{Package, PackageId, Source, SourceId, Summary};
use crate::core::{Package, PackageId, Source, SourceId, Summary, InternedString};
use crate::sources::PathSource;
use crate::util::errors::CargoResultExt;
use crate::util::hex;
use crate::util::to_url::ToUrl;
use crate::util::{internal, CargoResult, Config, FileLock, Filesystem};
use crate::util::{internal, CargoResult, Config, Filesystem};
const INDEX_LOCK: &str = ".cargo-index-lock";
const PACKAGE_SOURCE_LOCK: &str = ".cargo-ok";
pub const CRATES_IO_INDEX: &str = "https://github.com/rust-lang/crates.io-index";
pub const CRATES_IO_REGISTRY: &str = "crates-io";
@ -194,7 +194,6 @@ pub struct RegistrySource<'cfg> {
ops: Box<dyn RegistryData + 'cfg>,
index: index::RegistryIndex<'cfg>,
yanked_whitelist: HashSet<PackageId>,
index_locked: bool,
}
#[derive(Deserialize)]
@ -358,27 +357,25 @@ pub trait RegistryData {
fn index_path(&self) -> &Filesystem;
fn load(
&self,
_root: &Path,
root: &Path,
path: &Path,
data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
) -> CargoResult<()>;
fn config(&mut self) -> CargoResult<Option<RegistryConfig>>;
fn update_index(&mut self) -> CargoResult<()>;
fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock>;
fn finish_download(
&mut self,
pkg: PackageId,
checksum: &str,
data: &[u8],
) -> CargoResult<FileLock>;
fn finish_download(&mut self, pkg: PackageId, checksum: &str, data: &[u8])
-> CargoResult<File>;
fn is_crate_downloaded(&self, _pkg: PackageId) -> bool {
true
}
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path;
fn current_version(&self) -> Option<InternedString>;
}
pub enum MaybeLock {
Ready(FileLock),
Ready(File),
Download { url: String, descriptor: String },
}
@ -400,14 +397,7 @@ impl<'cfg> RegistrySource<'cfg> {
) -> RegistrySource<'cfg> {
let name = short_name(source_id);
let ops = remote::RemoteRegistry::new(source_id, config, &name);
RegistrySource::new(
source_id,
config,
&name,
Box::new(ops),
yanked_whitelist,
true,
)
RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist)
}
pub fn local(
@ -418,14 +408,7 @@ impl<'cfg> RegistrySource<'cfg> {
) -> RegistrySource<'cfg> {
let name = short_name(source_id);
let ops = local::LocalRegistry::new(path, config, &name);
RegistrySource::new(
source_id,
config,
&name,
Box::new(ops),
yanked_whitelist,
false,
)
RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist)
}
fn new(
@ -434,16 +417,14 @@ impl<'cfg> RegistrySource<'cfg> {
name: &str,
ops: Box<dyn RegistryData + 'cfg>,
yanked_whitelist: &HashSet<PackageId>,
index_locked: bool,
) -> RegistrySource<'cfg> {
RegistrySource {
src_path: config.registry_source_path().join(name),
config,
source_id,
updated: false,
index: index::RegistryIndex::new(source_id, ops.index_path(), config, index_locked),
index: index::RegistryIndex::new(source_id, ops.index_path(), config),
yanked_whitelist: yanked_whitelist.clone(),
index_locked,
ops,
}
}
@ -459,36 +440,26 @@ impl<'cfg> RegistrySource<'cfg> {
/// compiled.
///
/// No action is taken if the source looks like it's already unpacked.
fn unpack_package(&self, pkg: PackageId, tarball: &FileLock) -> CargoResult<PathBuf> {
fn unpack_package(&self, pkg: PackageId, tarball: &File) -> CargoResult<PathBuf> {
// The `.cargo-ok` file is used to track if the source is already
// unpacked and to lock the directory for unpacking.
let mut ok = {
let package_dir = format!("{}-{}", pkg.name(), pkg.version());
let dst = self.src_path.join(&package_dir);
dst.create_dir()?;
// Attempt to open a read-only copy first to avoid an exclusive write
// lock and also work with read-only filesystems. If the file has
// any data, assume the source is already unpacked.
if let Ok(ok) = dst.open_ro(PACKAGE_SOURCE_LOCK, self.config, &package_dir) {
let meta = ok.file().metadata()?;
if meta.len() > 0 {
let unpack_dir = ok.parent().to_path_buf();
return Ok(unpack_dir);
}
}
dst.open_rw(PACKAGE_SOURCE_LOCK, self.config, &package_dir)?
};
let unpack_dir = ok.parent().to_path_buf();
// If the file has any data, assume the source is already unpacked.
let meta = ok.file().metadata()?;
// unpacked.
let package_dir = format!("{}-{}", pkg.name(), pkg.version());
let dst = self.src_path.join(&package_dir);
dst.create_dir()?;
let path = dst.join(PACKAGE_SOURCE_LOCK);
let path = self.config.assert_package_cache_locked(&path);
let unpack_dir = path.parent().unwrap();
let mut ok = OpenOptions::new()
.create(true)
.read(true)
.write(true)
.open(&path)?;
let meta = ok.metadata()?;
if meta.len() > 0 {
return Ok(unpack_dir);
return Ok(unpack_dir.to_path_buf());
}
let gz = GzDecoder::new(tarball.file());
let gz = GzDecoder::new(tarball);
let mut tar = Archive::new(gz);
let prefix = unpack_dir.file_name().unwrap();
let parent = unpack_dir.parent().unwrap();
@ -523,19 +494,18 @@ impl<'cfg> RegistrySource<'cfg> {
// Write to the lock file to indicate that unpacking was successful.
write!(ok, "ok")?;
Ok(unpack_dir)
Ok(unpack_dir.to_path_buf())
}
fn do_update(&mut self) -> CargoResult<()> {
self.ops.update_index()?;
let path = self.ops.index_path();
self.index =
index::RegistryIndex::new(self.source_id, path, self.config, self.index_locked);
self.index = index::RegistryIndex::new(self.source_id, path, self.config);
self.updated = true;
Ok(())
}
fn get_pkg(&mut self, package: PackageId, path: &FileLock) -> CargoResult<Package> {
fn get_pkg(&mut self, package: PackageId, path: &File) -> CargoResult<Package> {
let path = self
.unpack_package(package, path)
.chain_err(|| internal(format!("failed to unpack package `{}`", package)))?;
@ -548,13 +518,12 @@ impl<'cfg> RegistrySource<'cfg> {
// After we've loaded the package, configure its summary's `checksum`
// field with the checksum we know for this `PackageId`.
let summaries = self
let req = VersionReq::exact(package.version());
let summary_with_cksum = self
.index
.summaries(package.name().as_str(), &mut *self.ops)?;
let summary_with_cksum = summaries
.iter()
.map(|s| &s.0)
.find(|s| s.package_id() == package)
.summaries(package.name(), &req, &mut *self.ops)?
.map(|s| s.summary.clone())
.next()
.expect("summary not found");
if let Some(cksum) = summary_with_cksum.checksum() {
pkg.manifest_mut()


@ -1,24 +1,20 @@
use crate::core::{PackageId, SourceId, InternedString};
use crate::sources::git;
use crate::sources::registry::MaybeLock;
use crate::sources::registry::{RegistryConfig, RegistryData, CRATE_TEMPLATE, VERSION_TEMPLATE};
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::{Config, Filesystem, Sha256};
use lazycell::LazyCell;
use log::{debug, trace};
use std::cell::{Cell, Ref, RefCell};
use std::fmt::Write as FmtWrite;
use std::fs::{self, File, OpenOptions};
use std::io::prelude::*;
use std::io::SeekFrom;
use std::mem;
use std::path::Path;
use std::str;
use lazycell::LazyCell;
use log::{debug, trace};
use crate::core::{PackageId, SourceId};
use crate::sources::git;
use crate::sources::registry::MaybeLock;
use crate::sources::registry::{
RegistryConfig, RegistryData, CRATE_TEMPLATE, INDEX_LOCK, VERSION_TEMPLATE,
};
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::{Config, Sha256};
use crate::util::{FileLock, Filesystem};
pub struct RemoteRegistry<'cfg> {
index_path: Filesystem,
cache_path: Filesystem,
@ -27,6 +23,7 @@ pub struct RemoteRegistry<'cfg> {
tree: RefCell<Option<git2::Tree<'static>>>,
repo: LazyCell<git2::Repository>,
head: Cell<Option<git2::Oid>>,
current_sha: Cell<Option<InternedString>>,
}
impl<'cfg> RemoteRegistry<'cfg> {
@ -39,12 +36,13 @@ impl<'cfg> RemoteRegistry<'cfg> {
tree: RefCell::new(None),
repo: LazyCell::new(),
head: Cell::new(None),
current_sha: Cell::new(None),
}
}
fn repo(&self) -> CargoResult<&git2::Repository> {
self.repo.try_borrow_with(|| {
let path = self.index_path.clone().into_path_unlocked();
let path = self.config.assert_package_cache_locked(&self.index_path);
// Fast path without a lock
if let Ok(repo) = git2::Repository::open(&path) {
@ -54,15 +52,11 @@ impl<'cfg> RemoteRegistry<'cfg> {
// Ok, now we need to lock and try the whole thing over again.
trace!("acquiring registry index lock");
let lock = self.index_path.open_rw(
Path::new(INDEX_LOCK),
self.config,
"the registry index",
)?;
match git2::Repository::open(&path) {
Ok(repo) => Ok(repo),
Err(_) => {
let _ = lock.remove_siblings();
drop(fs::remove_dir_all(&path));
fs::create_dir_all(&path)?;
// Note that we'd actually prefer to use a bare repository
// here as we're not actually going to check anything out.
@ -129,6 +123,8 @@ impl<'cfg> RemoteRegistry<'cfg> {
}
}
const LAST_UPDATED_FILE: &str = ".last-updated";
impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
fn prepare(&self) -> CargoResult<()> {
self.repo()?; // create intermediate dirs and initialize the repo
@ -139,6 +135,19 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
&self.index_path
}
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path {
self.config.assert_package_cache_locked(path)
}
fn current_version(&self) -> Option<InternedString> {
if let Some(sha) = self.current_sha.get() {
return Some(sha);
}
let sha = InternedString::new(&self.head().ok()?.to_string());
self.current_sha.set(Some(sha));
Some(sha)
}
fn load(
&self,
_root: &Path,
@ -162,9 +171,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
fn config(&mut self) -> CargoResult<Option<RegistryConfig>> {
debug!("loading config");
self.prepare()?;
let _lock =
self.index_path
.open_ro(Path::new(INDEX_LOCK), self.config, "the registry index")?;
self.config.assert_package_cache_locked(&self.index_path);
let mut config = None;
self.load(Path::new(""), Path::new("config.json"), &mut |json| {
config = Some(serde_json::from_slice(json)?);
@ -213,9 +220,8 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
self.prepare()?;
self.head.set(None);
*self.tree.borrow_mut() = None;
let _lock =
self.index_path
.open_rw(Path::new(INDEX_LOCK), self.config, "the registry index")?;
self.current_sha.set(None);
let path = self.config.assert_package_cache_locked(&self.index_path);
self.config
.shell()
.status("Updating", self.source_id.display_index())?;
@ -227,6 +233,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
git::fetch(repo, url, refspec, self.config)
.chain_err(|| format!("failed to fetch `{}`", url))?;
self.config.updated_sources().insert(self.source_id);
// Create a dummy file to record the mtime for when we updated the
// index.
File::create(&path.join(LAST_UPDATED_FILE))?;
Ok(())
}
@ -239,8 +250,10 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
//
// If this fails then we fall through to the exclusive path where we may
// have to redownload the file.
if let Ok(dst) = self.cache_path.open_ro(&filename, self.config, &filename) {
let meta = dst.file().metadata()?;
let path = self.cache_path.join(&filename);
let path = self.config.assert_package_cache_locked(&path);
if let Ok(dst) = File::open(&path) {
let meta = dst.metadata()?;
if meta.len() > 0 {
return Ok(MaybeLock::Ready(dst));
}
@ -266,7 +279,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
pkg: PackageId,
checksum: &str,
data: &[u8],
) -> CargoResult<FileLock> {
) -> CargoResult<File> {
// Verify what we just downloaded
let mut state = Sha256::new();
state.update(data);
@ -275,8 +288,15 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
}
let filename = self.filename(pkg);
let mut dst = self.cache_path.open_rw(&filename, self.config, &filename)?;
let meta = dst.file().metadata()?;
self.cache_path.create_dir()?;
let path = self.cache_path.join(&filename);
let path = self.config.assert_package_cache_locked(&path);
let mut dst = OpenOptions::new()
.create(true)
.read(true)
.write(true)
.open(&path)?;
let meta = dst.metadata()?;
if meta.len() > 0 {
return Ok(dst);
}
@ -290,8 +310,10 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
let filename = format!("{}-{}.crate", pkg.name(), pkg.version());
let path = Path::new(&filename);
if let Ok(dst) = self.cache_path.open_ro(path, self.config, &filename) {
if let Ok(meta) = dst.file().metadata() {
let path = self.cache_path.join(path);
let path = self.config.assert_package_cache_locked(&path);
if let Ok(dst) = File::open(path) {
if let Ok(meta) = dst.metadata() {
return meta.len() > 0;
}
}


@ -29,7 +29,7 @@ use crate::util::errors::{self, internal, CargoResult, CargoResultExt};
use crate::util::toml as cargo_toml;
use crate::util::Filesystem;
use crate::util::Rustc;
use crate::util::{paths, validate_package_name};
use crate::util::{paths, validate_package_name, FileLock};
use crate::util::{ToUrl, ToUrlWithBase};
/// Configuration information for cargo. This is not specific to a build, it is information
@ -76,6 +76,9 @@ pub struct Config {
profiles: LazyCell<ConfigProfiles>,
/// Tracks which sources have been updated to avoid multiple updates.
updated_sources: LazyCell<RefCell<HashSet<SourceId>>>,
/// Lock, if held, of the global package cache along with the number of
/// acquisitions so far.
package_cache_lock: RefCell<Option<(FileLock, usize)>>,
}
impl Config {
@ -132,6 +135,7 @@ impl Config {
env,
profiles: LazyCell::new(),
updated_sources: LazyCell::new(),
package_cache_lock: RefCell::new(None),
}
}
@ -828,6 +832,36 @@ impl Config {
};
T::deserialize(d).map_err(|e| e.into())
}
pub fn assert_package_cache_locked<'a>(&self, f: &'a Filesystem) -> &'a Path {
let ret = f.as_path_unlocked();
assert!(
self.package_cache_lock.borrow().is_some(),
"pacakge cache lock is not currently held, Cargo forgot to call \
`acquire_package_cache_lock` before we got to this stack frame",
);
assert!(ret.starts_with(self.home_path.as_path_unlocked()));
return ret;
}
pub fn acquire_package_cache_lock<'a>(&'a self) -> CargoResult<PackageCacheLock<'a>> {
let mut slot = self.package_cache_lock.borrow_mut();
match *slot {
Some((_, ref mut cnt)) => {
*cnt += 1;
}
None => {
let lock = self
.home_path
.open_rw(".package-cache", self, "package cache lock")
.chain_err(|| "failed to acquire package cache lock")?;
*slot = Some((lock, 1));
}
}
Ok(PackageCacheLock(self))
}
pub fn release_package_cache_lock(&self) {}
}
/// A segment of a config key.
@ -1664,3 +1698,16 @@ pub fn save_credentials(cfg: &Config, token: String, registry: Option<String>) -
Ok(())
}
}
pub struct PackageCacheLock<'a>(&'a Config);
impl Drop for PackageCacheLock<'_> {
fn drop(&mut self) {
let mut slot = self.0.package_cache_lock.borrow_mut();
let (_, cnt) = slot.as_mut().unwrap();
*cnt -= 1;
if *cnt == 0 {
*slot = None;
}
}
}
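As a usage sketch (an editor's hypothetical example built on the types above, not code from this commit), the lock is reference-counted within a single process: nested acquisitions just bump the count, and the underlying file lock is released only when the outermost guard is dropped.

fn lock_usage_sketch(config: &Config) -> CargoResult<()> {
    let _outer = config.acquire_package_cache_lock()?; // acquires the file lock
    {
        let _inner = config.acquire_package_cache_lock()?; // just bumps the count to 2
    } // `_inner` dropped: count returns to 1, file lock still held
    Ok(())
} // `_outer` dropped: count hits 0, file lock released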


@ -12,6 +12,7 @@ use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::paths;
use crate::util::Config;
#[derive(Debug)]
pub struct FileLock {
f: Option<File>,
path: PathBuf,
@ -136,6 +137,14 @@ impl Filesystem {
self.root
}
/// Returns the underlying `Path`.
///
/// Note that this is a relatively dangerous operation and should be used
/// with great caution!
pub fn as_path_unlocked(&self) -> &Path {
&self.root
}
/// Creates the directory pointed to by this filesystem.
///
/// Handles errors where other Cargo processes are also attempting to


@ -453,7 +453,7 @@ fn debug_release_ok() {
let a = a.join().unwrap();
execs()
.with_stderr(
.with_stderr_contains(
"\
[COMPILING] foo v0.0.1 [..]
[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]
@ -461,7 +461,7 @@ fn debug_release_ok() {
)
.run_output(&a);
execs()
.with_stderr(
.with_stderr_contains(
"\
[COMPILING] foo v0.0.1 [..]
[FINISHED] release [optimized] target(s) in [..]


@ -108,8 +108,10 @@ fn not_update() {
let sid = SourceId::for_registry(&registry_url()).unwrap();
let cfg = Config::new(Shell::new(), paths::root(), paths::home().join(".cargo"));
let lock = cfg.acquire_package_cache_lock().unwrap();
let mut regsrc = RegistrySource::remote(sid, &HashSet::new(), &cfg);
regsrc.update().unwrap();
drop(lock);
cargo_process("search postgres")
.with_stdout_contains("hoare = \"0.1.1\" # Design by contract style assertions for Rust")