Compare commits

...

10 Commits

Author SHA1 Message Date
Weihang Lo
019e1ec50a
Merge aadab5fb97 into 9441b91186 2024-06-28 15:45:10 +02:00
bors
9441b91186 Auto merge of #14159 - dieterplex:migrate-git-snapbox, r=weihanglo
test: Migrate git to snapbox

Part of #14039.

There is one case that requires modifying the regex for file size redaction.
2024-06-28 13:05:10 +00:00
d1t2
32cdb261ef
test: Migrate git to snapbox 2024-06-28 17:39:24 +08:00
d1t2
ed027736e7
test: Allow redact file size w/o fraction
`cargo clean` shows the file size without a fraction in its summary when the size
is lower than 1024. Also, to avoid matching things like `%2B%23..` found in other
test cases, a trailing `\s` is added to the regex.
2024-06-27 16:58:52 +08:00
Weihang Lo
aadab5fb97
fix(index): change sqlite schema to (name, ver, blob) 2024-04-02 13:55:19 -04:00
Weihang Lo
4b4a9934f8
fix(index): defer SQLite insertions 2024-04-02 13:54:00 -04:00
Weihang Lo
4ab7aab0ff
test(index): enable SQLite index cache in CI
Set `__CARGO_TEST_FORCE_SQLITE_INDEX_CACHE` to force-enable it.
2024-03-26 21:01:29 -04:00
Weihang Lo
3d5b357dba
feat(index): index cache in SQLite3 2024-03-26 21:01:29 -04:00
Weihang Lo
0f8f034ab7
feat(unstable): new unstable flag -Zindex-cache-sqlite 2024-03-26 21:01:29 -04:00
Weihang Lo
64f899d1aa
refactor(index): abstract CacheStore trait from CacheManager 2024-03-26 21:01:28 -04:00
8 changed files with 964 additions and 568 deletions

View File

@ -192,6 +192,10 @@ jobs:
- name: Clear test output
run: ci/clean-test-output.sh
- name: Check operability of index cache in SQLite3
run: 'cargo test -p cargo --test testsuite -- alt_registry:: global_cache_tracker::'
env:
__CARGO_TEST_FORCE_SQLITE_INDEX_CACHE: 1
# This only tests `cargo fix` because fix-proxy-mode is one of the most
# complicated subprocess management in Cargo.
- name: Check operability of rustc invocation with argfile

View File

@ -171,7 +171,7 @@ fn add_common_redactions(subs: &mut snapbox::Redactions) {
.unwrap();
subs.insert(
"[FILE_SIZE]",
regex!(r"(?<redacted>[0-9]+(\.[0-9]+)([a-zA-Z]i)?)B"),
regex!(r"(?<redacted>[0-9]+(\.[0-9]+)?([a-zA-Z]i)?)B\s"),
)
.unwrap();
subs.insert(

View File

@ -769,6 +769,7 @@ unstable_cli_options!(
#[serde(deserialize_with = "deserialize_gitoxide_features")]
gitoxide: Option<GitoxideFeatures> = ("Use gitoxide for the given git interactions, or all of them if no argument is given"),
host_config: bool = ("Enable the `[host]` section in the .cargo/config.toml file"),
index_cache_sqlite: bool,
minimal_versions: bool = ("Resolve minimal dependency versions instead of maximum"),
msrv_policy: bool = ("Enable rust-version aware policy within cargo"),
mtime_on_use: bool = ("Configure Cargo to update the mtime of used files"),
@ -1270,6 +1271,7 @@ impl CliUnstable {
)?
}
"host-config" => self.host_config = parse_empty(k, v)?,
"index-cache-sqlite" => self.index_cache_sqlite = parse_empty(k, v)?,
"next-lockfile-bump" => self.next_lockfile_bump = parse_empty(k, v)?,
"minimal-versions" => self.minimal_versions = parse_empty(k, v)?,
"msrv-policy" => self.msrv_policy = parse_empty(k, v)?,

View File

@ -65,6 +65,9 @@
//! [`IndexSummary::parse`]: super::IndexSummary::parse
//! [`RemoteRegistry`]: crate::sources::registry::remote::RemoteRegistry
use std::cell::OnceCell;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::fs;
use std::io;
use std::path::PathBuf;
@ -72,14 +75,21 @@ use std::str;
use anyhow::bail;
use cargo_util::registry::make_dep_path;
use rusqlite::params;
use rusqlite::Connection;
use semver::Version;
use crate::util::cache_lock::CacheLockMode;
use crate::util::sqlite;
use crate::util::sqlite::basic_migration;
use crate::util::sqlite::Migration;
use crate::util::Filesystem;
use crate::CargoResult;
use crate::GlobalContext;
use super::split;
use super::Summaries;
use super::MaybeIndexSummary;
use super::INDEX_V_MAX;
/// The current version of [`SummariesCache`].
@ -220,12 +230,30 @@ impl<'a> SummariesCache<'a> {
}
}
/// An abstraction of the actual cache store.
trait CacheStore {
/// Gets the cache associated with the key.
fn get(&self, key: &str) -> Option<MaybeSummaries>;
/// Associates the value with the key.
fn put(&self, key: &str, value: &[u8]);
/// Associates the value with the key + version tuple.
fn put_summary(&self, key: (&str, &Version), value: &[u8]);
/// Invalidates the cache associated with the key.
fn invalidate(&self, key: &str);
}
pub enum MaybeSummaries {
Unparsed(Vec<u8>),
Parsed(Summaries),
}
/// Manages the on-disk index caches.
pub struct CacheManager<'gctx> {
/// The root path where caches are located.
cache_root: Filesystem,
/// [`GlobalContext`] reference for convenience.
gctx: &'gctx GlobalContext,
store: Box<dyn CacheStore + 'gctx>,
is_sqlite: bool,
}
impl<'gctx> CacheManager<'gctx> {
@ -233,14 +261,70 @@ impl<'gctx> CacheManager<'gctx> {
///
/// `root` --- The root path where caches are located.
pub fn new(cache_root: Filesystem, gctx: &'gctx GlobalContext) -> CacheManager<'gctx> {
CacheManager { cache_root, gctx }
#[allow(clippy::disallowed_methods)]
let use_sqlite = gctx.cli_unstable().index_cache_sqlite
|| std::env::var("__CARGO_TEST_FORCE_SQLITE_INDEX_CACHE").is_ok();
let store: Box<dyn CacheStore> = if use_sqlite {
Box::new(LocalDatabase::new(cache_root, gctx))
} else {
Box::new(LocalFileSystem::new(cache_root, gctx))
};
CacheManager { store, is_sqlite: use_sqlite }
}
pub fn is_sqlite(&self) -> bool {
self.is_sqlite
}
/// Gets the cache associated with the key.
pub fn get(&self, key: &str) -> Option<Vec<u8>> {
pub fn get(&self, key: &str) -> Option<MaybeSummaries> {
self.store.get(key)
}
/// Associates the value with the key.
pub fn put(&self, key: &str, value: &[u8]) {
self.store.put(key, value)
}
/// Associates the value with the key + version tuple.
pub fn put_summary(&self, key: (&str, &Version), value: &[u8]) {
self.store.put_summary(key, value)
}
/// Invalidates the cache associated with the key.
pub fn invalidate(&self, key: &str) {
self.store.invalidate(key)
}
}
/// Stores index caches in a file system with a registry-index-like layout.
struct LocalFileSystem<'gctx> {
/// The root path where caches are located.
cache_root: Filesystem,
/// [`GlobalContext`] reference for convenience.
gctx: &'gctx GlobalContext,
}
impl LocalFileSystem<'_> {
/// Creates a new instance of the file system index cache store.
fn new(cache_root: Filesystem, gctx: &GlobalContext) -> LocalFileSystem<'_> {
LocalFileSystem { cache_root, gctx }
}
fn cache_path(&self, key: &str) -> PathBuf {
let relative = make_dep_path(key, false);
// This is the file we're loading from cache or the index data.
// See module comment in `registry/mod.rs` for why this is structured
// the way it is.
self.cache_root.join(relative).into_path_unlocked()
}
}
impl CacheStore for LocalFileSystem<'_> {
fn get(&self, key: &str) -> Option<MaybeSummaries> {
let cache_path = &self.cache_path(key);
match fs::read(cache_path) {
Ok(contents) => Some(contents),
Ok(contents) => Some(MaybeSummaries::Unparsed(contents)),
Err(e) => {
tracing::debug!(?cache_path, "cache missing: {e}");
None
@ -248,8 +332,7 @@ impl<'gctx> CacheManager<'gctx> {
}
}
/// Associates the value with the key.
pub fn put(&self, key: &str, value: &[u8]) {
fn put(&self, key: &str, value: &[u8]) {
let cache_path = &self.cache_path(key);
if fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
let path = Filesystem::new(cache_path.clone());
@ -261,8 +344,11 @@ impl<'gctx> CacheManager<'gctx> {
}
}
/// Invalidates the cache associated with the key.
pub fn invalidate(&self, key: &str) {
fn put_summary(&self, _key: (&str, &Version), _value: &[u8]) {
panic!("unsupported");
}
fn invalidate(&self, key: &str) {
let cache_path = &self.cache_path(key);
if let Err(e) = fs::remove_file(cache_path) {
if e.kind() != io::ErrorKind::NotFound {
@ -270,12 +356,137 @@ impl<'gctx> CacheManager<'gctx> {
}
}
}
}
fn cache_path(&self, key: &str) -> PathBuf {
let relative = make_dep_path(key, false);
// This is the file we're loading from cache or the index data.
// See module comment in `registry/mod.rs` for why this is structured
// the way it is.
self.cache_root.join(relative).into_path_unlocked()
/// Stores index caches in a local SQLite database.
struct LocalDatabase<'gctx> {
/// The root path where caches are located.
cache_root: Filesystem,
/// Connection to the SQLite database.
conn: OnceCell<Option<RefCell<Connection>>>,
/// Deferred writes of summaries, keyed by crate name; flushed in bulk
/// (see `bulk_put`, invoked from `Drop`).
deferred_writes: RefCell<BTreeMap<String, Vec<(String, Vec<u8>)>>>,
/// [`GlobalContext`] reference for convenience.
gctx: &'gctx GlobalContext,
}
impl LocalDatabase<'_> {
/// Creates a new instance of the SQLite index cache store.
fn new(cache_root: Filesystem, gctx: &GlobalContext) -> LocalDatabase<'_> {
LocalDatabase {
cache_root,
conn: OnceCell::new(),
deferred_writes: Default::default(),
gctx,
}
}
fn conn(&self) -> Option<&RefCell<Connection>> {
self.conn
.get_or_init(|| {
self.conn_init()
.map(RefCell::new)
.map_err(|e| tracing::debug!("cannot open index cache db: {e}"))
.ok()
})
.as_ref()
}
fn conn_init(&self) -> CargoResult<Connection> {
let _lock = self
.gctx
.acquire_package_cache_lock(CacheLockMode::DownloadExclusive)
.unwrap();
let cache_root = self.cache_root.as_path_unlocked();
fs::create_dir_all(cache_root)?;
let mut conn = Connection::open(cache_root.join("index-cache.db"))?;
sqlite::migrate(&mut conn, &migrations())?;
Ok(conn)
}
fn bulk_put(&self) -> CargoResult<()> {
let Some(conn) = self.conn() else {
anyhow::bail!("no connection");
};
let mut conn = conn.borrow_mut();
let tx = conn.transaction()?;
let mut stmt =
tx.prepare_cached("INSERT OR REPLACE INTO summaries (name, version, value) VALUES (?, ?, ?)")?;
for (name, summaries) in self.deferred_writes.borrow().iter() {
for (version, value) in summaries {
stmt.execute(params!(name, version, value))?;
}
}
drop(stmt);
tx.commit()?;
self.deferred_writes.borrow_mut().clear();
Ok(())
}
}
impl Drop for LocalDatabase<'_> {
fn drop(&mut self) {
let _ = self
.bulk_put()
.map_err(|e| tracing::info!("failed to flush cache: {e}"));
}
}
impl CacheStore for LocalDatabase<'_> {
fn get(&self, key: &str) -> Option<MaybeSummaries> {
self.conn()?
.borrow()
.prepare_cached("SELECT version, value FROM summaries WHERE name = ?")
.and_then(|mut stmt| {
let rows = stmt.query_map([key], |row| Ok((row.get(0)?, row.get(1)?)))?;
let mut summaries = Summaries::default();
for row in rows {
let (version, raw_data): (String, Vec<u8>) = row?;
let version = Version::parse(&version).expect("semver");
summaries.versions.insert(version, MaybeIndexSummary::UnparsedData(raw_data));
}
Ok(MaybeSummaries::Parsed(summaries))
})
.map_err(|e| {
tracing::debug!(key, "cache missing: {e}");
})
.ok()
}
fn put(&self, _key: &str, _value: &[u8]) {
panic!("unsupported");
}
fn put_summary(&self, (name, version): (&str, &Version), value: &[u8]) {
self.deferred_writes
.borrow_mut()
.entry(name.into())
.or_insert(Default::default())
.push((version.to_string(), value.to_vec()));
}
fn invalidate(&self, key: &str) {
if let Some(conn) = self.conn() {
_ = conn
.borrow()
.prepare_cached("DELETE FROM summaries WHERE name = ?")
.and_then(|mut stmt| stmt.execute([key]))
.map_err(|e| tracing::debug!(key, "failed to remove from cache: {e}"));
}
}
}
/// Migrations which initialize the database, and can be used to evolve it over time.
///
/// See [`Migration`] for more detail.
///
/// **Be sure to not change the order or entries here!**
fn migrations() -> Vec<Migration> {
vec![basic_migration(
"CREATE TABLE IF NOT EXISTS summaries (
name TEXT NOT NULL,
version TEXT NOT NULL,
value BLOB NOT NULL,
PRIMARY KEY (name, version)
)",
)]
}

View File

@ -40,7 +40,7 @@ use std::task::{ready, Poll};
use tracing::{debug, info};
mod cache;
use self::cache::CacheManager;
use self::cache::{CacheManager, MaybeSummaries};
use self::cache::SummariesCache;
/// The maximum schema version of the `v` field in the index this version of
@ -115,7 +115,8 @@ struct Summaries {
enum MaybeIndexSummary {
/// A summary which has not been parsed, The `start` and `end` are pointers
/// into [`Summaries::raw_data`] which this is an entry of.
Unparsed { start: usize, end: usize },
Unparsed(std::ops::Range<usize>),
UnparsedData(Vec<u8>),
/// An actually parsed summary.
Parsed(IndexSummary),
@ -551,14 +552,20 @@ impl Summaries {
let mut cached_summaries = None;
let mut index_version = None;
if let Some(contents) = cache_manager.get(name) {
match Summaries::parse_cache(contents) {
Ok((s, v)) => {
cached_summaries = Some(s);
index_version = Some(v);
if let Some(maybe_summaries) = cache_manager.get(name) {
match maybe_summaries {
MaybeSummaries::Unparsed(contents) => match Summaries::parse_cache(contents) {
Ok((s, v)) => {
cached_summaries = Some(s);
index_version = Some(v);
}
Err(e) => {
tracing::debug!("failed to parse {name:?} cache: {e}");
}
}
Err(e) => {
tracing::debug!("failed to parse {name:?} cache: {e}");
MaybeSummaries::Parsed(summaries) => {
cached_summaries = Some(summaries);
index_version = Some("2".into());
}
}
}
@ -611,9 +618,18 @@ impl Summaries {
}
};
let version = summary.package_id().version().clone();
cache.versions.push((version.clone(), line));
if cache_manager.is_sqlite() {
cache_manager.put_summary((&name, &version), line);
} else {
cache.versions.push((version.clone(), line));
}
ret.versions.insert(version, summary.into());
}
if cache_manager.is_sqlite() {
return Poll::Ready(Ok(Some(ret)));
}
if let Some(index_version) = index_version {
tracing::trace!("caching index_version {}", index_version);
let cache_bytes = cache.serialize(index_version.as_str());
@ -649,7 +665,7 @@ impl Summaries {
for (version, summary) in cache.versions {
let (start, end) = subslice_bounds(&contents, summary);
ret.versions
.insert(version, MaybeIndexSummary::Unparsed { start, end });
.insert(version, MaybeIndexSummary::Unparsed(start..end));
}
ret.raw_data = contents;
return Ok((ret, index_version));
@ -680,14 +696,16 @@ impl MaybeIndexSummary {
source_id: SourceId,
bindeps: bool,
) -> CargoResult<&IndexSummary> {
let (start, end) = match self {
MaybeIndexSummary::Unparsed { start, end } => (*start, *end),
let data = match self {
MaybeIndexSummary::Unparsed(range) => &raw_data[range.clone()],
MaybeIndexSummary::UnparsedData(data) => data,
MaybeIndexSummary::Parsed(summary) => return Ok(summary),
};
let summary = IndexSummary::parse(&raw_data[start..end], source_id, bindeps)?;
let summary = IndexSummary::parse(data, source_id, bindeps)?;
*self = MaybeIndexSummary::Parsed(summary);
match self {
MaybeIndexSummary::Unparsed { .. } => unreachable!(),
MaybeIndexSummary::UnparsedData { .. } => unreachable!(),
MaybeIndexSummary::Parsed(summary) => Ok(summary),
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,104 @@
//! Tests for the `-Zindex-cache-sqlite`.
use std::collections::HashSet;
use cargo_test_support::paths;
use cargo_test_support::project;
use cargo_test_support::registry;
use cargo_test_support::registry::Package;
#[cargo_test]
fn gated() {
project()
.build()
.cargo("fetch")
.arg("-Zindex-cache-sqlite")
.with_status(101)
.with_stderr_contains("[ERROR] the `-Z` flag is only accepted on the nightly channel of Cargo, but this is the `stable` channel")
.run();
}
#[cargo_test]
fn crates_io() {
registry::alt_init();
let p = project()
.file(
"Cargo.toml",
r#"
[package]
name = "foo"
edition = "2015"
[dependencies]
dep2 = "0.0.0"
"#,
)
.file("src/main.rs", "fn main() {}")
.build();
Package::new("dep1", "0.0.0").publish();
Package::new("dep2", "0.0.0").dep("dep1", "0.0.0").publish();
Package::new("dep3", "0.0.0").publish();
p.cargo("fetch")
.masquerade_as_nightly_cargo(&["index-cache-sqlite"])
.arg("-Zindex-cache-sqlite")
.with_stderr(
"\
[UPDATING] `dummy-registry` index
[LOCKING] 3 packages
[DOWNLOADING] crates ...
[DOWNLOADED] dep1 v0.0.0 (registry `dummy-registry`)
[DOWNLOADED] dep2 v0.0.0 (registry `dummy-registry`)
",
)
.run();
assert_rows_inserted(&["dep1", "dep2"]);
p.change_file(
"Cargo.toml",
r#"
[package]
name = "foo"
edition = "2015"
[dependencies]
dep2 = "0.0.0"
dep3 = "0.0.0"
"#,
);
p.cargo("fetch")
.masquerade_as_nightly_cargo(&["index-cache-sqlite"])
.arg("-Zindex-cache-sqlite")
.with_stderr(
"\
[UPDATING] `dummy-registry` index
[LOCKING] 1 package
[ADDING] dep3 v0.0.0
[DOWNLOADING] crates ...
[DOWNLOADED] dep3 v0.0.0 (registry `dummy-registry`)
",
)
.run();
assert_rows_inserted(&["dep1", "dep2", "dep3"]);
}
#[track_caller]
fn assert_rows_inserted(names: &[&str]) {
let pattern = paths::home().join(".cargo/registry/index/*/.cache/index-cache.db");
let pattern = pattern.to_str().unwrap();
let db_path = glob::glob(pattern).unwrap().next().unwrap().unwrap();
let set: HashSet<String> = rusqlite::Connection::open(&db_path)
.unwrap()
.prepare("SELECT name FROM summaries")
.unwrap()
.query_map([], |row| row.get(0))
.unwrap()
.collect::<Result<_, _>>()
.unwrap();
assert_eq!(set, HashSet::from_iter(names.iter().map(|n| n.to_string())));
}

View File

@ -104,6 +104,7 @@ mod glob_targets;
mod global_cache_tracker;
mod help;
mod https;
mod index_cache_sqlite;
mod inheritable_workspace_fields;
mod install;
mod install_upgrade;