Compare commits

...

6 Commits

Author SHA1 Message Date
Weihang Lo
1931ff8afa
Merge 0c54ce244e into 9441b91186 2024-06-28 15:47:45 +02:00
bors
9441b91186 Auto merge of #14159 - dieterplex:migrate-git-snapbox, r=weihanglo
test: Migrate git to snapbox

Part of #14039.

One case required modifying the regex used for file size redaction.
2024-06-28 13:05:10 +00:00
d1t2
32cdb261ef
test: Migrate git to snapbox 2024-06-28 17:39:24 +08:00
d1t2
ed027736e7
test: Allow redact file size w/o fraction
`cargo clean` shows the file size without a fraction in its summary when the
size is lower than 1024. To avoid matching things like `%2B%23..` found in
other test cases, a trailing `\s` is added to the regex.
2024-06-27 16:58:52 +08:00
Weihang Lo
0c54ce244e
refactor(source_id): merge stable hash tests into one 2024-06-20 13:37:50 -04:00
Weihang Lo
3ff54e575e
feat: use stable hash from rustc-stable-hash
This helps `-Ztrim-paths` build a stable cross-platform path for the
registry and git sources. Source files can then be found at the same
path when debugging.

See https://github.com/rust-lang/cargo/issues/13171#issuecomment-1864899037

A few caveats:

* This will invalidate the current downloaded caches.
  Need to put this in the Cargo CHANGELOG.
* As a consequence of changing how `SourceId` is hashed, the global cache
  tracker is also affected because Cargo writes source identifiers (e.g.
  `index.crates.io-6f17d22bba15001f`) to SQLite.
  * 6e236509b2/src/cargo/core/global_cache_tracker.rs (L388-L391)
* The performance of rustc-stable-hash is slightly worse than the old
  SipHasher in std on short things like `SourceId`, but better for long
  stuff like fingerprints. See appendix.

StableHasher is used in several places (some might not be needed?):

* Rebuild detection (fingerprints)
  * Rustc version, including all the CLI args running `rustc -vV`.
    * 6e236509b2/src/cargo/util/rustc.rs (L326)
    * 6e236509b2/src/cargo/util/rustc.rs (L381)
  * Build caches
    * 6e236509b2/src/cargo/core/compiler/fingerprint/mod.rs (L1456)
* Compute rustc `-C metadata`
  * stable hash for SourceId
    * 6e236509b2/src/cargo/core/package_id.rs (L207)
  * Also read and hash contents from custom target JSON file.
    * 6e236509b2/src/cargo/core/compiler/compile_kind.rs (L81-L91)
* `UnitInner::dep_hash`
  * This is to distinguish otherwise identical units that have different feature sets between normal and build dependencies.
    * 6e236509b2/src/cargo/ops/cargo_compile/mod.rs (L627)
* Hash file contents for `cargo package` to verify if files were modified before and after the build.
  * 6e236509b2/src/cargo/ops/cargo_package.rs (L999)
* Rustc diagnostics deduplication
  * 6e236509b2/src/cargo/core/compiler/job_queue/mod.rs (L311)
* Places using `SourceId` identifier like `registry/src` path,
  and `-Zscript` target directories.

Appendix
--------

Benchmark on x86_64-unknown-linux-gnu

```
bench_hasher/RustcStableHasher/URL
                        time:   [33.843 ps 33.844 ps 33.845 ps]
                        change: [-0.0167% -0.0049% +0.0072%] (p = 0.44 > 0.05)
                        No change in performance detected.
Found 10 outliers among 100 measurements (10.00%)
  5 (5.00%) low severe
  3 (3.00%) high mild
  2 (2.00%) high severe
bench_hasher/SipHasher/URL
                        time:   [18.954 ns 18.954 ns 18.955 ns]
                        change: [-0.1281% -0.0951% -0.0644%] (p = 0.00 < 0.05)
                        Change within noise threshold.
Found 14 outliers among 100 measurements (14.00%)
  3 (3.00%) low severe
  4 (4.00%) low mild
  3 (3.00%) high mild
  4 (4.00%) high severe
bench_hasher/RustcStableHasher/lorem ipsum
                        time:   [659.18 ns 659.20 ns 659.22 ns]
                        change: [-0.0192% -0.0062% +0.0068%] (p = 0.34 > 0.05)
                        No change in performance detected.
Found 12 outliers among 100 measurements (12.00%)
  4 (4.00%) low severe
  3 (3.00%) low mild
  3 (3.00%) high mild
  2 (2.00%) high severe
bench_hasher/SipHasher/lorem ipsum
                        time:   [1.2006 µs 1.2008 µs 1.2010 µs]
                        change: [+0.0117% +0.0467% +0.0808%] (p = 0.01 < 0.05)
                        Change within noise threshold.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high mild
```
2024-06-20 13:37:45 -04:00
11 changed files with 670 additions and 588 deletions

6
Cargo.lock generated
View File

@ -316,6 +316,7 @@ dependencies = [
"rand",
"regex",
"rusqlite",
"rustc-stable-hash",
"rustfix",
"same-file",
"semver",
@ -2941,6 +2942,11 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-stable-hash"
version = "0.1.0"
source = "git+https://github.com/rust-lang/rustc-stable-hash.git?rev=cb8e141b08fb839606a5f79f9b56087cd54b764d#cb8e141b08fb839606a5f79f9b56087cd54b764d"
[[package]]
name = "rustfix"
version = "0.8.5"

View File

@ -78,6 +78,7 @@ pulldown-cmark = { version = "0.11.0", default-features = false, features = ["ht
rand = "0.8.5"
regex = "1.10.4"
rusqlite = { version = "0.31.0", features = ["bundled"] }
rustc-stable-hash = { git = "https://github.com/rust-lang/rustc-stable-hash.git", rev = "cb8e141b08fb839606a5f79f9b56087cd54b764d" }
rustfix = { version = "0.8.2", path = "crates/rustfix" }
same-file = "1.0.6"
security-framework = "2.10.0"
@ -182,6 +183,7 @@ pathdiff.workspace = true
rand.workspace = true
regex.workspace = true
rusqlite.workspace = true
rustc-stable-hash.workspace = true
rustfix.workspace = true
same-file.workspace = true
semver.workspace = true

View File

@ -171,7 +171,7 @@ fn add_common_redactions(subs: &mut snapbox::Redactions) {
.unwrap();
subs.insert(
"[FILE_SIZE]",
regex!(r"(?<redacted>[0-9]+(\.[0-9]+)([a-zA-Z]i)?)B"),
regex!(r"(?<redacted>[0-9]+(\.[0-9]+)?([a-zA-Z]i)?)B\s"),
)
.unwrap();
subs.insert(

View File

@ -2,7 +2,7 @@
use std::collections::HashMap;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::hash::Hash;
use std::path::{Path, PathBuf};
use std::sync::Arc;

View File

@ -8,7 +8,7 @@ use anyhow::Context as _;
use serde::Serialize;
use std::collections::BTreeSet;
use std::fs;
use std::hash::{Hash, Hasher};
use std::hash::Hash;
use std::path::Path;
/// Indicator for how a unit is being compiled.

View File

@ -786,70 +786,87 @@ mod tests {
// Otherwise please just leave a comment in your PR as to why the hash value is
// changing and why the old value can't be easily preserved.
//
// The hash value depends on endianness and bit-width, so we only run this test on
// little-endian 64-bit CPUs (such as x86-64 and ARM64) where it matches the
// well-known value.
// The hash value should be stable across platforms, and doesn't depend on
// endianness and bit-width. One caveat is that absolute paths are inherently
// different on Windows than on Unix-like platforms. Unless we omit or strip
// the prefix components (e.g. `C:`), there is no way to have a
// cross-platform stable hash for absolute paths.
#[test]
#[cfg(all(target_endian = "little", target_pointer_width = "64"))]
fn test_cratesio_hash() {
let gctx = GlobalContext::default().unwrap();
let crates_io = SourceId::crates_io(&gctx).unwrap();
assert_eq!(crate::util::hex::short_hash(&crates_io), "1ecc6299db9ec823");
}
// See the comment in `test_cratesio_hash`.
//
// Only test on non-Windows as paths on Windows will get different hashes.
#[test]
#[cfg(all(target_endian = "little", target_pointer_width = "64", not(windows)))]
fn test_stable_hash() {
use std::hash::Hasher;
use crate::util::StableHasher;
use std::path::Path;
#[cfg(not(windows))]
let ws_root = Path::new("/tmp/ws");
#[cfg(windows)]
let ws_root = Path::new(r"C:\\tmp\ws");
let gen_hash = |source_id: SourceId| {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
source_id.stable_hash(Path::new("/tmp/ws"), &mut hasher);
let mut hasher = StableHasher::new();
source_id.stable_hash(ws_root, &mut hasher);
hasher.finish()
};
let source_id = SourceId::crates_io(&GlobalContext::default().unwrap()).unwrap();
assert_eq!(gen_hash(source_id), 14747226178473219715);
assert_eq!(crate::util::hex::short_hash(&source_id), "83d63c3e13aca8cc");
let url = "https://my-crates.io".into_url().unwrap();
let source_id = SourceId::for_registry(&url).unwrap();
assert_eq!(gen_hash(source_id), 18108075011063494626);
assert_eq!(crate::util::hex::short_hash(&source_id), "fb60813d6cb8df79");
assert_eq!(gen_hash(source_id), 2056262832525457700);
assert_eq!(crate::util::hex::short_hash(&source_id), "24b984d12650891c");
let url = "https://your-crates.io".into_url().unwrap();
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
assert_eq!(gen_hash(source_id), 12862859764592646184);
assert_eq!(crate::util::hex::short_hash(&source_id), "09c10fd0cbd74bce");
assert_eq!(gen_hash(source_id), 7851411715584162426);
assert_eq!(crate::util::hex::short_hash(&source_id), "7afabb545bd1f56c");
let url = "sparse+https://my-crates.io".into_url().unwrap();
let source_id = SourceId::for_registry(&url).unwrap();
assert_eq!(gen_hash(source_id), 8763561830438022424);
assert_eq!(crate::util::hex::short_hash(&source_id), "d1ea0d96f6f759b5");
assert_eq!(gen_hash(source_id), 15233380663065439616);
assert_eq!(crate::util::hex::short_hash(&source_id), "80ed51ce00d767d3");
let url = "sparse+https://your-crates.io".into_url().unwrap();
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
assert_eq!(gen_hash(source_id), 5159702466575482972);
assert_eq!(crate::util::hex::short_hash(&source_id), "135d23074253cb78");
assert_eq!(gen_hash(source_id), 12749290624384351691);
assert_eq!(crate::util::hex::short_hash(&source_id), "cbbda5344694eeb0");
let url = "file:///tmp/ws/crate".into_url().unwrap();
let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap();
assert_eq!(gen_hash(source_id), 15332537265078583985);
assert_eq!(crate::util::hex::short_hash(&source_id), "73a808694abda756");
let path = Path::new("/tmp/ws/crate");
assert_eq!(gen_hash(source_id), 3109465066469481245);
assert_eq!(crate::util::hex::short_hash(&source_id), "1d5b66d8000a272b");
let path = &ws_root.join("crate");
let source_id = SourceId::for_local_registry(path).unwrap();
assert_eq!(gen_hash(source_id), 18446533307730842837);
assert_eq!(crate::util::hex::short_hash(&source_id), "52a84cc73f6fd48b");
#[cfg(not(windows))]
{
assert_eq!(gen_hash(source_id), 17171351456028149232);
assert_eq!(crate::util::hex::short_hash(&source_id), "f0c5f1e92be54cee");
}
#[cfg(windows)]
{
assert_eq!(gen_hash(source_id), 10712195329887934127);
assert_eq!(crate::util::hex::short_hash(&source_id), "af96919ae55ca994");
}
let source_id = SourceId::for_path(path).unwrap();
assert_eq!(gen_hash(source_id), 8764714075439899829);
assert_eq!(crate::util::hex::short_hash(&source_id), "e1ddd48578620fc1");
assert_eq!(gen_hash(source_id), 13241112980875747369);
#[cfg(not(windows))]
assert_eq!(crate::util::hex::short_hash(&source_id), "e5ba2edec163e65a");
#[cfg(windows)]
assert_eq!(crate::util::hex::short_hash(&source_id), "429dd6f2283a9b5c");
let source_id = SourceId::for_directory(path).unwrap();
assert_eq!(gen_hash(source_id), 17459999773908528552);
assert_eq!(crate::util::hex::short_hash(&source_id), "6568fe2c2fab5bfe");
#[cfg(not(windows))]
{
assert_eq!(gen_hash(source_id), 12461124588148212881);
assert_eq!(crate::util::hex::short_hash(&source_id), "91c47582caceeeac");
}
#[cfg(windows)]
{
assert_eq!(gen_hash(source_id), 17000469607053345884);
assert_eq!(crate::util::hex::short_hash(&source_id), "5c443d0709cdedeb");
}
}
#[test]

View File

@ -36,7 +36,7 @@
//! ["Cargo Target"]: https://doc.rust-lang.org/nightly/cargo/reference/cargo-targets.html
use std::collections::{HashMap, HashSet};
use std::hash::{Hash, Hasher};
use std::hash::Hash;
use std::sync::Arc;
use crate::core::compiler::unit_dependencies::build_unit_dependencies;

View File

@ -456,7 +456,7 @@ fn short_name(id: SourceId, is_shallow: bool) -> String {
// CAUTION: This should not change between versions. If you change how
// this is computed, it will orphan previously cached data, forcing the
// cache to be rebuilt and potentially wasting significant disk space. If
// you change it, be cautious of the impact. See `test_cratesio_hash` for
// you change it, be cautious of the impact. See `test_stable_hash` for
// a similar discussion.
let hash = hex::short_hash(&id);
let ident = id.url().host_str().unwrap_or("").to_string();

View File

@ -1,23 +1,24 @@
//! Implementation of a hasher that produces the same values across releases.
//! A hasher that produces the same values across releases and platforms.
//!
//! The hasher should be fast and have a low chance of collisions (but is not
//! sufficient for cryptographic purposes).
#![allow(deprecated)]
//! This is a wrapper around [`rustc_stable_hash::StableHasher`].
use std::hash::{Hasher, SipHasher};
pub struct StableHasher(SipHasher);
pub struct StableHasher(rustc_stable_hash::StableHasher);
impl StableHasher {
pub fn new() -> StableHasher {
StableHasher(SipHasher::new())
StableHasher(rustc_stable_hash::StableHasher::new())
}
pub fn finish(self) -> u64 {
self.0.finalize().0
}
}
impl Hasher for StableHasher {
impl std::hash::Hasher for StableHasher {
fn finish(&self) -> u64 {
self.0.finish()
panic!("call StableHasher::finish instead");
}
fn write(&mut self, bytes: &[u8]) {
self.0.write(bytes)
}

View File

@ -1,6 +1,6 @@
use std::collections::hash_map::HashMap;
use std::env;
use std::hash::{Hash, Hasher};
use std::hash::Hash;
use std::path::{Path, PathBuf};
use std::sync::Mutex;

File diff suppressed because it is too large Load Diff