Auto merge of #11937 - arlosi:fuzzy, r=ehuss

Stop using UncanonicalizedIter for QueryKind::Exact

`QueryKind::Exact` causes unnecessary HTTP requests when querying for crates that don't exist. Even though the query is `Exact`, when fetching `Summaries`, Cargo still uses `UncanonicalizedIter` and requests additional possible crate names if the first one isn't found.

This change moves the use of `UncanonicalizedIter` further up the call stack such that we have the `QueryKind` available, and only do the additional queries for `QueryKind::Fuzzy`.

The impact is most noticeable when publishing a new crate that contains `-` or `_`. Since Cargo waits for publish by querying the registry, if the crate name is `my-example-test-crate`, Cargo currently makes 8 HTTP requests each second while waiting for the crate to be available. With this fix, Cargo makes only 1 request per second.

cc #11934
This commit is contained in:
bors 2023-04-06 16:38:29 +00:00
commit 60aaca6d5e
3 changed files with 107 additions and 78 deletions

View File

@ -66,7 +66,6 @@
//! details like invalidating caches and whatnot which are handled below, but
//! hopefully those are more obvious inline in the code itself.
use crate::core::dependency::Dependency;
use crate::core::{PackageId, SourceId, Summary};
use crate::sources::registry::{LoadResponse, RegistryData, RegistryPackage, INDEX_V_MAX};
use crate::util::interning::InternedString;
@ -87,14 +86,14 @@ use std::task::{ready, Poll};
/// This loop tries all possible combinations of switching hyphen and underscores to find the
/// uncanonicalized one. As all stored inputs have the correct spelling, we start with the spelling
/// as-provided.
struct UncanonicalizedIter<'s> {
pub struct UncanonicalizedIter<'s> {
input: &'s str,
num_hyphen_underscore: u32,
hyphen_combination_num: u16,
}
impl<'s> UncanonicalizedIter<'s> {
fn new(input: &'s str) -> Self {
pub fn new(input: &'s str) -> Self {
let num_hyphen_underscore = input.chars().filter(|&c| c == '_' || c == '-').count() as u32;
UncanonicalizedIter {
input,
@ -267,7 +266,7 @@ impl<'cfg> RegistryIndex<'cfg> {
/// Returns the hash listed for a specified `PackageId`.
pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> {
let req = OptVersionReq::exact(pkg.version());
let summary = self.summaries(pkg.name(), &req, load)?;
let summary = self.summaries(&pkg.name(), &req, load)?;
let summary = ready!(summary).next();
Poll::Ready(Ok(summary
.ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?
@ -285,7 +284,7 @@ impl<'cfg> RegistryIndex<'cfg> {
/// though since this method is called quite a lot on null builds in Cargo.
pub fn summaries<'a, 'b>(
&'a mut self,
name: InternedString,
name: &str,
req: &'b OptVersionReq,
load: &mut dyn RegistryData,
) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>>
@ -299,6 +298,7 @@ impl<'cfg> RegistryIndex<'cfg> {
// has run previously this will parse a Cargo-specific cache file rather
// than the registry itself. In effect this is intended to be a quite
// cheap operation.
let name = InternedString::new(name);
let summaries = ready!(self.load_summaries(name, load)?);
// Iterate over our summaries, extract all relevant ones which match our
@ -361,45 +361,17 @@ impl<'cfg> RegistryIndex<'cfg> {
.flat_map(|c| c.to_lowercase())
.collect::<String>();
let mut any_pending = false;
// Attempt to handle misspellings by searching for a chain of related
// names to the original `fs_name` name. Only return summaries
// associated with the first hit, however. The resolver will later
// reject any candidates that have the wrong name, and with this it'll
// along the way produce helpful "did you mean?" suggestions.
for (i, name_permutation) in UncanonicalizedIter::new(&fs_name).take(1024).enumerate() {
let path = make_dep_path(&name_permutation, false);
let summaries = Summaries::parse(
root,
&cache_root,
path.as_ref(),
self.source_id,
load,
self.config,
)?;
if summaries.is_pending() {
if i == 0 {
// If we have not herd back about the name as requested
// then don't ask about other spellings yet.
// This prevents us spamming all the variations in the
// case where we have the correct spelling.
return Poll::Pending;
}
any_pending = true;
}
if let Poll::Ready(Some(summaries)) = summaries {
self.summaries_cache.insert(name, summaries);
return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()));
}
}
if any_pending {
return Poll::Pending;
}
// If nothing was found then this crate doesn't exists, so just use an
// empty `Summaries` list.
self.summaries_cache.insert(name, Summaries::default());
let path = make_dep_path(&fs_name, false);
let summaries = ready!(Summaries::parse(
root,
&cache_root,
path.as_ref(),
self.source_id,
load,
self.config,
))?
.unwrap_or_default();
self.summaries_cache.insert(name, summaries);
Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap()))
}
@ -410,7 +382,8 @@ impl<'cfg> RegistryIndex<'cfg> {
pub fn query_inner(
&mut self,
dep: &Dependency,
name: &str,
req: &OptVersionReq,
load: &mut dyn RegistryData,
yanked_whitelist: &HashSet<PackageId>,
f: &mut dyn FnMut(Summary),
@ -426,17 +399,20 @@ impl<'cfg> RegistryIndex<'cfg> {
// then cargo will fail to download and an error message
// indicating that the required dependency is unavailable while
// offline will be displayed.
if ready!(self.query_inner_with_online(dep, load, yanked_whitelist, f, false)?) > 0 {
if ready!(self.query_inner_with_online(name, req, load, yanked_whitelist, f, false)?)
> 0
{
return Poll::Ready(Ok(()));
}
}
self.query_inner_with_online(dep, load, yanked_whitelist, f, true)
self.query_inner_with_online(name, req, load, yanked_whitelist, f, true)
.map_ok(|_| ())
}
fn query_inner_with_online(
&mut self,
dep: &Dependency,
name: &str,
req: &OptVersionReq,
load: &mut dyn RegistryData,
yanked_whitelist: &HashSet<PackageId>,
f: &mut dyn FnMut(Summary),
@ -444,7 +420,7 @@ impl<'cfg> RegistryIndex<'cfg> {
) -> Poll<CargoResult<usize>> {
let source_id = self.source_id;
let summaries = ready!(self.summaries(dep.package_name(), dep.version_req(), load))?;
let summaries = ready!(self.summaries(name, req, load))?;
let summaries = summaries
// First filter summaries for `--offline`. If we're online then
@ -469,7 +445,6 @@ impl<'cfg> RegistryIndex<'cfg> {
// `<pkg>=<p_req>o-><f_req>` where `<pkg>` is the name of a crate on
// this source, `<p_req>` is the version installed and `<f_req> is the
// version requested (argument to `--precise`).
let name = dep.package_name().as_str();
let precise = match source_id.precise() {
Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => {
let mut vers = p[name.len() + 1..].splitn(2, "->");
@ -481,7 +456,7 @@ impl<'cfg> RegistryIndex<'cfg> {
};
let summaries = summaries.filter(|s| match &precise {
Some((current, requested)) => {
if dep.version_req().matches(current) {
if req.matches(current) {
// Unfortunately crates.io allows versions to differ only
// by build metadata. This shouldn't be allowed, but since
// it is, this will honor it if requested. However, if not
@ -521,7 +496,7 @@ impl<'cfg> RegistryIndex<'cfg> {
) -> Poll<CargoResult<bool>> {
let req = OptVersionReq::exact(pkg.version());
let found = self
.summaries(pkg.name(), &req, load)
.summaries(&pkg.name(), &req, load)
.map_ok(|mut p| p.any(|summary| summary.yanked));
found
}

View File

@ -164,7 +164,7 @@ use std::collections::HashSet;
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::task::Poll;
use std::task::{ready, Poll};
use anyhow::Context as _;
use cargo_util::paths::{self, exclude_from_backups_and_indexing};
@ -756,7 +756,7 @@ impl<'cfg> RegistrySource<'cfg> {
let req = OptVersionReq::exact(package.version());
let summary_with_cksum = self
.index
.summaries(package.name(), &req, &mut *self.ops)?
.summaries(&package.name(), &req, &mut *self.ops)?
.expect("a downloaded dep now pending!?")
.map(|s| s.summary.clone())
.next()
@ -786,36 +786,73 @@ impl<'cfg> Source for RegistrySource<'cfg> {
{
debug!("attempting query without update");
let mut called = false;
let pend =
self.index
.query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
if dep.matches(&s) {
called = true;
f(s);
}
})?;
if pend.is_pending() {
return Poll::Pending;
}
ready!(self.index.query_inner(
&dep.package_name(),
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
&mut |s| {
if dep.matches(&s) {
called = true;
f(s);
}
},
))?;
if called {
return Poll::Ready(Ok(()));
Poll::Ready(Ok(()))
} else {
debug!("falling back to an update");
self.invalidate_cache();
return Poll::Pending;
Poll::Pending
}
} else {
let mut called = false;
ready!(self.index.query_inner(
&dep.package_name(),
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
&mut |s| {
let matched = match kind {
QueryKind::Exact => dep.matches(&s),
QueryKind::Fuzzy => true,
};
if matched {
f(s);
called = true;
}
}
))?;
if called {
return Poll::Ready(Ok(()));
}
let mut any_pending = false;
if kind == QueryKind::Fuzzy {
// Attempt to handle misspellings by searching for a chain of related
// names to the original name. The resolver will later
// reject any candidates that have the wrong name, and with this it'll
// along the way produce helpful "did you mean?" suggestions.
for name_permutation in
index::UncanonicalizedIter::new(&dep.package_name()).take(1024)
{
any_pending |= self
.index
.query_inner(
&name_permutation,
dep.version_req(),
&mut *self.ops,
&self.yanked_whitelist,
f,
)?
.is_pending();
}
}
if any_pending {
Poll::Pending
} else {
Poll::Ready(Ok(()))
}
}
self.index
.query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
let matched = match kind {
QueryKind::Exact => dep.matches(&s),
QueryKind::Fuzzy => true,
};
if matched {
f(s);
}
})
}
fn supports_checksums(&self) -> bool {

View File

@ -2566,6 +2566,8 @@ fn wait_for_first_publish_underscore() {
// Counter for number of tries before the package is "published"
let arc: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
let arc2 = arc.clone();
let misses = Arc::new(Mutex::new(Vec::new()));
let misses2 = misses.clone();
// Registry returns an invalid response.
let registry = registry::RegistryBuilder::new()
@ -2580,6 +2582,14 @@ fn wait_for_first_publish_underscore() {
server.index(req)
}
})
.not_found_handler(move |req, _| {
misses.lock().unwrap().push(req.url.to_string());
Response {
body: b"not found".to_vec(),
code: 404,
headers: vec![],
}
})
.build();
let p = project()
@ -2621,6 +2631,13 @@ You may press ctrl-c to skip waiting; the crate should be available shortly.
let lock = arc2.lock().unwrap();
assert_eq!(*lock, 2);
drop(lock);
{
let misses = misses2.lock().unwrap();
assert!(
misses.len() == 1,
"should only have 1 not found URL; instead found {misses:?}"
);
}
let p = project()
.file(