Auto merge of #12083 - Eh2406:names, r=weihanglo

do not try an exponential number of package names

re #11934, and as discussed in the cargo team meeting, this changes the strategy to "the original, all underscore, and all dashes".

I was excessively proud of the `hyphen_combination_num` based implementation when I came up with it. But it's always been a hack. I'm glad to be the one to remove it.
This commit is contained in:
bors 2023-05-05 17:18:08 +00:00
commit e2d14882a5
3 changed files with 11 additions and 91 deletions

View file

@ -81,91 +81,6 @@ use std::path::Path;
use std::str;
use std::task::{ready, Poll};
/// Crates.io treats hyphens and underscores as interchangeable, but the index and old Cargo do
/// not. Therefore, the index must store the uncanonicalized version of the name so that old
/// versions of Cargo can find it.
///
/// This iterator yields the spellings of `input` obtained by switching hyphens and underscores.
/// As all stored inputs have the correct spelling, the first item is the spelling as provided.
///
/// Only the first 16 separators are ever switched; any further separators are copied through
/// unchanged. The iterator therefore yields exactly `2^min(n, 16)` names, where `n` is the
/// number of `-`/`_` characters in `input`, and then returns `None`.
pub struct UncanonicalizedIter<'s> {
    input: &'s str,
    /// Number of `-`/`_` characters in `input` that get switched (never more than 16).
    num_hyphen_underscore: u32,
    /// Index of the next combination to yield; bit `i` set means "flip the `i`-th separator".
    ///
    /// Kept as a `u32` so that the end marker `1 << 16` is representable and the increment in
    /// `next` can never overflow.
    hyphen_combination_num: u32,
}

impl<'s> UncanonicalizedIter<'s> {
    /// Upper bound on how many separators (counted from the start of the name) are switched.
    const MAX_SWITCHED: u32 = 16;

    /// Creates an iterator over the hyphen/underscore spellings of `input`.
    pub fn new(input: &'s str) -> Self {
        // Counting stops at the cap, so the cast below is lossless.
        let num_hyphen_underscore = input
            .chars()
            .filter(|&c| c == '_' || c == '-')
            .take(Self::MAX_SWITCHED as usize)
            .count() as u32;
        UncanonicalizedIter {
            input,
            num_hyphen_underscore,
            hyphen_combination_num: 0,
        }
    }
}

impl<'s> Iterator for UncanonicalizedIter<'s> {
    type Item = String;

    /// Returns the next spelling, or `None` once every combination has been produced.
    fn next(&mut self) -> Option<Self::Item> {
        let combination = self.hyphen_combination_num;
        // `num_hyphen_underscore` is at most 16, so this shift cannot overflow a `u32`.
        if combination >= 1u32 << self.num_hyphen_underscore {
            return None;
        }

        // Position of the next separator within the bit mask `combination`.
        let mut bit = 0u32;
        let name = self
            .input
            .chars()
            .map(|c| {
                if (c == '_' || c == '-') && bit < Self::MAX_SWITCHED {
                    let switch = combination & (1u32 << bit) != 0;
                    bit += 1;
                    // Keep the separator as written unless its bit asks for a flip.
                    if (c == '_') ^ switch {
                        '_'
                    } else {
                        '-'
                    }
                } else {
                    c
                }
            })
            .collect();

        self.hyphen_combination_num = combination + 1;
        Some(name)
    }
}
// A name without any `-` or `_` has exactly one spelling: itself.
#[test]
fn no_hyphen() {
    let spellings: Vec<String> = UncanonicalizedIter::new("test").collect();
    assert_eq!(spellings, ["test"]);
}
// Two separators give four spellings, beginning with the input as written.
#[test]
fn two_hyphen() {
    let expected = ["te-_st", "te__st", "te--st", "te_-st"];
    let spellings: Vec<String> = UncanonicalizedIter::new("te-_st").collect();
    assert_eq!(spellings, expected);
}
// A name with more than 15 separators must still be able to produce 100 spellings.
#[test]
fn overflow_hyphen() {
    let produced = UncanonicalizedIter::new("te-_-_-_-_-_-_-_-_-st")
        .take(100)
        .count();
    assert_eq!(produced, 100);
}
/// Manager for handling the on-disk index.
///
/// Note that local and remote registries store the index differently. Local

View file

@ -850,9 +850,15 @@ impl<'cfg> Source for RegistrySource<'cfg> {
// names to the original name. The resolver will later
// reject any candidates that have the wrong name, and with this it'll
// along the way produce helpful "did you mean?" suggestions.
for name_permutation in
index::UncanonicalizedIter::new(&dep.package_name()).take(1024)
{
// For now we only try canonicalizing `-` to `_` and vice versa.
// More advanced fuzzy searching may come in the future.
for name_permutation in [
dep.package_name().replace('-', "_"),
dep.package_name().replace('_', "-"),
] {
if name_permutation.as_str() == dep.package_name().as_str() {
continue;
}
any_pending |= self
.index
.query_inner(

View file

@ -3166,7 +3166,7 @@ fn not_found_permutations() {
authors = []
[dependencies]
a-b-c = "1.0"
a-b_c = "1.0"
"#,
)
.file("src/lib.rs", "")
@ -3177,7 +3177,7 @@ fn not_found_permutations() {
.with_stderr(
"\
[UPDATING] `dummy-registry` index
error: no matching package named `a-b-c` found
error: no matching package named `a-b_c` found
location searched: registry `crates-io`
required by package `foo v0.0.1 ([ROOT]/foo)`
",
@ -3190,7 +3190,6 @@ required by package `foo v0.0.1 ([ROOT]/foo)`
&[
"/index/a-/b-/a-b-c",
"/index/a-/b_/a-b_c",
"/index/a_/b-/a_b-c",
"/index/a_/b_/a_b_c"
]
);