Auto merge of #12255 - epage:sanitize, r=weihanglo

fix(embedded): Align package name sanitization with cargo-new

### What does this PR try to resolve?

This is a follow-up to #12245, which is working to resolve the tracking issue #12207.

This first aligns sanitization of package names with the central package name validation logic, putting the code next to each other so they can more easily stay in sync.

Oddly enough, cargo-new is stricter than our normal package-name validation, so I went ahead and made the sanitization satisfy cargo-new's rules as well.

In working on this, I was bothered by
- the mix of `-` and `_` in file names because of sanitization, so I made it more consistent by detecting which the user is using
- using `_` in bins, so I switched the default to `-`

### How should we test and review this PR?

One existing test covers a variety of sanitization needs

Another existing test hit one of the other cases (`test` being reserved)

### Additional information

For implementation convenience, I changed the directory we write the manifest to.  The impact of this should be minimal since
- We reuse the full file name, so if it worked for the user it should work for us
- We should be moving away from the temp manifest in future commits
This commit is contained in:
bors 2023-06-17 01:10:59 +00:00
commit 3b5fac59a3
4 changed files with 61 additions and 29 deletions

View file

@ -163,6 +163,7 @@ fn get_name<'a>(path: &'a Path, opts: &'a NewOptions) -> CargoResult<&'a str> {
})
}
/// See also `util::toml::embedded::sanitize_name`
fn check_name(
name: &str,
show_name_help: bool,

View file

@ -83,6 +83,30 @@ pub fn validate_package_name(name: &str, what: &str, help: &str) -> CargoResult<
Ok(())
}
/// Ensure a package name is [valid][validate_package_name]
///
/// Each character of `name` is kept when it is allowed at its position and
/// replaced by `placeholder` otherwise:
///
/// - The first character must be a Unicode XID start character or `_`. A
///   leading ASCII digit is preserved by inserting a placeholder in front of it.
/// - Every following character must be a Unicode XID continue character or `-`.
///
/// The placeholder has to obey the first-character rule as well. When it does
/// not (for example `-`), `_` is used instead, but only in the leading
/// position; all other replacements still use `placeholder` as given.
///
/// An empty `name` produces an empty string. `placeholder` is not checked
/// against the rule for non-leading characters; callers are expected to pass
/// a character that is acceptable there.
///
/// NOTE(review): the rules above are the ones this function applies; confirm
/// they stay in sync with `validate_package_name`.
pub fn sanitize_package_name(name: &str, placeholder: char) -> String {
    // Blindly pushing e.g. `-` as the first character would hand back a name
    // that still breaks the very rule being repaired, so pick a substitute
    // that is guaranteed to be accepted at the start.
    let leading_placeholder =
        if unicode_xid::UnicodeXID::is_xid_start(placeholder) || placeholder == '_' {
            placeholder
        } else {
            '_'
        };

    // At most one extra character (the leading placeholder before a digit) is
    // added beyond the input length.
    let mut slug = String::with_capacity(name.len() + leading_placeholder.len_utf8());
    let mut chars = name.chars();
    if let Some(ch) = chars.next() {
        if ch.is_ascii_digit() {
            // Keep the digit, but shield it so the name no longer starts with one.
            slug.push(leading_placeholder);
            slug.push(ch);
        } else if unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' {
            slug.push(ch);
        } else {
            slug.push(leading_placeholder);
        }
    }
    slug.extend(chars.map(|ch| {
        if unicode_xid::UnicodeXID::is_xid_continue(ch) || ch == '-' {
            ch
        } else {
            placeholder
        }
    }));
    slug
}
/// Check the entire path for names reserved in Windows.
pub fn is_windows_reserved_path(path: &Path) -> bool {
path.iter()

View file

@ -1,6 +1,7 @@
use anyhow::Context as _;
use crate::core::Workspace;
use crate::util::restricted_names;
use crate::CargoResult;
use crate::Config;
@ -79,8 +80,7 @@ fn write(
.file_stem()
.ok_or_else(|| anyhow::format_err!("no file name"))?
.to_string_lossy();
let separator = '_';
let name = sanitize_package_name(file_name.as_ref(), separator);
let name = sanitize_name(file_name.as_ref());
let mut workspace_root = target_dir.to_owned();
workspace_root.push("eval");
@ -140,8 +140,7 @@ fn expand_manifest_(script: &RawScript, config: &Config) -> CargoResult<toml::Ta
.file_stem()
.ok_or_else(|| anyhow::format_err!("no file name"))?
.to_string_lossy();
let separator = '_';
let name = sanitize_package_name(file_name.as_ref(), separator);
let name = sanitize_name(file_name.as_ref());
let bin_name = name.clone();
package
.entry("name".to_owned())
@ -193,27 +192,35 @@ fn expand_manifest_(script: &RawScript, config: &Config) -> CargoResult<toml::Ta
Ok(manifest)
}
fn sanitize_package_name(name: &str, placeholder: char) -> String {
let mut slug = String::new();
for (i, c) in name.chars().enumerate() {
match (i, c) {
(0, '0'..='9') => {
slug.push(placeholder);
slug.push(c);
}
(_, '0'..='9') | (_, 'a'..='z') | (_, '_') | (_, '-') => {
slug.push(c);
}
(_, 'A'..='Z') => {
// Convert uppercase characters to lowercase to avoid `non_snake_case` warnings.
slug.push(c.to_ascii_lowercase());
}
(_, _) => {
slug.push(placeholder);
}
/// Ensure the package name matches the validation from `ops::cargo_new::check_name`
fn sanitize_name(name: &str) -> String {
let placeholder = if name.contains('_') {
'_'
} else {
// Since embedded manifests only support `[[bin]]`s, prefer kebab-case as that is the
// more common convention for CLIs
'-'
};
let mut name = restricted_names::sanitize_package_name(name, placeholder);
loop {
if restricted_names::is_keyword(&name) {
name.push(placeholder);
} else if restricted_names::is_conflicting_artifact_name(&name) {
// Being an embedded manifest, we always assume it is a `[[bin]]`
name.push(placeholder);
} else if name == "test" {
name.push(placeholder);
} else if restricted_names::is_windows_reserved(&name) {
// Go ahead and be consistent across platforms
name.push(placeholder);
} else {
break;
}
}
slug
name
}
fn hash(script: &RawScript) -> blake3::Hash {
@ -448,12 +455,12 @@ mod test_expand {
fn test_default() {
snapbox::assert_eq(
r#"[[bin]]
name = "test"
name = "test-"
path = "/home/me/test.rs"
[package]
edition = "2021"
name = "test"
name = "test-"
publish = false
version = "0.0.0"
@ -470,7 +477,7 @@ strip = true
fn test_dependencies() {
snapbox::assert_eq(
r#"[[bin]]
name = "test"
name = "test-"
path = "/home/me/test.rs"
[dependencies]
@ -478,7 +485,7 @@ time = "0.1.25"
[package]
edition = "2021"
name = "test"
name = "test-"
publish = false
version = "0.0.0"

View file

@ -426,9 +426,9 @@ args: []
)
.with_stderr(
r#"[WARNING] `package.edition` is unspecifiead, defaulting to `2021`
[COMPILING] s-h_w_c_ v0.0.0 ([ROOT]/home/.cargo/eval/target/eval/[..]/s-h_w_c_)
[COMPILING] s-h-w-c- v0.0.0 ([ROOT]/home/.cargo/eval/target/eval/[..]/s-h-w-c-)
[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s
[RUNNING] `[ROOT]/home/.cargo/eval/target/eval/[..]/s-h_w_c_/target/debug/s-h_w_c_[EXE]`
[RUNNING] `[ROOT]/home/.cargo/eval/target/eval/[..]/s-h-w-c-/target/debug/s-h-w-c-[EXE]`
"#,
)
.run();