diff --git a/Cargo.lock b/Cargo.lock index e90f95b99..c11da053c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -334,7 +334,6 @@ dependencies = [ "pretty_env_logger", "pulldown-cmark", "rand", - "regex", "rustfix", "same-file", "semver", diff --git a/Cargo.toml b/Cargo.toml index d6d7e76d6..0e788a6af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,7 +67,6 @@ pretty_env_logger = "0.4" proptest = "1.1.0" pulldown-cmark = { version = "0.9.2", default-features = false } rand = "0.8.5" -regex = "1.8.3" rustfix = "0.6.0" same-file = "1.0.6" security-framework = "2.0.0" @@ -153,7 +152,6 @@ pathdiff.workspace = true pretty_env_logger = { workspace = true, optional = true } pulldown-cmark.workspace = true rand.workspace = true -regex.workspace = true rustfix.workspace = true semver.workspace = true serde = { workspace = true, features = ["derive"] } diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index 2200c53f5..5ecb9112d 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -247,156 +247,271 @@ fn write_if_changed(path: &std::path::Path, new: &str) -> CargoResult<()> { /// Locates a "code block manifest" in Rust source. fn extract_comment(input: &str) -> CargoResult { - let re_crate_comment = regex::Regex::new( - // We need to find the first `/*!` or `//!` that *isn't* preceded by something that would - // make it apply to anything other than the crate itself. Because we can't do this - // accurately, we'll just require that the doc-comment is the *first* thing in the file - // (after the optional shebang). - r"(?x)(^\s*|^\#![^\[].*?(\r\n|\n))(/\*!|//(!|/))", - ) - .unwrap(); - let re_margin = regex::Regex::new(r"^\s*\*( |$)").unwrap(); - let re_space = regex::Regex::new(r"^(\s+)").unwrap(); - let re_nesting = regex::Regex::new(r"/\*|\*/").unwrap(); - let re_comment = regex::Regex::new(r"^\s*//(!|/)").unwrap(); - - fn n_leading_spaces(s: &str, n: usize) -> anyhow::Result<()> { - if !s.chars().take(n).all(|c| c == ' ') { - anyhow::bail!("leading {n:?} chars aren't all spaces: {s:?}") + let mut doc_fragments = Vec::new(); + let file = syn::parse_file(input)?; + // HACK: `syn` doesn't tell us what kind of comment was used, so infer it from how many + // attributes were used + let kind = if 1 < file + .attrs + .iter() + .filter(|attr| attr.meta.path().is_ident("doc")) + .count() + { + CommentKind::Line + } else { + CommentKind::Block + }; + for attr in &file.attrs { + if attr.meta.path().is_ident("doc") { + doc_fragments.push(DocFragment::new(attr, kind)?); } - Ok(()) + } + if doc_fragments.is_empty() { + anyhow::bail!("no doc-comment found"); + } + unindent_doc_fragments(&mut doc_fragments); + + let mut doc_comment = String::new(); + for frag in &doc_fragments { + add_doc_fragment(&mut doc_comment, frag); } - /// Returns a slice of the input string with the leading shebang, if there is one, omitted. - fn strip_shebang(s: &str) -> &str { - let re_shebang = regex::Regex::new(r"^#![^\[].*?(\r\n|\n)").unwrap(); - re_shebang.find(s).map(|m| &s[m.end()..]).unwrap_or(s) + Ok(doc_comment) +} + +/// A `#[doc]` +#[derive(Clone, Debug)] +struct DocFragment { + /// The attribute value + doc: String, + /// Indentation used within `doc + indent: usize, +} + +impl DocFragment { + fn new(attr: &syn::Attribute, kind: CommentKind) -> CargoResult { + let syn::Meta::NameValue(nv) = &attr.meta else { + anyhow::bail!("unsupported attr meta for {:?}", attr.meta.path()) + }; + let syn::Expr::Lit(syn::ExprLit { lit: syn::Lit::Str(lit), .. }) = &nv.value else { + anyhow::bail!("only string literals are supported") + }; + Ok(Self { + doc: beautify_doc_string(lit.value(), kind), + indent: 0, + }) + } +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum CommentKind { + Line, + Block, +} + +/// Makes a doc string more presentable to users. +/// Used by rustdoc and perhaps other tools, but not by rustc. +/// +/// See `rustc_ast/util/comments.rs` +fn beautify_doc_string(data: String, kind: CommentKind) -> String { + fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> { + let mut i = 0; + let mut j = lines.len(); + // first line of all-stars should be omitted + if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { + i += 1; + } + + // like the first, a last line of all stars should be omitted + if j > i && !lines[j - 1].is_empty() && lines[j - 1].chars().all(|c| c == '*') { + j -= 1; + } + + if i != 0 || j != lines.len() { + Some((i, j)) + } else { + None + } } - // First, we will look for and slice out a contiguous, inner doc-comment which must be *the - // very first thing* in the file. `#[doc(...)]` attributes *are not supported*. Multiple - // single-line comments cannot have any blank lines between them. - let input = strip_shebang(input); // `re_crate_comment` doesn't work with shebangs - let start = re_crate_comment - .captures(input) - .ok_or_else(|| anyhow::format_err!("no doc-comment found"))? - .get(3) - .ok_or_else(|| anyhow::format_err!("no doc-comment found"))? - .start(); + fn get_horizontal_trim(lines: &[&str], kind: CommentKind) -> Option { + let mut i = usize::MAX; + let mut first = true; - let input = &input[start..]; + // In case we have doc comments like `/**` or `/*!`, we want to remove stars if they are + // present. However, we first need to strip the empty lines so they don't get in the middle + // when we try to compute the "horizontal trim". + let lines = match kind { + CommentKind::Block => { + // Whatever happens, we skip the first line. + let mut i = lines + .get(0) + .map(|l| { + if l.trim_start().starts_with('*') { + 0 + } else { + 1 + } + }) + .unwrap_or(0); + let mut j = lines.len(); - if let Some(input) = input.strip_prefix("/*!") { - // On every line: - // - // - update nesting level and detect end-of-comment - // - if margin is None: - // - if there appears to be a margin, set margin. - // - strip off margin marker - // - update the leading space counter - // - strip leading space - // - append content - let mut r = String::new(); - - let mut leading_space = None; - let mut margin = None; - let mut depth: u32 = 1; - - for line in input.lines() { - if depth == 0 { - break; + while i < j && lines[i].trim().is_empty() { + i += 1; + } + while j > i && lines[j - 1].trim().is_empty() { + j -= 1; + } + &lines[i..j] } + CommentKind::Line => lines, + }; - // Update nesting and look for end-of-comment. - let mut end_of_comment = None; - - for (end, marker) in re_nesting.find_iter(line).map(|m| (m.start(), m.as_str())) { - match (marker, depth) { - ("/*", _) => depth += 1, - ("*/", 1) => { - end_of_comment = Some(end); - depth = 0; - break; + for line in lines { + for (j, c) in line.chars().enumerate() { + if j > i || !"* \t".contains(c) { + return None; + } + if c == '*' { + if first { + i = j; + first = false; + } else if i != j { + return None; } - ("*/", _) => depth -= 1, - _ => panic!("got a comment marker other than /* or */"), + break; } } + if i >= line.len() { + return None; + } + } + if lines.is_empty() { + None + } else { + Some(lines[0][..i].into()) + } + } - let line = end_of_comment.map(|end| &line[..end]).unwrap_or(line); + let data_s = data.as_str(); + if data_s.contains('\n') { + let mut lines = data_s.lines().collect::>(); + let mut changes = false; + let lines = if let Some((i, j)) = get_vertical_trim(&lines) { + changes = true; + // remove whitespace-only lines from the start/end of lines + &mut lines[i..j] + } else { + &mut lines + }; + if let Some(horizontal) = get_horizontal_trim(lines, kind) { + changes = true; + // remove a "[ \t]*\*" block from each line, if possible + for line in lines.iter_mut() { + if let Some(tmp) = line.strip_prefix(&horizontal) { + *line = tmp; + if kind == CommentKind::Block + && (*line == "*" || line.starts_with("* ") || line.starts_with("**")) + { + *line = &line[1..]; + } + } + } + } + if changes { + return lines.join("\n"); + } + } + data +} - // Detect and strip margin. - margin = margin.or_else(|| re_margin.find(line).map(|m| m.as_str())); +/// Removes excess indentation on comments in order for the Markdown +/// to be parsed correctly. This is necessary because the convention for +/// writing documentation is to provide a space between the /// or //! marker +/// and the doc text, but Markdown is whitespace-sensitive. For example, +/// a block of text with four-space indentation is parsed as a code block, +/// so if we didn't unindent comments, these list items +/// +/// /// A list: +/// /// +/// /// - Foo +/// /// - Bar +/// +/// would be parsed as if they were in a code block, which is likely not what the user intended. +/// +/// See also `rustc_resolve/rustdoc.rs` +fn unindent_doc_fragments(docs: &mut [DocFragment]) { + // HACK: We can't tell the difference between `#[doc]` and doc-comments, so we can't specialize + // the indentation like rustodc does + let add = 0; - let line = if let Some(margin) = margin { - let end = line - .char_indices() - .take(margin.len()) - .map(|(i, c)| i + c.len_utf8()) - .last() - .unwrap_or(0); - &line[end..] - } else { - line - }; + // `min_indent` is used to know how much whitespaces from the start of each lines must be + // removed. Example: + // + // ``` + // /// hello! + // #[doc = "another"] + // ``` + // + // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum + // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4 + // (5 - 1) whitespaces. + let Some(min_indent) = docs + .iter() + .map(|fragment| { + fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| { + if line.chars().all(|c| c.is_whitespace()) { + min_indent + } else { + // Compare against either space or tab, ignoring whether they are + // mixed or not. + let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count(); + min_indent.min(whitespace) + } + }) + }) + .min() + else { + return; + }; - // Detect and strip leading indentation. - leading_space = leading_space.or_else(|| re_space.find(line).map(|m| m.end())); - - // Make sure we have only leading spaces. - // - // If we see a tab, fall over. I *would* expand them, but that gets into the question of how *many* spaces to expand them to, and *where* is the tab, because tabs are tab stops and not just N spaces. - n_leading_spaces(line, leading_space.unwrap_or(0))?; - - let strip_len = line.len().min(leading_space.unwrap_or(0)); - let line = &line[strip_len..]; - - // Done. - r.push_str(line); - - // `lines` removes newlines. Ideally, it wouldn't do that, but hopefully this shouldn't cause any *real* problems. - r.push('\n'); + for fragment in docs { + if fragment.doc.is_empty() { + continue; } - Ok(r) - } else if input.starts_with("//!") || input.starts_with("///") { - let mut r = String::new(); + let min_indent = if min_indent > 0 { + min_indent - add + } else { + min_indent + }; - let mut leading_space = None; + fragment.indent = min_indent; + } +} - for line in input.lines() { - // Strip leading comment marker. - let content = match re_comment.find(line) { - Some(m) => &line[m.end()..], - None => break, - }; - - // Detect and strip leading indentation. - leading_space = leading_space.or_else(|| { - re_space - .captures(content) - .and_then(|c| c.get(1)) - .map(|m| m.end()) - }); - - // Make sure we have only leading spaces. - // - // If we see a tab, fall over. I *would* expand them, but that gets into the question of how *many* spaces to expand them to, and *where* is the tab, because tabs are tab stops and not just N spaces. - n_leading_spaces(content, leading_space.unwrap_or(0))?; - - let strip_len = content.len().min(leading_space.unwrap_or(0)); - let content = &content[strip_len..]; - - // Done. - r.push_str(content); - - // `lines` removes newlines. Ideally, it wouldn't do that, but hopefully this shouldn't cause any *real* problems. - r.push('\n'); +/// The goal of this function is to apply the `DocFragment` transformation that is required when +/// transforming into the final Markdown, which is applying the computed indent to each line in +/// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`). +/// +/// Note: remove the trailing newline where appropriate +/// +/// See also `rustc_resolve/rustdoc.rs` +fn add_doc_fragment(out: &mut String, frag: &DocFragment) { + let s = frag.doc.as_str(); + let mut iter = s.lines(); + if s.is_empty() { + out.push('\n'); + return; + } + while let Some(line) = iter.next() { + if line.chars().any(|c| !c.is_whitespace()) { + assert!(line.len() >= frag.indent); + out.push_str(&line[frag.indent..]); + } else { + out.push_str(line); } - - Ok(r) - } else { - Err(anyhow::format_err!("no doc-comment found")) + out.push('\n'); } } @@ -593,14 +708,12 @@ fn main() {} #[test] fn test_multiline_comment() { snapbox::assert_eq( - r#" -Here is a manifest: + r#"Here is a manifest: ```cargo [dependencies] time = "*" ``` - "#, ec!(r#"/*! Here is a manifest: @@ -620,14 +733,12 @@ fn main() { #[test] fn test_multiline_comment_shebang() { snapbox::assert_eq( - r#" -Here is a manifest: + r#"Here is a manifest: ```cargo [dependencies] time = "*" ``` - "#, ec!(r#"#!/usr/bin/env cargo-eval @@ -649,14 +760,12 @@ fn main() { #[test] fn test_multiline_block_comment() { snapbox::assert_eq( - r#" -Here is a manifest: + r#"Here is a manifest: ```cargo [dependencies] time = "*" ``` - "#, ec!(r#"/*! * Here is a manifest: @@ -674,14 +783,12 @@ fn main() {} #[test] fn test_multiline_block_comment_shebang() { snapbox::assert_eq( - r#" -Here is a manifest: + r#"Here is a manifest: ```cargo [dependencies] time = "*" ``` - "#, ec!(r#"#!/usr/bin/env cargo-eval @@ -697,6 +804,58 @@ fn main() {} "#), ); } + + #[test] + fn test_adjacent_comments() { + snapbox::assert_eq( + r#"Here is a manifest: + +```cargo +[dependencies] +time = "*" +``` +"#, + ec!(r#"#!/usr/bin/env cargo-eval + +// I am a normal comment +//! Here is a manifest: +//! +//! ```cargo +//! [dependencies] +//! time = "*" +//! ``` + +fn main () { +} +"#), + ); + } + + #[test] + fn test_doc_attrib() { + snapbox::assert_eq( + r#"Here is a manifest: + +```cargo +[dependencies] +time = "*" +``` +"#, + ec!(r###"#!/usr/bin/env cargo-eval + +#![doc = r#"Here is a manifest: + +```cargo +[dependencies] +time = "*" +``` +"#] + +fn main () { +} +"###), + ); + } } /// Given a Cargo manifest, attempts to rewrite relative file paths to absolute ones, allowing the manifest to be relocated.