Merge pull request #4398 from cakebaker/uudoc_simplify_about_text_generation

uudoc, uucore_procs: move markdown parsing to HelpParser
Terts Diepraam 2023-04-20 11:29:16 +02:00 committed by GitHub
commit 19add3a4bd
7 changed files with 281 additions and 284 deletions
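In short, the markdown parsing that previously lived in uucore_procs is extracted into a new help_parser crate, which both uudoc and uucore_procs now depend on. The crate exposes the three functions visible in the diffs below: parse_usage, parse_about, and parse_section. A minimal sketch of how they behave, assuming help_parser is available as a dependency (the sample markdown is invented for illustration):

fn main() {
    // A made-up help file following the structure documented in help_parser's lib.rs.
    let md = "\
# ls\n\
```\n\
ls [OPTION]... [FILE]...\n\
```\n\
\n\
List directory contents.\n\
\n\
## after help\n\
Sorting is locale-dependent.\n";

    // First fenced code block, with the util name replaced by "{}".
    assert_eq!(help_parser::parse_usage(md), "{} [OPTION]... [FILE]...");
    // Text between the usage block and the next header.
    assert_eq!(help_parser::parse_about(md), "List directory contents.");
    // A named "##" section; None if it does not exist.
    assert_eq!(
        help_parser::parse_section("after help", md).as_deref(),
        Some("Sorting is locale-dependent.")
    );
}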

Cargo.lock

@@ -388,6 +388,7 @@ dependencies = [
"conv",
"filetime",
"glob",
"help_parser",
"hex-literal",
"is-terminal",
"libc",
@@ -1102,6 +1103,10 @@ dependencies = [
"ahash",
]
[[package]]
name = "help_parser"
version = "0.0.18"
[[package]]
name = "hermit-abi"
version = "0.1.19"
@@ -3394,6 +3399,7 @@ dependencies = [
name = "uucore_procs"
version = "0.0.18"
dependencies = [
"help_parser",
"proc-macro2",
"quote",
]


@@ -31,7 +31,7 @@ windows = [ "feat_os_windows" ]
nightly = []
test_unimplemented = []
# * only build `uudoc` when `--feature uudoc` is activated
uudoc = ["zip"]
uudoc = ["zip", "dep:help_parser"]
## features
# "feat_acl" == enable support for ACLs (access control lists; by using`--features feat_acl`)
# NOTE:
@@ -358,6 +358,8 @@ selinux = { workspace=true, optional = true }
textwrap = { workspace=true }
zip = { workspace=true, optional = true }
help_parser = { path="src/help_parser", optional = true }
# * uutils
uu_test = { optional=true, version="0.0.18", package="uu_test", path="src/uu/test" }
#


@@ -133,7 +133,7 @@ impl<'a, 'b> MDWriter<'a, 'b> {
write!(self.w, "# {}\n\n", self.name)?;
self.additional()?;
self.usage()?;
self.description()?;
self.about()?;
self.options()?;
self.after_help()?;
self.examples()
@@ -177,54 +177,34 @@ impl<'a, 'b> MDWriter<'a, 'b> {
}
fn usage(&mut self) -> io::Result<()> {
writeln!(self.w, "\n```")?;
let mut usage: String = self
.command
.render_usage()
.to_string()
.lines()
.map(|l| l.strip_prefix("Usage:").unwrap_or(l))
.map(|l| l.trim())
.filter(|l| !l.is_empty())
.collect::<Vec<_>>()
.join("\n");
usage = usage
.to_string()
.replace(uucore::execution_phrase(), self.name);
writeln!(self.w, "{}", usage)?;
writeln!(self.w, "```")
if let Some(markdown) = &self.markdown {
let usage = help_parser::parse_usage(&markdown);
let usage = usage.replace("{}", self.name);
writeln!(self.w, "\n```")?;
writeln!(self.w, "{}", usage)?;
writeln!(self.w, "```")
} else {
Ok(())
}
}
fn description(&mut self) -> io::Result<()> {
if let Some(after_help) = self.markdown_section("about") {
return writeln!(self.w, "\n\n{}", after_help);
}
if let Some(about) = self
.command
.get_long_about()
.or_else(|| self.command.get_about())
{
writeln!(self.w, "{}", about)
fn about(&mut self) -> io::Result<()> {
if let Some(markdown) = &self.markdown {
writeln!(self.w, "{}", help_parser::parse_about(&markdown))
} else {
Ok(())
}
}
fn after_help(&mut self) -> io::Result<()> {
if let Some(after_help) = self.markdown_section("after help") {
return writeln!(self.w, "\n\n{}", after_help);
if let Some(markdown) = &self.markdown {
if let Some(after_help) = help_parser::parse_section("after help", &markdown) {
return writeln!(self.w, "\n\n{after_help}");
}
}
if let Some(after_help) = self
.command
.get_after_long_help()
.or_else(|| self.command.get_after_help())
{
writeln!(self.w, "\n\n{}", after_help)
} else {
Ok(())
}
Ok(())
}
fn examples(&mut self) -> io::Result<()> {
@@ -327,32 +307,6 @@ impl<'a, 'b> MDWriter<'a, 'b> {
}
writeln!(self.w, "</dl>\n")
}
fn markdown_section(&self, section: &str) -> Option<String> {
let md = self.markdown.as_ref()?;
let section = section.to_lowercase();
fn is_section_header(line: &str, section: &str) -> bool {
line.strip_prefix("##")
.map_or(false, |l| l.trim().to_lowercase() == section)
}
let result = md
.lines()
.skip_while(|&l| !is_section_header(l, &section))
.skip(1)
.take_while(|l| !l.starts_with("##"))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string();
if !result.is_empty() {
Some(result)
} else {
None
}
}
}
fn get_zip_content(archive: &mut ZipArchive<impl Read + Seek>, name: &str) -> Option<String> {
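The rewritten MDWriter::usage above takes the "{}" placeholder produced by help_parser::parse_usage and substitutes the utility name at render time. Condensed into a hypothetical free function (render_usage is not part of the diff, just an illustration of the same two steps):

// Same logic as MDWriter::usage above, minus the code fences it writes around the output.
fn render_usage(name: &str, markdown: &str) -> String {
    // parse_usage yields e.g. "{} -l"; substitute the real utility name.
    help_parser::parse_usage(markdown).replace("{}", name)
}

For the ls example used in the help_parser tests further down, render_usage("ls", md) would return "ls -l".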


@@ -0,0 +1,5 @@
[package]
name = "help_parser"
version = "0.0.18"
edition = "2021"
license = "MIT"

src/help_parser/src/lib.rs

@@ -0,0 +1,236 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! A collection of functions to parse the markdown code of help files.
//!
//! The structure of the markdown code is assumed to be:
//!
//! # util name
//!
//! ```text
//! usage info
//! ```
//!
//! About text
//!
//! ## Section 1
//!
//! Some content
//!
//! ## Section 2
//!
//! Some content
const MARKDOWN_CODE_FENCES: &str = "```";
/// Parses the text between the first markdown code block and the next header, if any,
/// into an about string.
pub fn parse_about(content: &str) -> String {
content
.lines()
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.take_while(|l| !l.starts_with('#'))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string()
}
/// Parses the first markdown code block into a usage string
///
/// The code fences are removed and the name of the util is replaced
/// with `{}` so that it can be replaced with the appropriate name
/// at runtime.
pub fn parse_usage(content: &str) -> String {
content
.lines()
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.take_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.map(|l| {
// Replace the util name (assumed to be the first word) with "{}"
// to be replaced with the runtime value later.
if let Some((_util, args)) = l.split_once(' ') {
format!("{{}} {args}\n")
} else {
"{}\n".to_string()
}
})
.collect::<Vec<_>>()
.join("")
.trim()
.to_string()
}
/// Get a single section from content
///
/// The section must be a second level section (i.e. start with `##`).
pub fn parse_section(section: &str, content: &str) -> Option<String> {
fn is_section_header(line: &str, section: &str) -> bool {
line.strip_prefix("##")
.map_or(false, |l| l.trim().to_lowercase() == section)
}
let section = &section.to_lowercase();
// We cannot distinguish between an empty or non-existing section below,
// so we do a quick test to check whether the section exists
if content.lines().all(|l| !is_section_header(l, section)) {
return None;
}
// Prefix includes space to allow processing of section with level 3-6 headers
let section_header_prefix = "## ";
Some(
content
.lines()
.skip_while(|&l| !is_section_header(l, section))
.skip(1)
.take_while(|l| !l.starts_with(section_header_prefix))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string(),
)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_section() {
let input = "\
# ls\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert_eq!(
parse_section("some section", input).unwrap(),
"This is some section"
);
assert_eq!(
parse_section("SOME SECTION", input).unwrap(),
"This is some section"
);
assert_eq!(
parse_section("another section", input).unwrap(),
"This is the other section\nwith multiple lines"
);
}
#[test]
fn test_parse_section_with_sub_headers() {
let input = "\
# ls\n\
## after section\n\
This is some section\n\
\n\
### level 3 header\n\
\n\
Additional text under the section.\n\
\n\
#### level 4 header\n\
\n\
Yet another paragraph\n";
assert_eq!(
parse_section("after section", input).unwrap(),
"This is some section\n\n\
### level 3 header\n\n\
Additional text under the section.\n\n\
#### level 4 header\n\n\
Yet another paragraph"
);
}
#[test]
fn test_parse_non_existing_section() {
let input = "\
# ls\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert!(parse_section("non-existing section", input).is_none());
}
#[test]
fn test_parse_usage() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert_eq!(parse_usage(input), "{} -l");
}
#[test]
fn test_parse_multi_line_usage() {
let input = "\
# ls\n\
```\n\
ls -a\n\
ls -b\n\
ls -c\n\
```\n\
## some section\n\
This is some section\n";
assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c");
}
#[test]
fn test_parse_about() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
\n\
This is the about section\n\
\n\
## some section\n\
This is some section\n";
assert_eq!(parse_about(input), "This is the about section");
}
#[test]
fn test_parse_multi_line_about() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
\n\
about a\n\
\n\
about b\n\
\n\
## some section\n\
This is some section\n";
assert_eq!(parse_about(input), "about a\n\nabout b");
}
}
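One behavioral detail worth noting: parse_section returns an Option, so each caller decides what a missing section means. uudoc (above) silently omits the section, while uucore_procs (below) turns None into a panic so a misspelled section name fails the build. A hypothetical caller that treats a missing section as empty, shown only to illustrate the contract:

// after_help_or_empty is an invented helper, not part of this commit.
fn after_help_or_empty(markdown: &str) -> String {
    // None means no "## after help" header exists in the help file.
    help_parser::parse_section("after help", markdown).unwrap_or_default()
}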


@@ -18,3 +18,4 @@ proc-macro = true
[dependencies]
proc-macro2 = "1.0"
quote = "1.0"
help_parser = { path="../help_parser", version="0.0.18" }


@@ -6,8 +6,6 @@ use std::{fs::File, io::Read, path::PathBuf};
use proc_macro::{Literal, TokenStream, TokenTree};
use quote::quote;
const MARKDOWN_CODE_FENCES: &str = "```";
//## rust proc-macro background info
//* ref: <https://dev.to/naufraghi/procedural-macro-in-rust-101-k3f> @@ <http://archive.is/Vbr5e>
//* ref: [path construction from LitStr](https://oschwald.github.io/maxminddb-rust/syn/struct.LitStr.html) @@ <http://archive.is/8YDua>
@@ -60,7 +58,7 @@ fn render_markdown(s: &str) -> String {
pub fn help_about(input: TokenStream) -> TokenStream {
let input: Vec<TokenTree> = input.into_iter().collect();
let filename = get_argument(&input, 0, "filename");
let text: String = parse_about(&read_help(&filename));
let text: String = help_parser::parse_about(&read_help(&filename));
TokenTree::Literal(Literal::string(&text)).into()
}
@@ -74,7 +72,7 @@ pub fn help_about(input: TokenStream) -> TokenStream {
pub fn help_usage(input: TokenStream) -> TokenStream {
let input: Vec<TokenTree> = input.into_iter().collect();
let filename = get_argument(&input, 0, "filename");
let text: String = parse_usage(&read_help(&filename));
let text: String = help_parser::parse_usage(&read_help(&filename));
TokenTree::Literal(Literal::string(&text)).into()
}
@@ -107,9 +105,15 @@ pub fn help_section(input: TokenStream) -> TokenStream {
let input: Vec<TokenTree> = input.into_iter().collect();
let section = get_argument(&input, 0, "section");
let filename = get_argument(&input, 1, "filename");
let text = parse_help_section(&section, &read_help(&filename));
let rendered = render_markdown(&text);
TokenTree::Literal(Literal::string(&rendered)).into()
if let Some(text) = help_parser::parse_section(&section, &read_help(&filename)) {
let rendered = render_markdown(&text);
TokenTree::Literal(Literal::string(&rendered)).into()
} else {
panic!(
"The section '{section}' could not be found in the help file. Maybe it is spelled wrong?"
)
}
}
/// Get an argument from the input vector of `TokenTree`.
@@ -148,214 +152,3 @@ fn read_help(filename: &str) -> String {
content
}
/// Get a single section from content
///
/// The section must be a second level section (i.e. start with `##`).
fn parse_help_section(section: &str, content: &str) -> String {
fn is_section_header(line: &str, section: &str) -> bool {
line.strip_prefix("##")
.map_or(false, |l| l.trim().to_lowercase() == section)
}
let section = &section.to_lowercase();
// We cannot distinguish between an empty or non-existing section below,
// so we do a quick test to check whether the section exists to provide
// a nice error message.
if content.lines().all(|l| !is_section_header(l, section)) {
panic!(
"The section '{section}' could not be found in the help file. Maybe it is spelled wrong?"
)
}
// Prefix includes space to allow processing of section with level 3-6 headers
let section_header_prefix = "## ";
content
.lines()
.skip_while(|&l| !is_section_header(l, section))
.skip(1)
.take_while(|l| !l.starts_with(section_header_prefix))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string()
}
/// Parses the first markdown code block into a usage string
///
/// The code fences are removed and the name of the util is replaced
/// with `{}` so that it can be replaced with the appropriate name
/// at runtime.
fn parse_usage(content: &str) -> String {
content
.lines()
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.take_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.map(|l| {
// Replace the util name (assumed to be the first word) with "{}"
// to be replaced with the runtime value later.
if let Some((_util, args)) = l.split_once(' ') {
format!("{{}} {args}\n")
} else {
"{}\n".to_string()
}
})
.collect::<Vec<_>>()
.join("")
.trim()
.to_string()
}
/// Parses the text between the first markdown code block and the next header, if any,
/// into an about string.
fn parse_about(content: &str) -> String {
content
.lines()
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.take_while(|l| !l.starts_with('#'))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string()
}
#[cfg(test)]
mod tests {
use super::{parse_about, parse_help_section, parse_usage};
#[test]
fn section_parsing() {
let input = "\
# ls\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert_eq!(
parse_help_section("some section", input),
"This is some section"
);
assert_eq!(
parse_help_section("SOME SECTION", input),
"This is some section"
);
assert_eq!(
parse_help_section("another section", input),
"This is the other section\nwith multiple lines"
);
}
#[test]
fn section_parsing_with_additional_headers() {
let input = "\
# ls\n\
## after section\n\
This is some section\n\
\n\
### level 3 header\n\
\n\
Additional text under the section.\n\
\n\
#### level 4 header\n\
\n\
Yet another paragraph\n";
assert_eq!(
parse_help_section("after section", input),
"This is some section\n\n\
### level 3 header\n\n\
Additional text under the section.\n\n\
#### level 4 header\n\n\
Yet another paragraph"
);
}
#[test]
#[should_panic]
fn section_parsing_panic() {
let input = "\
# ls\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
parse_help_section("non-existent section", input);
}
#[test]
fn usage_parsing() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert_eq!(parse_usage(input), "{} -l");
}
#[test]
fn multi_line_usage_parsing() {
let input = "\
# ls\n\
```\n\
ls -a\n\
ls -b\n\
ls -c\n\
```\n\
## some section\n\
This is some section\n";
assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c");
}
#[test]
fn about_parsing() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
\n\
This is the about section\n\
\n\
## some section\n\
This is some section\n";
assert_eq!(parse_about(input), "This is the about section");
}
#[test]
fn multi_line_about_parsing() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
\n\
about a\n\
\n\
about b\n\
\n\
## some section\n\
This is some section\n";
assert_eq!(parse_about(input), "about a\n\nabout b");
}
}
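For context, the help_about!, help_usage!, and help_section! proc macros above read a markdown help file at expansion time and embed the parsed text as string literals. An invocation looks roughly like the following; the ls.md file name and the uucore re-export path are assumptions based on how utilities in this repository typically use the macros, not something shown in this diff:

// Illustrative only: the help file path is resolved relative to the invoking crate's source.
use uucore::{help_about, help_section, help_usage};

// Parsed at compile time from the utility's markdown help file.
const ABOUT: &str = help_about!("ls.md");
const USAGE: &str = help_usage!("ls.md");
const AFTER_HELP: &str = help_section!("after help", "ls.md");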