Merge pull request #4398 from cakebaker/uudoc_simplify_about_text_generation

uudoc, uucore_procs: move markdown parsing to HelpParser
Terts Diepraam 2023-04-20 11:29:16 +02:00 committed by GitHub
commit 19add3a4bd
7 changed files with 281 additions and 284 deletions
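In short, the markdown parsing that previously lived in uucore_procs is extracted into a new help_parser crate, which both uudoc and uucore_procs now depend on. The crate exposes the three functions visible in the diffs below: parse_usage, parse_about, and parse_section. A minimal sketch of how they behave, assuming help_parser is available as a dependency (the sample markdown is invented for illustration):

fn main() {
    // A made-up help file following the structure documented in help_parser's lib.rs.
    let md = "\
# ls\n\
```\n\
ls [OPTION]... [FILE]...\n\
```\n\
\n\
List directory contents.\n\
\n\
## after help\n\
Sorting is locale-dependent.\n";

    // First fenced code block, with the util name replaced by "{}".
    assert_eq!(help_parser::parse_usage(md), "{} [OPTION]... [FILE]...");
    // Text between the usage block and the next header.
    assert_eq!(help_parser::parse_about(md), "List directory contents.");
    // A named "##" section; None if it does not exist.
    assert_eq!(
        help_parser::parse_section("after help", md).as_deref(),
        Some("Sorting is locale-dependent.")
    );
}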

Cargo.lock

@@ -388,6 +388,7 @@ dependencies = [
"conv",
"filetime",
"glob",
"help_parser",
"hex-literal",
"is-terminal",
"libc",
@@ -1102,6 +1103,10 @@ dependencies = [
"ahash",
]
[[package]]
name = "help_parser"
version = "0.0.18"
[[package]]
name = "hermit-abi"
version = "0.1.19"
@@ -3394,6 +3399,7 @@ dependencies = [
name = "uucore_procs"
version = "0.0.18"
dependencies = [
"help_parser",
"proc-macro2",
"quote",
]


@@ -31,7 +31,7 @@ windows = [ "feat_os_windows" ]
nightly = []
test_unimplemented = []
# * only build `uudoc` when `--feature uudoc` is activated
uudoc = ["zip"]
uudoc = ["zip", "dep:help_parser"]
## features
# "feat_acl" == enable support for ACLs (access control lists; by using`--features feat_acl`)
# NOTE:
@@ -358,6 +358,8 @@ selinux = { workspace=true, optional = true }
textwrap = { workspace=true }
zip = { workspace=true, optional = true }
help_parser = { path="src/help_parser", optional = true }
# * uutils
uu_test = { optional=true, version="0.0.18", package="uu_test", path="src/uu/test" }
#


@@ -133,7 +133,7 @@ impl<'a, 'b> MDWriter<'a, 'b> {
write!(self.w, "# {}\n\n", self.name)?;
self.additional()?;
self.usage()?;
self.description()?;
self.about()?;
self.options()?;
self.after_help()?;
self.examples()
@@ -177,54 +177,34 @@ impl<'a, 'b> MDWriter<'a, 'b> {
}
fn usage(&mut self) -> io::Result<()> {
writeln!(self.w, "\n```")?;
let mut usage: String = self
.command
.render_usage()
.to_string()
.lines()
.map(|l| l.strip_prefix("Usage:").unwrap_or(l))
.map(|l| l.trim())
.filter(|l| !l.is_empty())
.collect::<Vec<_>>()
.join("\n");
usage = usage
.to_string()
.replace(uucore::execution_phrase(), self.name);
writeln!(self.w, "{}", usage)?;
writeln!(self.w, "```")
if let Some(markdown) = &self.markdown {
let usage = help_parser::parse_usage(&markdown);
let usage = usage.replace("{}", self.name);
writeln!(self.w, "\n```")?;
writeln!(self.w, "{}", usage)?;
writeln!(self.w, "```")
} else {
Ok(())
}
}
fn description(&mut self) -> io::Result<()> {
if let Some(after_help) = self.markdown_section("about") {
return writeln!(self.w, "\n\n{}", after_help);
}
if let Some(about) = self
.command
.get_long_about()
.or_else(|| self.command.get_about())
{
writeln!(self.w, "{}", about)
fn about(&mut self) -> io::Result<()> {
if let Some(markdown) = &self.markdown {
writeln!(self.w, "{}", help_parser::parse_about(&markdown))
} else {
Ok(())
}
}
fn after_help(&mut self) -> io::Result<()> {
if let Some(after_help) = self.markdown_section("after help") {
return writeln!(self.w, "\n\n{}", after_help);
if let Some(markdown) = &self.markdown {
if let Some(after_help) = help_parser::parse_section("after help", &markdown) {
return writeln!(self.w, "\n\n{after_help}");
}
}
if let Some(after_help) = self
.command
.get_after_long_help()
.or_else(|| self.command.get_after_help())
{
writeln!(self.w, "\n\n{}", after_help)
} else {
Ok(())
}
Ok(())
}
fn examples(&mut self) -> io::Result<()> {
@@ -327,32 +307,6 @@ impl<'a, 'b> MDWriter<'a, 'b> {
}
writeln!(self.w, "</dl>\n")
}
fn markdown_section(&self, section: &str) -> Option<String> {
let md = self.markdown.as_ref()?;
let section = section.to_lowercase();
fn is_section_header(line: &str, section: &str) -> bool {
line.strip_prefix("##")
.map_or(false, |l| l.trim().to_lowercase() == section)
}
let result = md
.lines()
.skip_while(|&l| !is_section_header(l, &section))
.skip(1)
.take_while(|l| !l.starts_with("##"))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string();
if !result.is_empty() {
Some(result)
} else {
None
}
}
}
fn get_zip_content(archive: &mut ZipArchive<impl Read + Seek>, name: &str) -> Option<String> {
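The rewritten MDWriter::usage above takes the "{}" placeholder produced by help_parser::parse_usage and substitutes the utility name at render time. Condensed into a hypothetical free function (render_usage is not part of the diff, just an illustration of the same two steps):

// Same logic as MDWriter::usage above, minus the code fences it writes around the output.
fn render_usage(name: &str, markdown: &str) -> String {
    // parse_usage yields e.g. "{} -l"; substitute the real utility name.
    help_parser::parse_usage(markdown).replace("{}", name)
}

For the ls example used in the help_parser tests further down, render_usage("ls", md) would return "ls -l".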


@@ -0,0 +1,5 @@
[package]
name = "help_parser"
version = "0.0.18"
edition = "2021"
license = "MIT"

src/help_parser/src/lib.rs

@@ -0,0 +1,236 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! A collection of functions to parse the markdown code of help files.
//!
//! The structure of the markdown code is assumed to be:
//!
//! # util name
//!
//! ```text
//! usage info
//! ```
//!
//! About text
//!
//! ## Section 1
//!
//! Some content
//!
//! ## Section 2
//!
//! Some content
const MARKDOWN_CODE_FENCES: &str = "```";
/// Parses the text between the first markdown code block and the next header, if any,
/// into an about string.
pub fn parse_about(content: &str) -> String {
content
.lines()
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.take_while(|l| !l.starts_with('#'))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string()
}
/// Parses the first markdown code block into a usage string
///
/// The code fences are removed and the name of the util is replaced
/// with `{}` so that it can be replaced with the appropriate name
/// at runtime.
pub fn parse_usage(content: &str) -> String {
content
.lines()
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.take_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.map(|l| {
// Replace the util name (assumed to be the first word) with "{}"
// to be replaced with the runtime value later.
if let Some((_util, args)) = l.split_once(' ') {
format!("{{}} {args}\n")
} else {
"{}\n".to_string()
}
})
.collect::<Vec<_>>()
.join("")
.trim()
.to_string()
}
/// Get a single section from content
///
/// The section must be a second level section (i.e. start with `##`).
pub fn parse_section(section: &str, content: &str) -> Option<String> {
fn is_section_header(line: &str, section: &str) -> bool {
line.strip_prefix("##")
.map_or(false, |l| l.trim().to_lowercase() == section)
}
let section = &section.to_lowercase();
// We cannot distinguish between an empty or non-existing section below,
// so we do a quick test to check whether the section exists
if content.lines().all(|l| !is_section_header(l, section)) {
return None;
}
// Prefix includes space to allow processing of section with level 3-6 headers
let section_header_prefix = "## ";
Some(
content
.lines()
.skip_while(|&l| !is_section_header(l, section))
.skip(1)
.take_while(|l| !l.starts_with(section_header_prefix))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string(),
)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_section() {
let input = "\
# ls\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert_eq!(
parse_section("some section", input).unwrap(),
"This is some section"
);
assert_eq!(
parse_section("SOME SECTION", input).unwrap(),
"This is some section"
);
assert_eq!(
parse_section("another section", input).unwrap(),
"This is the other section\nwith multiple lines"
);
}
#[test]
fn test_parse_section_with_sub_headers() {
let input = "\
# ls\n\
## after section\n\
This is some section\n\
\n\
### level 3 header\n\
\n\
Additional text under the section.\n\
\n\
#### level 4 header\n\
\n\
Yet another paragraph\n";
assert_eq!(
parse_section("after section", input).unwrap(),
"This is some section\n\n\
### level 3 header\n\n\
Additional text under the section.\n\n\
#### level 4 header\n\n\
Yet another paragraph"
);
}
#[test]
fn test_parse_non_existing_section() {
let input = "\
# ls\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert!(parse_section("non-existing section", input).is_none());
}
#[test]
fn test_parse_usage() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert_eq!(parse_usage(input), "{} -l");
}
#[test]
fn test_parse_multi_line_usage() {
let input = "\
# ls\n\
```\n\
ls -a\n\
ls -b\n\
ls -c\n\
```\n\
## some section\n\
This is some section\n";
assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c");
}
#[test]
fn test_parse_about() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
\n\
This is the about section\n\
\n\
## some section\n\
This is some section\n";
assert_eq!(parse_about(input), "This is the about section");
}
#[test]
fn test_parse_multi_line_about() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
\n\
about a\n\
\n\
about b\n\
\n\
## some section\n\
This is some section\n";
assert_eq!(parse_about(input), "about a\n\nabout b");
}
}
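One behavioral detail worth noting: parse_section returns an Option, so each caller decides what a missing section means. uudoc (above) silently omits the section, while uucore_procs (below) turns None into a panic so a misspelled section name fails the build. A hypothetical caller that treats a missing section as empty, shown only to illustrate the contract:

// after_help_or_empty is an invented helper, not part of this commit.
fn after_help_or_empty(markdown: &str) -> String {
    // None means no "## after help" header exists in the help file.
    help_parser::parse_section("after help", markdown).unwrap_or_default()
}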


@@ -18,3 +18,4 @@ proc-macro = true
[dependencies]
proc-macro2 = "1.0"
quote = "1.0"
help_parser = { path="../help_parser", version="0.0.18" }


@@ -6,8 +6,6 @@ use std::{fs::File, io::Read, path::PathBuf};
use proc_macro::{Literal, TokenStream, TokenTree};
use quote::quote;
const MARKDOWN_CODE_FENCES: &str = "```";
//## rust proc-macro background info
//* ref: <https://dev.to/naufraghi/procedural-macro-in-rust-101-k3f> @@ <http://archive.is/Vbr5e>
//* ref: [path construction from LitStr](https://oschwald.github.io/maxminddb-rust/syn/struct.LitStr.html) @@ <http://archive.is/8YDua>
@@ -60,7 +58,7 @@ fn render_markdown(s: &str) -> String {
pub fn help_about(input: TokenStream) -> TokenStream {
let input: Vec<TokenTree> = input.into_iter().collect();
let filename = get_argument(&input, 0, "filename");
let text: String = parse_about(&read_help(&filename));
let text: String = help_parser::parse_about(&read_help(&filename));
TokenTree::Literal(Literal::string(&text)).into()
}
@@ -74,7 +72,7 @@ pub fn help_about(input: TokenStream) -> TokenStream {
pub fn help_usage(input: TokenStream) -> TokenStream {
let input: Vec<TokenTree> = input.into_iter().collect();
let filename = get_argument(&input, 0, "filename");
let text: String = parse_usage(&read_help(&filename));
let text: String = help_parser::parse_usage(&read_help(&filename));
TokenTree::Literal(Literal::string(&text)).into()
}
@@ -107,9 +105,15 @@ pub fn help_section(input: TokenStream) -> TokenStream {
let input: Vec<TokenTree> = input.into_iter().collect();
let section = get_argument(&input, 0, "section");
let filename = get_argument(&input, 1, "filename");
let text = parse_help_section(&section, &read_help(&filename));
let rendered = render_markdown(&text);
TokenTree::Literal(Literal::string(&rendered)).into()
if let Some(text) = help_parser::parse_section(&section, &read_help(&filename)) {
let rendered = render_markdown(&text);
TokenTree::Literal(Literal::string(&rendered)).into()
} else {
panic!(
"The section '{section}' could not be found in the help file. Maybe it is spelled wrong?"
)
}
}
/// Get an argument from the input vector of `TokenTree`.
@@ -148,214 +152,3 @@ fn read_help(filename: &str) -> String {
content
}
/// Get a single section from content
///
/// The section must be a second level section (i.e. start with `##`).
fn parse_help_section(section: &str, content: &str) -> String {
fn is_section_header(line: &str, section: &str) -> bool {
line.strip_prefix("##")
.map_or(false, |l| l.trim().to_lowercase() == section)
}
let section = &section.to_lowercase();
// We cannot distinguish between an empty or non-existing section below,
// so we do a quick test to check whether the section exists to provide
// a nice error message.
if content.lines().all(|l| !is_section_header(l, section)) {
panic!(
"The section '{section}' could not be found in the help file. Maybe it is spelled wrong?"
)
}
// Prefix includes space to allow processing of section with level 3-6 headers
let section_header_prefix = "## ";
content
.lines()
.skip_while(|&l| !is_section_header(l, section))
.skip(1)
.take_while(|l| !l.starts_with(section_header_prefix))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string()
}
/// Parses the first markdown code block into a usage string
///
/// The code fences are removed and the name of the util is replaced
/// with `{}` so that it can be replaced with the appropriate name
/// at runtime.
fn parse_usage(content: &str) -> String {
content
.lines()
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.take_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.map(|l| {
// Replace the util name (assumed to be the first word) with "{}"
// to be replaced with the runtime value later.
if let Some((_util, args)) = l.split_once(' ') {
format!("{{}} {args}\n")
} else {
"{}\n".to_string()
}
})
.collect::<Vec<_>>()
.join("")
.trim()
.to_string()
}
/// Parses the text between the first markdown code block and the next header, if any,
/// into an about string.
fn parse_about(content: &str) -> String {
content
.lines()
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES))
.skip(1)
.take_while(|l| !l.starts_with('#'))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string()
}
#[cfg(test)]
mod tests {
use super::{parse_about, parse_help_section, parse_usage};
#[test]
fn section_parsing() {
let input = "\
# ls\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert_eq!(
parse_help_section("some section", input),
"This is some section"
);
assert_eq!(
parse_help_section("SOME SECTION", input),
"This is some section"
);
assert_eq!(
parse_help_section("another section", input),
"This is the other section\nwith multiple lines"
);
}
#[test]
fn section_parsing_with_additional_headers() {
let input = "\
# ls\n\
## after section\n\
This is some section\n\
\n\
### level 3 header\n\
\n\
Additional text under the section.\n\
\n\
#### level 4 header\n\
\n\
Yet another paragraph\n";
assert_eq!(
parse_help_section("after section", input),
"This is some section\n\n\
### level 3 header\n\n\
Additional text under the section.\n\n\
#### level 4 header\n\n\
Yet another paragraph"
);
}
#[test]
#[should_panic]
fn section_parsing_panic() {
let input = "\
# ls\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
parse_help_section("non-existent section", input);
}
#[test]
fn usage_parsing() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
## some section\n\
This is some section\n\
\n\
## ANOTHER SECTION
This is the other section\n\
with multiple lines\n";
assert_eq!(parse_usage(input), "{} -l");
}
#[test]
fn multi_line_usage_parsing() {
let input = "\
# ls\n\
```\n\
ls -a\n\
ls -b\n\
ls -c\n\
```\n\
## some section\n\
This is some section\n";
assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c");
}
#[test]
fn about_parsing() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
\n\
This is the about section\n\
\n\
## some section\n\
This is some section\n";
assert_eq!(parse_about(input), "This is the about section");
}
#[test]
fn multi_line_about_parsing() {
let input = "\
# ls\n\
```\n\
ls -l\n\
```\n\
\n\
about a\n\
\n\
about b\n\
\n\
## some section\n\
This is some section\n";
assert_eq!(parse_about(input), "about a\n\nabout b");
}
}
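For context, the help_about!, help_usage!, and help_section! proc macros above read a markdown help file at expansion time and embed the parsed text as string literals. An invocation looks roughly like the following; the ls.md file name and the uucore re-export path are assumptions based on how utilities in this repository typically use the macros, not something shown in this diff:

// Illustrative only: the help file path is resolved relative to the invoking crate's source.
use uucore::{help_about, help_section, help_usage};

// Parsed at compile time from the utility's markdown help file.
const ABOUT: &str = help_about!("ls.md");
const USAGE: &str = help_usage!("ls.md");
const AFTER_HELP: &str = help_section!("after help", "ls.md");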