mirror of
https://github.com/orhun/systeroid
synced 2024-07-21 10:25:00 +00:00
refactor(parser): use regular expressions instead of pest parser
This commit is contained in:
parent
51e8777c5c
commit
1efc8cf504
193
Cargo.lock
generated
193
Cargo.lock
generated
|
@ -24,32 +24,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.7.3"
|
||||
name = "bstr"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
|
||||
checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
|
||||
dependencies = [
|
||||
"block-padding",
|
||||
"byte-tools",
|
||||
"byteorder",
|
||||
"generic-array",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-padding"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
|
||||
dependencies = [
|
||||
"byte-tools",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byte-tools"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
|
@ -106,15 +88,6 @@ dependencies = [
|
|||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.1"
|
||||
|
@ -122,19 +95,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
|
||||
[[package]]
|
||||
name = "fake-simd"
|
||||
version = "0.1.2"
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.12.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
]
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "getopts"
|
||||
|
@ -145,6 +109,30 @@ dependencies = [
|
|||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10463d9ff00a2a068db14231982f5132edebad0d7660cd956a1c30292dbcbfbd"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"bstr",
|
||||
"fnv",
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "globwalk"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"ignore",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
|
@ -155,26 +143,21 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy-regex"
|
||||
version = "2.2.1"
|
||||
name = "ignore"
|
||||
version = "0.4.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17d198f91272f6e788a5c0bd5d741cf778da4e5bc761ec67b32d5d3b0db34a54"
|
||||
checksum = "713f1b139373f96a2e0ce3ac931cd01ee973c3c5dd7c40c0c2efe96ad2b6751d"
|
||||
dependencies = [
|
||||
"lazy-regex-proc_macros",
|
||||
"once_cell",
|
||||
"crossbeam-utils",
|
||||
"globset",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"memchr",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy-regex-proc_macros"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c12938b1b92cf5be22940527e15b79fd0c7e706e34bc70816f6a72b3484f84e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"syn",
|
||||
"same-file",
|
||||
"thread_local",
|
||||
"walkdir",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -190,10 +173,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6"
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.2"
|
||||
name = "log"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
|
@ -226,55 +212,6 @@ version = "1.8.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56"
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
|
||||
|
||||
[[package]]
|
||||
name = "pest"
|
||||
version = "2.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
|
||||
dependencies = [
|
||||
"ucd-trie",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_derive"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_generator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_generator"
|
||||
version = "2.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_meta",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_meta"
|
||||
version = "2.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d"
|
||||
dependencies = [
|
||||
"maplit",
|
||||
"pest",
|
||||
"sha-1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.29"
|
||||
|
@ -350,18 +287,6 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "sha-1"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"digest",
|
||||
"fake-simd",
|
||||
"opaque-debug",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.78"
|
||||
|
@ -408,9 +333,8 @@ dependencies = [
|
|||
name = "systeroid-parser"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"lazy-regex",
|
||||
"pest",
|
||||
"pest_derive",
|
||||
"globwalk",
|
||||
"regex",
|
||||
"systeroid-core",
|
||||
]
|
||||
|
||||
|
@ -435,16 +359,13 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.14.0"
|
||||
name = "thread_local"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec"
|
||||
|
||||
[[package]]
|
||||
name = "ucd-trie"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
|
||||
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use std::fmt::{self, Display, Formatter};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
|
||||
/// Sections of the sysctl documentation.
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
|
@ -33,6 +33,17 @@ impl From<String> for SysctlSection {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a Path> for SysctlSection {
|
||||
fn from(value: &'a Path) -> Self {
|
||||
for section in Self::variants() {
|
||||
if value.file_stem().map(|v| v.to_str()).flatten() == Some(&section.to_string()) {
|
||||
return *section;
|
||||
}
|
||||
}
|
||||
Self::Unknown
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SysctlSection {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
write!(f, "{}", format!("{:?}", self).to_lowercase())
|
||||
|
@ -52,14 +63,6 @@ impl SysctlSection {
|
|||
Self::Vm,
|
||||
]
|
||||
}
|
||||
|
||||
/// Returns the path of the sysctl section.
|
||||
pub fn as_path(&self, kernel_docs: &Path) -> PathBuf {
|
||||
kernel_docs
|
||||
.join("admin-guide")
|
||||
.join("sysctl")
|
||||
.join(Path::new(&self.to_string()).with_extension("rst"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Documentation of a kernel parameter.
|
||||
|
|
|
@ -7,17 +7,24 @@ pub enum Error {
|
|||
#[error("IO error: `{0}`")]
|
||||
IoError(#[from] std::io::Error),
|
||||
/// Error that may occur whenever a lock is acquired.
|
||||
#[error("Thread lock error: `{0}`")]
|
||||
#[error("thread lock error: `{0}`")]
|
||||
ThreadLockError(String),
|
||||
/// Error that may occur while parsing documents.
|
||||
#[error("parse error: `{0}`")]
|
||||
ParseError(String),
|
||||
/// Error that may occur due to invalid UTF-8 strings.
|
||||
#[error("non-UTF-8 string")]
|
||||
Utf8Error,
|
||||
/// Error that may occur while traversing paths using a glob pattern.
|
||||
#[error("glob error: `{0}`")]
|
||||
GlobError(String),
|
||||
/// Error that may occur during the compilation of a regex.
|
||||
#[error("regex error: `{0}`")]
|
||||
RegexError(String),
|
||||
/// Error that may occur while handling sysctl operations.
|
||||
#[error("sysctl error: `{0}`")]
|
||||
SysctlError(#[from] sysctl::SysctlError),
|
||||
}
|
||||
|
||||
unsafe impl Send for Error {}
|
||||
|
||||
/// Type alias for the standard [`Result`] type.
|
||||
pub type Result<T> = core::result::Result<T, Error>;
|
||||
|
|
|
@ -11,5 +11,5 @@ pub mod sysctl;
|
|||
/// File reader.
|
||||
pub mod reader;
|
||||
|
||||
/// Error handler.
|
||||
/// Error implementation.
|
||||
pub mod error;
|
||||
|
|
|
@ -5,9 +5,8 @@ authors = ["Orhun Parmaksız <orhunparmaksiz@gmail.com>"]
|
|||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
pest = "2.1.3"
|
||||
pest_derive = "2.1.0"
|
||||
lazy-regex = "2.2.1"
|
||||
regex = "1.5.4"
|
||||
globwalk = "0.8.1"
|
||||
|
||||
[dependencies.systeroid-core]
|
||||
version = "0.1.0"
|
||||
|
|
|
@ -4,9 +4,3 @@
|
|||
|
||||
/// RST parser.
|
||||
pub mod parser;
|
||||
|
||||
/// Parsed title.
|
||||
pub mod title;
|
||||
|
||||
#[macro_use]
|
||||
extern crate pest_derive;
|
||||
|
|
|
@ -1,40 +1,66 @@
|
|||
#![allow(missing_docs)] // pest_derive does not generate doc comments
|
||||
|
||||
use crate::title::Title;
|
||||
use pest::Parser;
|
||||
use std::convert::TryFrom;
|
||||
use regex::{Captures, RegexBuilder};
|
||||
use std::path::Path;
|
||||
use std::result::Result as StdResult;
|
||||
use systeroid_core::docs::{Documentation, SysctlSection};
|
||||
use systeroid_core::error::{Error, Result};
|
||||
use systeroid_core::reader;
|
||||
|
||||
/// Parser for the reStructuredText format.
|
||||
#[derive(Parser)]
|
||||
#[grammar = "rst.pest"]
|
||||
pub struct RstParser;
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct RstParser<'a> {
|
||||
/// Glob pattern to specify the files to parse.
|
||||
pub glob_path: &'a str,
|
||||
/// Regular expression to use for parsing.
|
||||
pub regex: &'a str,
|
||||
/// Section of the parsed documents.
|
||||
pub section: Option<SysctlSection>,
|
||||
}
|
||||
|
||||
impl RstParser {
|
||||
impl RstParser<'_> {
|
||||
/// Parses the given reStructuredText input and returns the [`documentation`] of kernel parameters.
|
||||
///
|
||||
/// [`documentation`]: Documentation
|
||||
pub fn parse_docs(input: &str, section: SysctlSection) -> Result<Vec<Documentation>> {
|
||||
pub fn parse(&self, kernel_docs: &Path) -> Result<Vec<Documentation>> {
|
||||
let mut param_docs = Vec::new();
|
||||
let rst_document =
|
||||
Self::parse(Rule::document, input).map_err(|e| Error::ParseError(e.to_string()))?;
|
||||
let titles = rst_document
|
||||
.filter_map(|pair| Title::try_from(pair).ok())
|
||||
.collect::<Vec<Title<'_>>>();
|
||||
for (i, title) in titles.iter().enumerate() {
|
||||
param_docs.push(Documentation::new(
|
||||
title.value.to_string(),
|
||||
if let Some(next_title) = titles.get(i + 1) {
|
||||
(input[title.end_pos..next_title.start_pos])
|
||||
.trim()
|
||||
.to_string()
|
||||
} else {
|
||||
(input[title.end_pos..]).trim().to_string()
|
||||
},
|
||||
section,
|
||||
));
|
||||
|
||||
let regex = RegexBuilder::new(self.regex)
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.map_err(|e| Error::RegexError(e.to_string()))?;
|
||||
for file in globwalk::glob(
|
||||
kernel_docs
|
||||
.join(self.glob_path)
|
||||
.to_str()
|
||||
.ok_or(Error::Utf8Error)?,
|
||||
)
|
||||
.map_err(|e| Error::GlobError(e.to_string()))?
|
||||
.filter_map(StdResult::ok)
|
||||
{
|
||||
let section = self
|
||||
.section
|
||||
.unwrap_or_else(|| SysctlSection::from(file.path()));
|
||||
let input = reader::read_to_string(file.path())?;
|
||||
let capture_group = regex.captures_iter(&input).collect::<Vec<Captures<'_>>>();
|
||||
|
||||
for (i, captures) in capture_group.iter().enumerate() {
|
||||
let title_capture = captures.iter().last().flatten().unwrap();
|
||||
let capture = captures.iter().next().flatten().unwrap();
|
||||
|
||||
param_docs.push(Documentation::new(
|
||||
title_capture.as_str().trim().to_string(),
|
||||
if let Some(next_capture) = capture_group.get(i + 1) {
|
||||
let next_capture = next_capture.iter().next().flatten().unwrap();
|
||||
(input[capture.end()..next_capture.start()])
|
||||
.trim()
|
||||
.to_string()
|
||||
} else {
|
||||
(input[capture.end()..]).trim().to_string()
|
||||
},
|
||||
section,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(param_docs)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,538 +0,0 @@
|
|||
// Taken from https://github.com/flying-sheep/rust-rst/blob/master/parser/src/rst.pest
|
||||
|
||||
// Entry point: the document.
|
||||
|
||||
// This grammar is aligned to the doctree names when possible.
|
||||
// It will however contain blocks, as we can’t parse sections:
|
||||
// Section headers define the hierarchy by their delimiters,
|
||||
// and pest only has one stack that we need for indentation.
|
||||
|
||||
document = _{ SOI ~ blocks ~ EOI }
|
||||
blocks = _{ block ~ (blank_line* ~ block)* ~ blank_line? }
|
||||
block = _{ PEEK[..] ~ hanging_block }
|
||||
|
||||
// This is the list of all block-level elements
|
||||
// They’re defined hanging, i.e. without the first PEEK[..]
|
||||
hanging_block = _{
|
||||
substitution_def
|
||||
| image_directive
|
||||
| code_directive
|
||||
| raw_directive
|
||||
| admonition
|
||||
| admonition_gen
|
||||
| target
|
||||
| literal_block
|
||||
// Comments should be below the directives to try to match them first, but
|
||||
// above the title that will interpret ".." as a title marker.
|
||||
| block_comment
|
||||
| title
|
||||
| bullet_list
|
||||
| paragraph
|
||||
// TODO: implement all those things:
|
||||
// | block_quote
|
||||
// | verbatim
|
||||
// | doctest_block
|
||||
// | horizontal_rule
|
||||
// | table
|
||||
// | ordered_list
|
||||
// | plain
|
||||
}
|
||||
|
||||
// Substitution definition. A block type
|
||||
substitution_def = { ".." ~ PUSH(" "+) ~ "|" ~ substitution_name ~ "|" ~ " "+ ~ inline_dirblock ~ DROP }
|
||||
substitution_name = { !" " ~ (!(" "|"|") ~ ANY)+ ~ (" "+ ~ (!(" "|"|") ~ ANY)+)* }
|
||||
inline_dirblock = _{ replace | image } // TODO: implement others
|
||||
|
||||
// Target. A block type
|
||||
target = { target_qu | target_uq }
|
||||
target_uq = _{ ".. _" ~ target_name_uq ~ ":" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE }
|
||||
target_qu = _{ ".. _`" ~ !"``" ~ target_name_qu ~ !"``:" ~ "`:" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE }
|
||||
target_name_uq = { ( !("_"|":"|"`") ~ !NEWLINE ~ ANY )* }
|
||||
target_name_qu = { ( !(":"|"`"|"_>") ~ ANY )* }
|
||||
link_target = { nonspacechar+ }
|
||||
|
||||
// Title. A block type
|
||||
title = { title_double | title_single }
|
||||
title_double = { PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP }
|
||||
title_single = { line ~ PEEK[..] ~ adornments ~ NEWLINE }
|
||||
|
||||
// Bullet list. A block type.
|
||||
bullet_list = { bullet_item ~ (PEEK[..] ~ bullet_item)* }
|
||||
bullet_item = { bullet_marker ~ PUSH(" "+) ~ line ~ blank_line* ~ blist_body? ~ DROP }
|
||||
blist_body = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* }
|
||||
|
||||
// paragraph. A block type.
|
||||
paragraph = { inlines }
|
||||
|
||||
// literal_block
|
||||
literal_block = {
|
||||
"::" ~ " "* ~ NEWLINE ~
|
||||
blank_line+ ~ PUSH(" "+) ~ literal_lines ~ DROP
|
||||
}
|
||||
literal_lines = { literal_line ~ (literal_line_blank* ~ PEEK[..] ~ literal_line)* }
|
||||
literal_line_blank = { " "* ~ NEWLINE }
|
||||
literal_line = { (!NEWLINE ~ ANY)+ ~ NEWLINE }
|
||||
|
||||
/* Directives: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#directives
|
||||
* .. name:: arguments ~ :options: ~ blank_line+ ~ content
|
||||
* Everything except for the first argument has to be indented
|
||||
*/
|
||||
|
||||
|
||||
// Directives with options can have these or specific ones:
|
||||
common_opt_name = { "class" | "name" }
|
||||
|
||||
// Replace. A directive only usable in substitutions.
|
||||
|
||||
replace = { ^"replace::" ~ " "* ~ paragraph }
|
||||
|
||||
// Image. A directive.
|
||||
|
||||
image_directive = _{ ".." ~ PUSH(" "+) ~ image ~ DROP }
|
||||
image = { ^"image::" ~ line ~ image_opt_block? }
|
||||
image_opt_block = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ image_option } //TODO: merge with other directives?
|
||||
image_option = { ":" ~ image_opt_name ~ ":" ~ line }
|
||||
image_opt_name = { common_opt_name | "alt" | "height" | "width" | "scale" | "align" | "target" }
|
||||
|
||||
// Code block. A directive that allows adding a language to a literal block
|
||||
|
||||
code_directive = {
|
||||
".." ~ PUSH(" "+) ~ "code" ~ "-block"? ~ "::" ~ (" "+ ~ source)? ~ NEWLINE ~
|
||||
blank_line+ ~ PEEK[..-1] ~ PUSH(" " ~ POP) ~ literal_lines ~ DROP
|
||||
}
|
||||
source = { (!NEWLINE ~ ANY)+ }
|
||||
|
||||
// Raw block. A directive
|
||||
|
||||
raw_directive = {
|
||||
".." ~ PUSH(" "+) ~ "raw::" ~ " "+ ~ raw_output_format ~ NEWLINE ~
|
||||
blank_line+ ~ PEEK[..-1] ~ PUSH(" " ~ POP) ~ raw_block ~ DROP
|
||||
}
|
||||
raw_output_format = { (!NEWLINE ~ ANY)+ }
|
||||
raw_block = { raw_line ~ (raw_line_blank* ~ PEEK[..] ~ raw_line)* }
|
||||
raw_line_blank = { " "* ~ NEWLINE }
|
||||
raw_line = { (!NEWLINE ~ ANY)+ ~ NEWLINE }
|
||||
|
||||
// Admonition. A directive. The generic one has a title
|
||||
|
||||
admonition = { ".." ~ PUSH(" "+) ~ ^"admonition::" ~ line ~ blank_line* ~ admonition_content? ~ DROP }
|
||||
admonition_gen = { ".." ~ PUSH(" "+) ~ admonition_type ~ "::" ~ (blank_line | line) ~ blank_line* ~ admonition_content? ~ DROP }
|
||||
admonition_type = { ^"attention" | ^"caution" | ^"danger" | ^"error" | ^"hint" | ^"important" | ^"note" | ^"tip" | ^"warning" }
|
||||
admonition_content = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* } //TODO: merge with other directives?
|
||||
|
||||
// Comments.
|
||||
|
||||
block_comment = {
|
||||
".." ~
|
||||
( // Without title
|
||||
(" "* ~ NEWLINE)+ ~ PUSH(" "+) ~ comment_hanging ~ DROP
|
||||
// or with title
|
||||
| PUSH(" "+) ~ comment_line ~
|
||||
(comment_line_blank* ~ PEEK[..-1] ~ PUSH(" " ~ POP) ~ comment_hanging)? ~
|
||||
DROP
|
||||
// or empty
|
||||
| " "* ~ NEWLINE
|
||||
) ~
|
||||
(" "* ~ NEWLINE)*
|
||||
}
|
||||
comment_hanging = _{ comment_line ~ (comment_line_blank* ~ PEEK[..] ~ comment_line)* }
|
||||
comment_line_blank = { " "* ~ NEWLINE }
|
||||
comment_line = { (!NEWLINE ~ ANY)+ ~ NEWLINE }
|
||||
|
||||
|
||||
/*
|
||||
* inlines
|
||||
*/
|
||||
|
||||
|
||||
line = { !marker ~ inline+ ~ NEWLINE }
|
||||
blank_line = _{ !marker ~ !inline ~ " "* ~ NEWLINE }
|
||||
|
||||
inlines = _{ !marker ~ inline+ ~ ( ( ws_newline ~ PEEK[..] ~ !marker ~ inline+ )+ ~ NEWLINE )? }
|
||||
ws_newline = { NEWLINE }
|
||||
inline = _{ inline_special | str }
|
||||
inline_special = _{
|
||||
reference
|
||||
| substitution_ref
|
||||
| emph_outer
|
||||
| strong_outer
|
||||
| literal_outer
|
||||
// | ul_or_star_line
|
||||
// | space
|
||||
// | note_reference
|
||||
// | footnote
|
||||
// //| citation
|
||||
// | code
|
||||
// | application_depent
|
||||
// | entity
|
||||
// | escaped_char
|
||||
// | smart
|
||||
// | symbol
|
||||
}
|
||||
|
||||
str = { (!(NEWLINE | inline_special) ~ ANY)+ }
|
||||
|
||||
// simple formatting
|
||||
inline_nested = _{ inline_special | str_nested }
|
||||
str_nested = { word_nested ~ ( " "+ ~ word_nested)* }
|
||||
// TODO: allow ` in emph
|
||||
word_nested = _{ (!(NEWLINE | " " | inline_special | "*" | "`") ~ ANY)+ }
|
||||
|
||||
emph_outer = _{ "*" ~ emph ~ "*" }
|
||||
emph = { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* }
|
||||
strong_outer = _{ "**" ~ strong ~ "**" }
|
||||
strong = { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* }
|
||||
literal_outer = _{ "``" ~ literal ~ "``" }
|
||||
literal = { (!"``" ~ ANY)+ }
|
||||
|
||||
// inline links
|
||||
reference = { reference_target | reference_explicit | reference_auto }
|
||||
|
||||
reference_target = { (reference_target_uq ~ "_" | reference_target_qu) ~ !(LETTER|NUMBER) }
|
||||
reference_target_uq = { (!("_"|":"|"`") ~ nonspacechar)+ }
|
||||
reference_target_qu = { ( !("`"? ~ "`_") ~ "`" ~ !"``" ) ~ reference_text? ~ ("<" ~ reference_bracketed ~ ">")? ~ ( "`" ~ !"``" ) ~ "_" }
|
||||
reference_text = { !"<" ~ ( !("`"|"<") ~ ANY )+ }
|
||||
reference_bracketed = { url | (target_name_qu ~ "_") | relative_reference }
|
||||
relative_reference = { (!("`"|">") ~ ANY)+ }
|
||||
|
||||
reference_explicit = { reference_label ~ "(" ~ " "* ~ reference_source ~ " "* ~ (NEWLINE ~ PEEK[..])? ~ reference_title ~ " "* ~ ")" }
|
||||
reference_label = { "[" ~ !"^" ~ (!"]" ~ inline)* ~ "]" }
|
||||
reference_source = { reference_source_contents }
|
||||
reference_source_contents = _{ ( (!("("|")"|">") ~ nonspacechar)+ | "(" ~ reference_source_contents ~ ")" )* }
|
||||
reference_title = { ( reference_title_single | reference_title_double | "" ) }
|
||||
reference_title_single = { "'" ~ ( !("'" ~ " "+ ~ (")" | NEWLINE)) ~ ANY )* ~ "'" }
|
||||
reference_title_double = { "\"" ~ ( !("\"" ~ " "+ ~ (")" | NEWLINE)) ~ ANY )* ~ "\"" }
|
||||
|
||||
// Emails can't end with punctuation, but URLs must use a separate rule.
|
||||
reference_auto = { url_auto | email }
|
||||
//reference_embedded = { "`" ~ reference_embedded_source ~ "<" ~ absolute_url_with_fragment ~ ">`_" ~ "_"? }
|
||||
//reference_embedded_source = { ( !("<"|":"|"`") ~ ( " " | nonspacechar | blank_line ) )* }
|
||||
|
||||
substitution_ref = _{ "|" ~ substitution_name ~ "|" }
|
||||
|
||||
/* URLs as defined by the WHATWG URL standard. */
|
||||
url = { absolute_url_no_query ~ ("?" ~ url_unit*)? ~ ("#" ~ url_unit*)? }
|
||||
absolute_url_no_query = {
|
||||
( special_url_scheme ~ ":" ~ scheme_relative_special_url ) |
|
||||
( ^"file:" ~ scheme_relative_file_url ) |
|
||||
( arbitrary_scheme ~ ":" ~ relative_url )
|
||||
}
|
||||
scheme_relative_special_url = { "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? }
|
||||
path_absolute_url = { "/" ~ path_relative_url }
|
||||
path_relative_url = { ( url_path_segment_unit* ~ "/" )* ~ url_path_segment_unit* }
|
||||
url_path_segment_unit = { !("/"|"?") ~ url_unit }
|
||||
url_port = { ASCII_DIGIT* }
|
||||
scheme_relative_file_url = { "//" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url }
|
||||
relative_url = { ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? ) | path_absolute_url | (!(arbitrary_scheme ~ ":") ~ path_relative_url) }
|
||||
// this is approximately a superset of valid hosts and opaque hosts
|
||||
host = { ( !(":"|"/"|"?"|"#") ~ url_unit)+ | ("["~(ASCII_HEX_DIGIT|"."|":")+~"]") }
|
||||
special_url_scheme = { ^"ftp" | (^"http" | ^"ws") ~ ^"s"? } /* doesn't include "file" */
|
||||
arbitrary_scheme = { ASCII_ALPHA ~ ASCII_ALPHANUMERIC* }
|
||||
// taken at 2020-09-06 from https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
|
||||
known_scheme = {
|
||||
"aaa"|"aaas"|"about"|"acap"|"acct"|"acd"|"acr"|"adiumxtra"|"adt"|"afp"|"afs"|"aim"|"amss"|"android"|"appdata"|"apt"|"ark"|"attachment"|"aw"|
|
||||
"barion"|"beshare"|"bitcoin"|"bitcoincash"|"blob"|"bolo"|"browserext"|"cabal"|"calculator"|"callto"|"cap"|"cast"|"casts"|"chrome"|
|
||||
"chrome-extension"|"cid"|"coap"|"coap+tcp"|"coap+ws"|"coaps"|"coaps+tcp"|"coaps+ws"|"com-eventbrite-attendee"|"content"|"conti"|"crid"|"cvs"|
|
||||
"dab"|"dat"|"data"|"dav"|"diaspora"|"dict"|"did"|"dis"|"dlna-playcontainer"|"dlna-playsingle"|"dns"|"dntp"|"doi"|"dpp"|"drm"|"drop"|"dtmi"|
|
||||
"dtn"|"dvb"|"dweb"|"ed2k"|"elsi"|"ens"|"ethereum"|"example"|"facetime"|"fax"|"feed"|"feedready"|"file"|"filesystem"|"finger"|
|
||||
"first-run-pen-experience"|"fish"|"fm"|"ftp"|"fuchsia-pkg"|"geo"|"gg"|"git"|"gizmoproject"|"go"|"gopher"|"graph"|"gtalk"|"h323"|"ham"|"hcap"|
|
||||
"hcp"|"http"|"https"|"hxxp"|"hxxps"|"hydrazone"|"hyper"|"iax"|"icap"|"icon"|"im"|"imap"|"info"|"iotdisco"|"ipfs"|"ipn"|"ipns"|"ipp"|"ipps"|
|
||||
"irc"|"irc6"|"ircs"|"iris"|"iris.beep"|"iris.lwz"|"iris.xpc"|"iris.xpcs"|"isostore"|"itms"|"jabber"|"jar"|"jms"|"keyparc"|"lastfm"|"lbry"|
|
||||
"ldap"|"ldaps"|"leaptofrogans"|"lorawan"|"lvlt"|"magnet"|"mailserver"|"mailto"|"maps"|"market"|"matrix"|"message"|"microsoft.windows.camera"|
|
||||
"microsoft.windows.camera.multipicker"|"microsoft.windows.camera.picker"|"mid"|"mms"|"modem"|"mongodb"|"moz"|"ms-access"|
|
||||
"ms-browser-extension"|"ms-calculator"|"ms-drive-to"|"ms-enrollment"|"ms-excel"|"ms-eyecontrolspeech"|"ms-gamebarservices"|
|
||||
"ms-gamingoverlay"|"ms-getoffice"|"ms-help"|"ms-infopath"|"ms-inputapp"|"ms-lockscreencomponent-config"|"ms-media-stream-id"|
|
||||
"ms-mixedrealitycapture"|"ms-mobileplans"|"ms-officeapp"|"ms-people"|"ms-project"|"ms-powerpoint"|"ms-publisher"|"ms-restoretabcompanion"|
|
||||
"ms-screenclip"|"ms-screensketch"|"ms-search"|"ms-search-repair"|"ms-secondary-screen-controller"|"ms-secondary-screen-setup"|"ms-settings"|
|
||||
"ms-settings-airplanemode"|"ms-settings-bluetooth"|"ms-settings-camera"|"ms-settings-cellular"|"ms-settings-cloudstorage"|
|
||||
"ms-settings-connectabledevices"|"ms-settings-displays-topology"|"ms-settings-emailandaccounts"|"ms-settings-language"|
|
||||
"ms-settings-location"|"ms-settings-lock"|"ms-settings-nfctransactions"|"ms-settings-notifications"|"ms-settings-power"|
|
||||
"ms-settings-privacy"|"ms-settings-proximity"|"ms-settings-screenrotation"|"ms-settings-wifi"|"ms-settings-workplace"|"ms-spd"|
|
||||
"ms-sttoverlay"|"ms-transit-to"|"ms-useractivityset"|"ms-virtualtouchpad"|"ms-visio"|"ms-walk-to"|"ms-whiteboard"|"ms-whiteboard-cmd"|
|
||||
"ms-word"|"msnim"|"msrp"|"msrps"|"mss"|"mtqp"|"mumble"|"mupdate"|"mvn"|"news"|"nfs"|"ni"|"nih"|"nntp"|"notes"|"ocf"|"oid"|"onenote"|
|
||||
"onenote-cmd"|"opaquelocktoken"|"openpgp4fpr"|"otpauth"|"pack"|"palm"|"paparazzi"|"payment"|"payto"|"pkcs11"|"platform"|"pop"|"pres"|
|
||||
"prospero"|"proxy"|"pwid"|"psyc"|"pttp"|"qb"|"query"|"quic-transport"|"redis"|"rediss"|"reload"|"res"|"resource"|"rmi"|"rsync"|"rtmfp"|
|
||||
"rtmp"|"rtsp"|"rtsps"|"rtspu"|"secondlife"|"service"|"session"|"sftp"|"sgn"|"shttp"|"sieve"|"simpleledger"|"sip"|"sips"|"skype"|"smb"|"sms"|
|
||||
"smtp"|"snews"|"snmp"|"soap.beep"|"soap.beeps"|"soldat"|"spiffe"|"spotify"|"ssb"|"ssh"|"steam"|"stun"|"stuns"|"submit"|"swh"|"svn"|"tag"|
|
||||
"teamspeak"|"tel"|"teliaeid"|"telnet"|"tftp"|"things"|"thismessage"|"tip"|"tn3270"|"tool"|"turn"|"turns"|"tv"|"udp"|"unreal"|"upt"|"urn"|
|
||||
"ut2004"|"v-event"|"vemmi"|"ventrilo"|"videotex"|"vnc"|"view-source"|"vscode"|"vscode-insiders"|"vsls"|"wais"|"webcal"|"wifi"|"wpid"|"ws"|
|
||||
"wss"|"wtai"|"wyciwyg"|"xcon"|"xcon-userid"|"xfire"|"xmlrpc.beep"|"xmlrpc.beeps"|"xmpp"|"xri"|"ymsgr"|"z39.50"|"z39.50r"|"z39.50s"
|
||||
}
|
||||
url_unit = {
|
||||
ASCII_ALPHANUMERIC |
|
||||
"!"|"$"|"&"|"'"|"("|")"|"*"|"+"|","|"-"|"."|"/"|":"|";"|"="|"?"|"@"|"_"|"~" |
|
||||
(!(SURROGATE|NONCHARACTER_CODE_POINT) ~ '\u{A0}'..'\u{10FFFD}') |
|
||||
("%" ~ ASCII_HEX_DIGIT{2})
|
||||
}
|
||||
|
||||
/*
|
||||
* Rules for URLs that don't end in punctuation.
|
||||
* This is a modification of the rules above to incorporate the docutils rules
|
||||
* for the final character in an auto URL and for the character after it.
|
||||
* The patterns used here to emulate the behavior of docutils' regex are taken
|
||||
* from <http://www.inf.puc-rio.br/~roberto/docs/ry10-01.pdf>.
|
||||
*/
|
||||
url_auto = {
|
||||
( absolute_url_no_query ~ ("?" ~ url_unit*)? ~ "#" ~ url_units_auto ) |
|
||||
( absolute_url_no_query ~ "?" ~ url_units_auto ) |
|
||||
( special_url_scheme ~ "://" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) |
|
||||
( special_url_scheme ~ "://" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) |
|
||||
( special_url_scheme ~ "://" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) |
|
||||
( ^"file://" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url_auto ) |
|
||||
( known_scheme ~ ":" ~ relative_url_auto )
|
||||
}
|
||||
domain_host_auto = {
|
||||
( !(":"|"/"|"?"|"#") ~ url_unit ~ url_units_auto ) |
|
||||
( !(":"|"/"|"?"|"#") ~ url_unit ~ &">" ) |
|
||||
( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url )
|
||||
}
|
||||
path_absolute_url_auto = { "/" ~ path_relative_url_auto }
|
||||
path_relative_url_auto = { prua1 | prua2 | &follows_auto_url }
|
||||
prua1 = { ( url_path_segment_unit ~ prua1 ) | ( "/" ~ path_relative_url_auto ) }
|
||||
prua2 = { ( url_path_segment_unit ~ prua2 ) | ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"="|"+") ~ &follows_auto_url ) }
|
||||
// Part of an auto URL that follows `known_scheme ~ ":"` (see `url_auto`).
// Alternatives are tried in order (PEG ordered choice); do not reorder them.
relative_url_auto = {
    // "//", host, optional port, absolute path
    ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) |
    // "//", host and port with nothing after the port
    ( "//" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) |
    // "//" and a bare host: either `domain_host_auto` or a bracketed literal of
    // hex digits, "." and ":". `~` binds tighter than `|`, so the lookahead
    // belongs to the bracketed alternative only.
    ( "//" ~ ( domain_host_auto | "[" ~ (ASCII_HEX_DIGIT | "." | ":")+ ~ "]" ~ &follows_auto_url ) ) |
    // absolute path without an authority
    path_absolute_url_auto |
    // (prua1|prua2) is path_relative_url_auto minus the &follows_auto_url case
    ( !(known_scheme ~ ":") ~ (prua1 | prua2) )
}
|
||||
// One or more URL units forming the tail of an auto URL. Written
// right-recursively so that the last unit can be constrained separately.
url_units_auto = {
    // a unit followed by more units
    ( url_unit ~ url_units_auto ) |
    // a final unit directly followed by ">"
    // NOTE(review): ">" is itself an alternative of `follows_auto_url`, so the
    // second lookahead looks redundant — confirm before simplifying.
    ( url_unit ~ &">" ~ &follows_auto_url ) |
    // a permitted final character followed by `follows_auto_url`
    ( (ASCII_ALPHANUMERIC | "_" | "~" | "*" | "/" | "=" | "+") ~ &follows_auto_url )
}
|
||||
// What may come directly after an auto URL. The `*_auto` rules use it only
// behind `&` (positive lookahead), so it does not consume input there.
follows_auto_url = {
    // end of input, NUL, whitespace, ">", U+201A (single low-9 quotation mark),
    // U+201E (double low-9 quotation mark)
    EOI | "\x00" | WHITE_SPACE | ">" | "\u{201A}" | "\u{201E}" |
    // any punctuation except connector/open punctuation and "#", "%", "&", "*", "@"
    ( !(CONNECTOR_PUNCTUATION | OPEN_PUNCTUATION | "#" | "%" | "&" | "*" | "@") ~ PUNCTUATION )
}
|
||||
|
||||
/* Rules for emails as defined by the HTML standard */
|
||||
// E-mail address: a local part of one or more `email_atext` characters or ".",
// then "@", then one or more `email_label`s separated by ".".
email = { ( email_atext | "." )+ ~ "@" ~ email_label ~ ( "." ~ email_label )* }
|
||||
// A single character allowed in the local part of an e-mail address (besides ".").
// NOTE(review): the atext set referenced by the HTML standard also contains
// "*", "+" and "-", which are not listed here — confirm whether the omission is
// intentional (e.g. to avoid clashing with inline markup) before adding them.
email_atext = { ASCII_ALPHANUMERIC|"!"|"#"|"$"|"%"|"&"|"'"|"/"|"="|"?"|"^"|"_"|"`"|"{"|"|"|"}"|"~" }
|
||||
// One label of the domain part: an alphanumeric character followed by up to 62
// alphanumerics or "-". The negative lookahead rejects a run of "-" that is not
// followed by an alphanumeric, so a label cannot end in "-".
email_label = { ASCII_ALPHANUMERIC ~ ( !("-"+ ~ !ASCII_ALPHANUMERIC) ~ (ASCII_ALPHANUMERIC|"-") ){0,62} }
|
||||
|
||||
/*
|
||||
* character classes
|
||||
*/
|
||||
|
||||
|
||||
// Bullet list marker character: "+", "*" or "-". Silent rule (`_{ }`), so it
// produces no pair of its own.
bullet_marker = _{ "+" | "*" | "-" }
|
||||
// A run of one or more repetitions of a single adornment character. Each
// alternative repeats exactly one character, so mixed runs do not match as a whole.
adornments = {
    // recommended
    "="+ | "-"+ | "`"+ | ":"+ | "."+ | "'"+ | "\""+ | "~"+ | "^"+ | "_"+ | "*"+ | "+"+ | "#"+ |
    // parentheses
    "("+ | ")"+ | "["+ | "]"+ | "{"+ | "}"+ |
    // punctuation
    ","+ | ";"+ | "!"+ | "?"+ |
    // operators
    "&"+ | "|"+ | "/"+ | "%"+ | "<"+ | ">"+ |
    // misc
    "$"+ | "@"+ | "\\"+
}
|
||||
// Any single character other than a space (" ") or a NEWLINE. Silent rule.
nonspacechar = _{ !(" " | NEWLINE) ~ ANY }
|
||||
|
||||
|
||||
/*
|
||||
* lookaheads. do not use in another position
|
||||
*/
|
||||
|
||||
|
||||
// Start of a bullet item or of a ".." construct: `bullet_marker` or ".."
// followed by exactly one space. Silent rule.
marker = _{ (bullet_marker | "..") ~ " " }
|
||||
|
||||
|
||||
|
||||
//#################################################################################
|
||||
|
||||
|
||||
|
||||
// doctest_block = { (doctest_line+ ~ (!(">" | blank_line) ~ line)*)+ }
|
||||
|
||||
// block_quote_raw = { ":" ~ blank_line ~ NEWLINE ~ nonblank_indented_line+ }
|
||||
|
||||
// block_quote_chunk = {
|
||||
// !"::" ~ ":" ~ blank_line ~
|
||||
// NEWLINE ~
|
||||
// blank_line* ~
|
||||
// nonblank_indented_line+
|
||||
// }
|
||||
|
||||
// block_quote = { block_quote_chunk+ }
|
||||
|
||||
// horizontal_rule = {
|
||||
// ( "=" ~ sp ~ "=" ~ sp ~ "=" ~ (sp ~ "=")*
|
||||
// | "-" ~ sp ~ "-" ~ sp ~ "-" ~ (sp ~ "-")*
|
||||
// | "*" ~ sp ~ "*" ~ sp ~ "*" ~ (sp ~ "*")*
|
||||
// | "^" ~ sp ~ "^" ~ sp ~ "^" ~ (sp ~ "^")*
|
||||
// | "~" ~ sp ~ "~" ~ sp ~ "~" ~ (sp ~ "~")*
|
||||
// | "_" ~ sp ~ "_" ~ sp ~ "_" ~ (sp ~ "_")*
|
||||
// ) ~
|
||||
// sp ~ NEWLINE ~ blank_line+
|
||||
// }
|
||||
|
||||
// table = { grid_table | header_less_grid_table | simple_table }
|
||||
|
||||
// simple_table = { "NotImplemented" ~ "simple_table" }
|
||||
|
||||
// grid_table = { grid_table_header ~ grid_table_header_sep ~ grid_table_body+ }
|
||||
// header_less_grid_table = { grid_table_sep ~ grid_table_body+ }
|
||||
// grid_table_header = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line ~ grid_table_row+ }
|
||||
// grid_table_body = { ( grid_table_row ~ grid_table_sep )+ }
|
||||
// grid_table_row = { sp ~ "|" ~ sp ~ ( table_cell ~ sp ~ "|" )+ ~ blank_line }
|
||||
// table_cell = { ( ":" | ">" | "<" | "/" | "-" | spacechar | escaped_char | alphanumeric )+ }
|
||||
// grid_table_header_sep = { sp ~ "+" ~ ( "="+ ~ "+" )+ ~ blank_line }
|
||||
// grid_table_sep = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line }
|
||||
|
||||
// bullet = { !horizontal_rule ~ ("+" | "*" | "-") ~ spacechar+ }
|
||||
|
||||
// bullet_list = { &bullet ~ (list_tight | list_loose) }
|
||||
|
||||
// list_tight = { list_item_tight+ ~ blank_line* ~ !(bullet | enumerator | def_marker) }
|
||||
// list_loose = { ( list_item ~ blank_line* )+ }
|
||||
|
||||
// list_item = { (bullet | enumerator | def_marker) ~ list_block ~ list_continuation_block* }
|
||||
// list_item_tight = {
|
||||
// (bullet | enumerator | def_marker) ~
|
||||
// list_block ~
|
||||
// (!blank_line ~ list_continuation_block)* ~
|
||||
// !list_continuation_block
|
||||
// }
|
||||
|
||||
// list_block = { !blank_line ~ line ~ list_block_line* }
|
||||
|
||||
// list_continuation_block = { blank_line* ~ ( indent ~ list_block )+ }
|
||||
|
||||
// enumerator = { (ASCII_DIGIT+ | "#"+) ~ "." ~ spacechar+ }
|
||||
|
||||
// ordered_list = { &enumerator ~ (list_tight | list_loose) }
|
||||
|
||||
// list_block_line = {
|
||||
// !blank_line ~
|
||||
// !( (indent? ~ (bullet | enumerator)) | def_marker ) ~
|
||||
// !horizontal_rule ~
|
||||
// optionally_indented_line
|
||||
// }
|
||||
|
||||
|
||||
|
||||
// space = _{ spacechar+ }
|
||||
|
||||
// str = { normal_char+ ~ str_chunk* }
|
||||
// str_chunk = _{ (normal_char | "_"+ ~ &alphanumeric)+ }
|
||||
|
||||
// escaped_char = { "\\" ~ !NEWLINE ~ ("-" | "\\" | "`" | "|" | "*" | "_" | "{" | "}" | "[" | "]" | "(" | ")" | "#" | "+" | "." | "!" | ">" | "<") }
|
||||
|
||||
// entity = { hex_entity | dec_entity | char_entity }
|
||||
|
||||
// endline = _{ line_break | terminal_endline | normal_endline }
|
||||
// normal_endline = _{ sp ~ NEWLINE ~ !(blank_line | ">" | line ~ ("="+ | "-"+) ~ NEWLINE) }
|
||||
// terminal_endline = _{ sp ~ NEWLINE ~ EOI }
|
||||
// line_break = _{ " " ~ normal_endline }
|
||||
|
||||
// symbol = { special_char }
|
||||
|
||||
// application_depent = { !("`_" | "``_") ~ "`" ~ !"``" ~ target_name_qu ~ "`" ~ !("``" | "_") }
|
||||
|
||||
// // This keeps the parser from getting bogged down on long strings of "*" or "_",
|
||||
// // or strings of "*" or "_" with space on each side:
|
||||
// ul_or_star_line = { ul_line | star_line }
|
||||
// star_line = { "****" ~ "*"* | spacechar ~ "*"+ ~ &spacechar }
|
||||
// ul_line = { "____" ~ "_"* | spacechar ~ "_"+ ~ &spacechar }
|
||||
|
||||
|
||||
// empty_title = { "" }
|
||||
|
||||
// ticks_2 = { "``" ~ !"`" }
|
||||
|
||||
// code = { ticks_2 ~ ( (!"`" ~ nonspacechar)+ | "_" | !ticks_2 ~ "`" | !(sp ~ ticks_2) ~ (spacechar | NEWLINE ~ !blank_line) )+ ~ ticks_2 }
|
||||
|
||||
|
||||
// quoted = {
|
||||
// "\"" ~ (!"\"" ~ ANY)* ~ "\"" |
|
||||
// "'" ~ (!"'" ~ ANY)* ~ "'"
|
||||
// }
|
||||
// spacechar = _{ " " | "\t" }
|
||||
// sp = _{ spacechar* }
|
||||
// spnl = _{ sp ~ (NEWLINE ~ sp)? }
|
||||
// special_char = _{ "~" | "*" | "_" | "`" | "&" | "[" | "]" | "(" | ")" | "<" | "!" | "#" | "\\" | "\"" | "'" | extended_special_char }
|
||||
// normal_char = _{ !( special_char | spacechar | NEWLINE ) ~ ANY }
|
||||
// alphanumeric = {
|
||||
// ASCII_ALPHANUMERIC |
|
||||
// "\u{200}" | "\u{201}" | "\u{202}" | "\u{203}" | "\u{204}" | "\u{205}" | "\u{206}" | "\u{207}" |
|
||||
// "\u{210}" | "\u{211}" | "\u{212}" | "\u{213}" | "\u{214}" | "\u{215}" | "\u{216}" | "\u{217}" |
|
||||
// "\u{220}" | "\u{221}" | "\u{222}" | "\u{223}" | "\u{224}" | "\u{225}" | "\u{226}" | "\u{227}" |
|
||||
// "\u{230}" | "\u{231}" | "\u{232}" | "\u{233}" | "\u{234}" | "\u{235}" | "\u{236}" | "\u{237}" |
|
||||
// "\u{240}" | "\u{241}" | "\u{242}" | "\u{243}" | "\u{244}" | "\u{245}" | "\u{246}" | "\u{247}" |
|
||||
// "\u{250}" | "\u{251}" | "\u{252}" | "\u{253}" | "\u{254}" | "\u{255}" | "\u{256}" | "\u{257}" |
|
||||
// "\u{260}" | "\u{261}" | "\u{262}" | "\u{263}" | "\u{264}" | "\u{265}" | "\u{266}" | "\u{267}" |
|
||||
// "\u{270}" | "\u{271}" | "\u{272}" | "\u{273}" | "\u{274}" | "\u{275}" | "\u{276}" | "\u{277}" |
|
||||
// "\u{300}" | "\u{301}" | "\u{302}" | "\u{303}" | "\u{304}" | "\u{305}" | "\u{306}" | "\u{307}" |
|
||||
// "\u{310}" | "\u{311}" | "\u{312}" | "\u{313}" | "\u{314}" | "\u{315}" | "\u{316}" | "\u{317}" |
|
||||
// "\u{320}" | "\u{321}" | "\u{322}" | "\u{323}" | "\u{324}" | "\u{325}" | "\u{326}" | "\u{327}" |
|
||||
// "\u{330}" | "\u{331}" | "\u{332}" | "\u{333}" | "\u{334}" | "\u{335}" | "\u{336}" | "\u{337}" |
|
||||
// "\u{340}" | "\u{341}" | "\u{342}" | "\u{343}" | "\u{344}" | "\u{345}" | "\u{346}" | "\u{347}" |
|
||||
// "\u{350}" | "\u{351}" | "\u{352}" | "\u{353}" | "\u{354}" | "\u{355}" | "\u{356}" | "\u{357}" |
|
||||
// "\u{360}" | "\u{361}" | "\u{362}" | "\u{363}" | "\u{364}" | "\u{365}" | "\u{366}" | "\u{367}" |
|
||||
// "\u{370}" | "\u{371}" | "\u{372}" | "\u{373}" | "\u{374}" | "\u{375}" | "\u{376}" | "\u{377}"
|
||||
// }
|
||||
|
||||
// hex_entity = { "&#" ~ ("X"|"x") ~ ('0'..'9' | 'a'..'f' | 'A'..'F')+ ~ ";" }
|
||||
// dec_entity = { "&#" ~ ASCII_DIGIT+ ~ ";" }
|
||||
// char_entity = { "&" ~ ASCII_ALPHANUMERIC+ ~ ";" }
|
||||
|
||||
// indent = _{ "\t" | " " }
|
||||
// indented_line = { indent ~ line }
|
||||
// optionally_indented_line = { indent? ~ line }
|
||||
|
||||
// doctest_line = { ">>> " ~ raw_line }
|
||||
|
||||
// line = _{ raw_line }
|
||||
|
||||
// raw_line = _{ (!NEWLINE ~ ANY)* ~ NEWLINE | (!EOI ~ ANY)+ ~ EOI }
|
||||
|
||||
// // Syntax extensions
|
||||
|
||||
// extended_special_char = {
|
||||
// //&{ extension(EXT_SMART) } ~
|
||||
// ("." | "-" | "\"" | "'") |
|
||||
// //&{ extension(EXT_NOTES) } ~
|
||||
// "^"
|
||||
// }
|
||||
|
||||
// smart = {
|
||||
// //&{ extension(EXT_SMART) } ~
|
||||
// ( ellipsis | dash | single_quoted | double_quoted | apostrophe )
|
||||
// }
|
||||
|
||||
// apostrophe = { "'" }
|
||||
|
||||
// ellipsis = { "..." | ". . ." }
|
||||
|
||||
// dash = { em_dash | en_dash }
|
||||
// en_dash = { "-" ~ &ASCII_DIGIT }
|
||||
// em_dash = { "---" | "--" }
|
||||
|
||||
// single_quote_start = { "'" ~ !(spacechar | NEWLINE) }
|
||||
// single_quote_end = { "'" ~ !alphanumeric }
|
||||
// single_quoted = { single_quote_start ~ ( !single_quote_end ~ inline )+ ~ single_quote_end }
|
||||
|
||||
// double_quote_start = { "\"" }
|
||||
// double_quote_end = { "\"" }
|
||||
// double_quoted = { double_quote_start ~ ( !double_quote_end ~ inline )+ ~ double_quote_end }
|
||||
|
||||
// footnote = { "[#" ~ (!"]" ~ inline)+ ~ "]_" }
|
||||
|
||||
// definition = {
|
||||
// &( (!defmark ~ nonspacechar ~ raw_line) ~ blank_line? ~ defmark) ~
|
||||
// d_list_title+ ~
|
||||
// (def_tight | def_loose)
|
||||
// }
|
||||
// d_list_title = { !defmark ~ &nonspacechar ~ (!endline ~ inline)+ ~ sp ~ NEWLINE }
|
||||
// def_tight = { &defmark ~ list_tight }
|
||||
// def_loose = { blank_line ~ &defmark ~ list_loose }
|
||||
// defmark = { (":" | "~") ~ spacechar+ }
|
||||
// def_marker = {
|
||||
// //&{ extension(EXT_DLISTS) } ~
|
||||
// defmark
|
||||
// }
|
|
@ -1,72 +0,0 @@
|
|||
use crate::parser::Rule;
|
||||
use lazy_regex::{regex, Lazy, Regex};
|
||||
use pest::iterators::Pair;
|
||||
use pest::Token;
|
||||
use std::convert::TryFrom;
|
||||
use systeroid_core::error::Error as ErrorImpl;
|
||||
|
||||
/// Regex for matching the explanation of the sysctl sections.
|
||||
///
|
||||
/// These _titles_ should be skipped since they are often describing the
|
||||
/// documentation in the following section rather than a kernel parameter.
|
||||
///
|
||||
/// e.g. `2. /proc/sys/fs/binfmt_misc`
|
||||
static SECTION_EXPL_REGEX: &Lazy<Regex> = regex!("[0-9].\\s/proc/sys/");
|
||||
|
||||
/// Title from the parsed RST document.
///
/// The text is borrowed from the parsed input for the lifetime `'a`; the
/// positions are the ones reported by the parser for the title rule.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Title<'a> {
    /// Title value.
    pub value: &'a str,
    /// Start position of the title.
    pub start_pos: usize,
    /// End position of the title.
    pub end_pos: usize,
}
|
||||
|
||||
impl<'a> TryFrom<Pair<'a, Rule>> for Title<'a> {
|
||||
type Error = ErrorImpl;
|
||||
|
||||
fn try_from(pair: Pair<'a, Rule>) -> Result<Self, Self::Error> {
|
||||
let mut title = Title::default();
|
||||
|
||||
// check if the rule matches
|
||||
if pair.as_rule() != Rule::title {
|
||||
return Err(ErrorImpl::ParseError(String::from(
|
||||
"parsed section is not a title",
|
||||
)));
|
||||
}
|
||||
|
||||
// set the actual title
|
||||
if let Some(value) = pair.as_str().lines().next() {
|
||||
if value.chars().all(|v| v == '=') {
|
||||
return Err(ErrorImpl::ParseError(String::from(
|
||||
"document beginning found",
|
||||
)));
|
||||
} else if SECTION_EXPL_REGEX.is_match(value) {
|
||||
return Err(ErrorImpl::ParseError(String::from(
|
||||
"section explanation found",
|
||||
)));
|
||||
}
|
||||
title.value = value;
|
||||
} else {
|
||||
return Err(ErrorImpl::ParseError(String::from("invalid title")));
|
||||
}
|
||||
|
||||
// set token positions
|
||||
pair.tokens().for_each(|token| match token {
|
||||
Token::Start { rule, pos } => {
|
||||
if rule == Rule::title {
|
||||
title.start_pos = pos.pos();
|
||||
}
|
||||
}
|
||||
Token::End { rule, pos } => {
|
||||
if rule == Rule::title {
|
||||
title.end_pos = pos.pos();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(title)
|
||||
}
|
||||
}
|
|
@ -10,7 +10,6 @@ use rayon::prelude::*;
|
|||
use std::sync::Mutex;
|
||||
use systeroid_core::docs::{Documentation, SysctlSection};
|
||||
use systeroid_core::error::{Error, Result};
|
||||
use systeroid_core::reader;
|
||||
use systeroid_core::sysctl::Sysctl;
|
||||
use systeroid_parser::parser::RstParser;
|
||||
|
||||
|
@ -18,15 +17,27 @@ use systeroid_parser::parser::RstParser;
|
|||
pub fn run(args: Args) -> Result<()> {
|
||||
let mut sysctl = Sysctl::init()?;
|
||||
|
||||
let parsers = vec![
|
||||
RstParser {
|
||||
glob_path: "admin-guide/sysctl/*.rst",
|
||||
regex: "^\n([a-z].*)\n[=,-]{2,}+\n\n",
|
||||
section: None,
|
||||
},
|
||||
RstParser {
|
||||
glob_path: "networking/*-sysctl.rst",
|
||||
regex: "^([a-zA-Z0-9_/-]+)[ ]-[ ][a-zA-Z].*$",
|
||||
section: Some(SysctlSection::Net),
|
||||
},
|
||||
];
|
||||
|
||||
let param_docs = if let Some(kernel_docs) = args.kernel_docs {
|
||||
let param_docs = Mutex::new(Vec::new());
|
||||
SysctlSection::variants().par_iter().try_for_each(|s| {
|
||||
parsers.par_iter().try_for_each(|s| {
|
||||
let mut param_docs = param_docs
|
||||
.lock()
|
||||
.map_err(|e| Error::ThreadLockError(e.to_string()))?;
|
||||
let mut parse = |section: SysctlSection| -> Result<()> {
|
||||
let docs = reader::read_to_string(§ion.as_path(&kernel_docs))?;
|
||||
param_docs.extend(RstParser::parse_docs(&docs, section)?);
|
||||
let mut parse = |parser: RstParser| -> Result<()> {
|
||||
param_docs.extend(parser.parse(&kernel_docs)?);
|
||||
Ok(())
|
||||
};
|
||||
parse(*s)
|
||||
|
@ -48,7 +59,7 @@ pub fn run(args: Args) -> Result<()> {
|
|||
|
||||
for param in sysctl.parameters {
|
||||
println!(
|
||||
"{} ({})\n===\n{}\n",
|
||||
"{} ({})\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n{}\n",
|
||||
param.name,
|
||||
param.documentation.map(|d| d.name).unwrap_or_default(),
|
||||
param
|
||||
|
|
Loading…
Reference in a new issue