mirror of
https://github.com/orhun/systeroid
synced 2024-10-01 21:23:31 +00:00
chore(deps): remove parseit
crate from the workspace
Moved to https://github.com/orhun/parseit
This commit is contained in:
parent
eb0d4922f0
commit
c529bc6d11
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -486,6 +486,8 @@ checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
|
|||
[[package]]
|
||||
name = "parseit"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88b1326519874d2c452c203ca512d83be71b4703e8d848a9903f18771660f613"
|
||||
dependencies = [
|
||||
"flate2",
|
||||
"globwalk",
|
||||
|
|
|
@ -2,8 +2,7 @@
|
|||
members = [
|
||||
"systeroid-core",
|
||||
"systeroid-tui",
|
||||
"systeroid",
|
||||
"parseit",
|
||||
"systeroid"
|
||||
]
|
||||
|
||||
[profile.dev]
|
||||
|
|
|
@ -1,22 +0,0 @@
|
|||
[package]
|
||||
name = "parseit"
|
||||
version = "0.1.0"
|
||||
description = "Simple text file parsing library powered by regex and glob patterns"
|
||||
authors = ["Orhun Parmaksız <orhunparmaksiz@gmail.com>"]
|
||||
license = "MIT OR Apache-2.0"
|
||||
readme = "README.md"
|
||||
homepage = "https://github.com/orhun/systeroid"
|
||||
repository = "https://github.com/orhun/systeroid"
|
||||
keywords = ["text", "parser", "regex", "glob"]
|
||||
categories = ["parsing"]
|
||||
edition = "2021"
|
||||
rust-version = "1.56.1"
|
||||
|
||||
[features]
|
||||
gzip = ["flate2"]
|
||||
|
||||
[dependencies]
|
||||
regex = "1.5.5"
|
||||
globwalk = "0.8.1"
|
||||
thiserror = "1.0.29"
|
||||
flate2 = { version = "1.0.22", optional = true }
|
|
@ -1,39 +0,0 @@
|
|||
# parseit
|
||||
|
||||
Simple text file parsing library powered by [regex](https://en.wikipedia.org/wiki/Regular_expression) and [glob patterns](<https://en.wikipedia.org/wiki/Glob_(programming)>).
|
||||
|
||||
```rs
|
||||
// Create a parser to parse sections in Cargo.toml (and optionally Cargo.lock)
|
||||
let parser = Parser::new(&["Cargo.*"], &["Cargo.toml"], r#"^\[(.*)\]$\n"#).unwrap();
|
||||
|
||||
// Parse the files in the manifest directory.
|
||||
let documents = parser
|
||||
.parse(&PathBuf::from(env!("CARGO_MANIFEST_DIR")))
|
||||
.unwrap();
|
||||
|
||||
// Print results.
|
||||
for document in documents {
|
||||
println!("Path: {}", document.path.to_string_lossy());
|
||||
for paragraph in document.paragraphs {
|
||||
println!("Title: {}", paragraph.title);
|
||||
println!("Contents: {}", paragraph.contents);
|
||||
println!();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
See [examples](./examples/).
|
||||
|
||||
## License
|
||||
|
||||
Licensed under either of [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) or [The MIT License](http://opensource.org/licenses/MIT) at your option.
|
||||
|
||||
### Contribution
|
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache 2.0 License, shall be dual licensed as above, without any additional terms or conditions.
|
||||
|
||||
## Copyright
|
||||
|
||||
Copyright © 2022, [Orhun Parmaksız](mailto:orhunparmaksiz@gmail.com)
|
|
@ -1,26 +0,0 @@
|
|||
use parseit::error::Error;
|
||||
use parseit::parser::Parser;
|
||||
use std::path::PathBuf;
|
||||
|
||||
// Parse Cargo manifest and print sections.
|
||||
fn main() -> Result<(), Error> {
|
||||
// Create a parser.
|
||||
let parser = Parser::new(&["Cargo.*"], &[], r#"^\[(.*)\]$\n"#)?;
|
||||
|
||||
// Parse documents.
|
||||
let documents = parser.parse(&PathBuf::from(env!("CARGO_MANIFEST_DIR")))?;
|
||||
|
||||
// Print results.
|
||||
println!("Total parsed files: {}", documents.len());
|
||||
for document in documents {
|
||||
println!("Contents of {}:", document.path.to_string_lossy());
|
||||
println!();
|
||||
for paragraph in document.paragraphs {
|
||||
println!("[{}]", paragraph.title);
|
||||
println!("{}", paragraph.contents);
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -1,98 +0,0 @@
|
|||
use crate::error::Error;
|
||||
use regex::Captures;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Representation of a paragraph in a [`Document`].
///
/// A paragraph is a title together with the raw text that belongs to it.
/// Both fields are plain strings, so the type supports total equality (`Eq`)
/// in addition to `PartialEq`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Paragraph {
    /// Paragraph title.
    pub title: String,
    /// Raw contents of a paragraph.
    pub contents: String,
}
|
||||
|
||||
impl Paragraph {
|
||||
/// Constructs a new instance.
|
||||
pub fn new(title: String, contents: String) -> Self {
|
||||
Self { title, contents }
|
||||
}
|
||||
|
||||
/// Constructs a vector of paragraphs from the given regex capture groups.
|
||||
pub fn from_captures(
|
||||
capture_group: Vec<Captures<'_>>,
|
||||
input: &str,
|
||||
) -> Result<Vec<Self>, Error> {
|
||||
let mut paragraphs = Vec::new();
|
||||
for (i, captures) in capture_group.iter().enumerate() {
|
||||
let content_capture = captures.get(0).ok_or(Error::CaptureError)?;
|
||||
let title_capture = captures.get(1).ok_or(Error::CaptureError)?;
|
||||
paragraphs.push(Paragraph::new(
|
||||
title_capture.as_str().trim().to_string(),
|
||||
if let Some(next_capture) = capture_group.get(i + 1) {
|
||||
let next_capture = next_capture
|
||||
.iter()
|
||||
.next()
|
||||
.flatten()
|
||||
.ok_or(Error::CaptureError)?;
|
||||
(input[content_capture.end()..next_capture.start()]).to_string()
|
||||
} else {
|
||||
(input[content_capture.end()..]).to_string()
|
||||
}
|
||||
.lines()
|
||||
.map(|v| v.trim_start_matches('\t'))
|
||||
.collect::<Vec<&str>>()
|
||||
.join("\n")
|
||||
.trim()
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
Ok(paragraphs)
|
||||
}
|
||||
}
|
||||
|
||||
/// Representation of a parsed document which consists of paragraphs.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Document {
|
||||
/// Paragraphs in the document.
|
||||
pub paragraphs: Vec<Paragraph>,
|
||||
/// Source of the document.
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
impl Document {
|
||||
/// Constructs a new instance.
|
||||
pub fn new(paragraphs: Vec<Paragraph>, path: PathBuf) -> Self {
|
||||
Self { paragraphs, path }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::reader;
    use regex::RegexBuilder;

    /// Splits this crate's own `Cargo.toml` into sections and checks the result.
    #[test]
    fn test_paragraph() -> Result<(), Error> {
        let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("Cargo.toml");
        let input = reader::read_to_string(manifest)?;

        let section_header = RegexBuilder::new(r#"^(\[[a-zA-Z]+\])\n"#)
            .multi_line(true)
            .build()?;
        let captures: Vec<_> = section_header.captures_iter(&input).collect();
        let paragraphs = Paragraph::from_captures(captures, &input)?;
        assert!(paragraphs.len() >= 2);

        let package = &paragraphs[0];
        assert_eq!("[package]", package.title);
        let expected_version = format!("version = \"{}\"", env!("CARGO_PKG_VERSION"));
        assert!(package.contents.contains(&expected_version));

        // The dependencies section is only checked when it is present.
        let dependencies = paragraphs.iter().find(|p| p.title == "[dependencies]");
        if let Some(paragraph) = dependencies {
            assert!(paragraph.contents.contains("regex = "));
        }

        Ok(())
    }
}
|
|
@ -1,27 +0,0 @@
|
|||
use thiserror::Error as ThisError;
|
||||
|
||||
/// Custom error type.
|
||||
#[derive(Debug, ThisError)]
|
||||
pub enum Error {
|
||||
/// Error that may occur during I/O operations.
|
||||
#[error("IO error: `{0}`")]
|
||||
IoError(#[from] std::io::Error),
|
||||
/// Error that may occur due to invalid UTF-8 strings.
|
||||
#[error("non-UTF-8 string")]
|
||||
Utf8Error,
|
||||
/// Error that may occur when the capture group does not exist.
|
||||
#[error("capture group does not exist")]
|
||||
CaptureError,
|
||||
/// Error that may occur when the glob pattern returns zero results.
|
||||
#[error("could not find any files to parse")]
|
||||
EmptyFileListError,
|
||||
/// Error that may occur when a required file for parsing does not exist.
|
||||
#[error("required file missing: `{0}`")]
|
||||
MissingFileError(String),
|
||||
/// Error that may occur while traversing paths using a glob pattern.
|
||||
#[error("glob error: `{0}`")]
|
||||
GlobError(#[from] globwalk::GlobError),
|
||||
/// Error that may occur during the compilation of a regex.
|
||||
#[error("regex error: `{0}`")]
|
||||
RegexError(#[from] regex::Error),
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
//! Simple text file parsing library powered by [regex](https://en.wikipedia.org/wiki/Regular_expression) and [glob patterns](https://en.wikipedia.org/wiki/Glob_(programming)).
|
||||
|
||||
#![warn(missing_docs, clippy::unwrap_used)]
|
||||
|
||||
/// Export regex crate.
|
||||
pub use regex;
|
||||
|
||||
/// Export globwalk crate.
|
||||
pub use globwalk;
|
||||
|
||||
/// Document parser.
|
||||
pub mod parser;
|
||||
|
||||
/// Parser results.
|
||||
pub mod document;
|
||||
|
||||
/// Error implementation.
|
||||
pub mod error;
|
||||
|
||||
/// File reader.
|
||||
pub mod reader;
|
|
@ -1,113 +0,0 @@
|
|||
use crate::document::{Document, Paragraph};
|
||||
use crate::error::Error;
|
||||
use crate::reader;
|
||||
use globwalk::DirEntry;
|
||||
use regex::{Captures, Regex, RegexBuilder};
|
||||
use std::path::Path;
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
/// Parser for text files.
|
||||
///
|
||||
/// It is responsible for traversing the path specified with
|
||||
/// a glob pattern and parsing the contents of the files.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Parser<'a> {
|
||||
/// Glob patterns to specify the files to parse.
|
||||
pub glob_path: &'a [&'a str],
|
||||
/// Files to check during path traversal.
|
||||
pub required_files: &'a [&'a str],
|
||||
/// Regular expression to use for parsing.
|
||||
pub regex: Regex,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
/// Constructs a new instance.
|
||||
pub fn new(
|
||||
glob_path: &'a [&'a str],
|
||||
required_files: &'a [&'a str],
|
||||
regex: &'a str,
|
||||
) -> Result<Self, Error> {
|
||||
Ok(Self {
|
||||
glob_path,
|
||||
required_files,
|
||||
regex: RegexBuilder::new(regex).multi_line(true).build()?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses the files in the given base path and returns the documents.
|
||||
pub fn parse(&self, base_path: &Path) -> Result<Vec<Document>, Error> {
|
||||
let mut documents = Vec::new();
|
||||
let mut glob_files = Vec::new();
|
||||
for glob in self.glob_path {
|
||||
glob_files.extend(
|
||||
globwalk::glob(base_path.join(glob).to_str().ok_or(Error::Utf8Error)?)?
|
||||
.filter_map(StdResult::ok)
|
||||
.collect::<Vec<DirEntry>>(),
|
||||
);
|
||||
}
|
||||
if glob_files.is_empty() {
|
||||
return Err(Error::EmptyFileListError);
|
||||
}
|
||||
self.required_files
|
||||
.iter()
|
||||
.filter(|file_name| !file_name.is_empty())
|
||||
.try_for_each(|file_name| {
|
||||
glob_files
|
||||
.iter()
|
||||
.find(|file| file.file_name().to_str() == Some(file_name))
|
||||
.map(drop)
|
||||
.ok_or_else(|| Error::MissingFileError(file_name.to_string()))
|
||||
})?;
|
||||
for file in glob_files {
|
||||
let input = {
|
||||
#[cfg(feature = "gzip")]
|
||||
if file.path().extension().and_then(|ext| ext.to_str()) == Some("gz") {
|
||||
reader::read_gzip(file.path())
|
||||
} else {
|
||||
reader::read_to_string(file.path())
|
||||
}
|
||||
#[cfg(not(feature = "gzip"))]
|
||||
reader::read_to_string(file.path())
|
||||
}?;
|
||||
let capture_group = self
|
||||
.regex
|
||||
.captures_iter(&input)
|
||||
.collect::<Vec<Captures<'_>>>();
|
||||
documents.push(Document::new(
|
||||
Paragraph::from_captures(capture_group, &input)?,
|
||||
file.path().to_path_buf(),
|
||||
));
|
||||
}
|
||||
Ok(documents)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses the `[package]` section of this crate's Cargo files.
    #[test]
    fn test_document_parser() -> Result<(), Error> {
        let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        let parser = Parser::new(&["Cargo.*"], &[], r#"^(\[package\])\n"#)?;
        let mut documents = parser.parse(base_path.as_path())?;

        let expected_name = format!("name = \"{}\"", env!("CARGO_PKG_NAME"));
        assert!(documents[0].paragraphs[0].contents.contains(&expected_name));

        // Blank the contents so the rest of the document can be compared exactly.
        documents[0].paragraphs[0].contents.clear();
        let expected = Document::new(
            vec![Paragraph::new(String::from("[package]"), String::new())],
            base_path.join("Cargo.toml"),
        );
        assert_eq!(expected, documents[0]);
        Ok(())
    }
}
|
|
@ -1,107 +0,0 @@
|
|||
use std::fs::File;
|
||||
use std::io::{
|
||||
BufRead, BufReader as IoBufReader, Error as IoError, ErrorKind as IoErrorKind,
|
||||
Result as IoResult,
|
||||
};
|
||||
use std::path::Path;
|
||||
use std::rc::Rc;
|
||||
use std::str;
|
||||
|
||||
/// Default buffer size of the reader.
const DEFAULT_BUFFER_SIZE: usize = 1024;

/// Buffered reader that yields a file one `\n`-delimited chunk at a time.
///
/// Each yielded chunk is a shared handle to the reader's internal buffer and
/// includes the trailing `\n` when one was read. The buffer is reused for the
/// next chunk as long as the caller has dropped the previously yielded handle.
pub struct BufReader {
    /// Inner type.
    reader: IoBufReader<File>,
    /// Buffer.
    buffer: Rc<Vec<u8>>,
    /// Buffer size requested in [`BufReader::open`]; reused whenever a fresh
    /// buffer has to be allocated.
    buffer_size: Option<usize>,
}

impl BufReader {
    /// Opens the given file and initializes the buffered reader with given buffer size.
    ///
    /// `buffer_size` is the initial capacity of the chunk buffer;
    /// `DEFAULT_BUFFER_SIZE` is used when it is `None`.
    ///
    /// # Errors
    ///
    /// Returns the I/O error reported while opening `path`.
    pub fn open<P: AsRef<Path>>(path: P, buffer_size: Option<usize>) -> IoResult<Self> {
        let file = File::open(path)?;
        let reader = IoBufReader::new(file);
        let buffer = Self::new_buffer(buffer_size);
        Ok(Self {
            reader,
            buffer,
            buffer_size,
        })
    }

    /// Creates a new buffer with the given size.
    fn new_buffer(buffer_size: Option<usize>) -> Rc<Vec<u8>> {
        Rc::new(Vec::with_capacity(
            buffer_size.unwrap_or(DEFAULT_BUFFER_SIZE),
        ))
    }
}

impl Iterator for BufReader {
    type Item = IoResult<Rc<Vec<u8>>>;

    /// Reads the next chunk, returning `None` once no more bytes are available.
    fn next(&mut self) -> Option<Self::Item> {
        let buffer = match Rc::get_mut(&mut self.buffer) {
            // Nobody else holds the buffer: reuse its allocation.
            Some(rc_buffer) => {
                rc_buffer.clear();
                rc_buffer
            }
            // A previously yielded handle is still alive, so its contents must
            // not be overwritten. Allocate a replacement with the capacity the
            // caller asked for, not the default one.
            None => {
                self.buffer = Self::new_buffer(self.buffer_size);
                Rc::make_mut(&mut self.buffer)
            }
        };
        match self.reader.read_until(b'\n', buffer) {
            Ok(0) => None,
            Ok(_) => Some(Ok(Rc::clone(&self.buffer))),
            Err(e) => Some(Err(e)),
        }
    }
}
|
||||
|
||||
/// Reads the contents of the file into a string.
|
||||
///
|
||||
/// Uses [`BufReader`] under the hood.
|
||||
pub fn read_to_string<P: AsRef<Path>>(path: P) -> IoResult<String> {
|
||||
let mut lines = Vec::<String>::new();
|
||||
for line in BufReader::open(path, None)? {
|
||||
lines.push(
|
||||
str::from_utf8(&line?)
|
||||
.map_err(|e| IoError::new(IoErrorKind::Other, e))?
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
Ok(lines.join(""))
|
||||
}
|
||||
|
||||
/// Decodes the given gzip file and returns its contents as a string.
///
/// The compressed bytes are gathered with [`BufReader`] and then decoded.
#[cfg(feature = "gzip")]
pub fn read_gzip<P: AsRef<Path>>(path: P) -> IoResult<String> {
    use std::io::Read;

    let mut compressed = Vec::<u8>::new();
    for chunk in BufReader::open(path, None)? {
        compressed.extend_from_slice(&chunk?);
    }

    let mut decoded = String::new();
    flate2::read::GzDecoder::new(compressed.as_slice()).read_to_string(&mut decoded)?;
    Ok(decoded)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::Error;
    use std::path::PathBuf;

    /// Reads this crate's own `Cargo.toml` and looks for the package name.
    #[test]
    fn test_file_reader() -> Result<(), Error> {
        let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("Cargo.toml");
        let contents = read_to_string(manifest)?;
        let expected_name = format!("name = \"{}\"", env!("CARGO_PKG_NAME"));
        assert!(contents.contains(&expected_name));
        Ok(())
    }
}
|
|
@ -1,35 +0,0 @@
|
|||
use parseit::error::Error;
|
||||
use parseit::parser::Parser;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Parses the crate's own sources, using `#[cfg(test)]` as the paragraph marker.
#[test]
fn test_parser() -> Result<(), Error> {
    let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let parser = Parser::new(&["src/*.rs"], &["lib.rs"], r#"^(#\[cfg\(test\)\])$\n"#)?;
    let documents = parser.parse(base_path.as_path())?;
    let source_dir = base_path.join("src");

    // `lib.rs` must be among the documents and contain no test module.
    let lib = documents
        .iter()
        .find(|d| d.path == source_dir.join("lib.rs"))
        .unwrap();
    assert!(lib.paragraphs.is_empty());

    // `reader.rs` must be among the documents and contain its unit test.
    let reader = documents
        .iter()
        .find(|d| d.path == source_dir.join("reader.rs"))
        .unwrap();
    assert!(reader.paragraphs[0]
        .contents
        .contains("fn test_file_reader()"));

    // Every paragraph found anywhere is a test module.
    for paragraph in documents.iter().flat_map(|document| document.paragraphs.iter()) {
        assert_eq!("#[cfg(test)]", paragraph.title);
        assert!(paragraph.contents.contains("mod tests"));
        assert!(paragraph.contents.contains("use super::*;"));
    }

    Ok(())
}
|
|
@ -20,8 +20,4 @@ colored = "2.0.0"
|
|||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
serde_json = "1.0.79"
|
||||
dirs-next = "2.0.0"
|
||||
|
||||
[dependencies.parseit]
|
||||
version = "0.1.0"
|
||||
path = "../parseit"
|
||||
features = ["gzip"]
|
||||
parseit = { version = "0.1.0", features = ["gzip"] }
|
||||
|
|
Loading…
Reference in a new issue