Merge branch 'main' into add_readme

commit 8c79346195
Orhun Parmaksız, 2022-04-10 14:53:11 +03:00
GPG key ID: F83424824B3E4B90 (no known key found for this signature in database)

12 changed files with 4 additions and 495 deletions

Cargo.lock (generated)

@@ -486,6 +486,8 @@ checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
[[package]]
name = "parseit"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88b1326519874d2c452c203ca512d83be71b4703e8d848a9903f18771660f613"
dependencies = [
"flate2",
"globwalk",


@@ -2,8 +2,7 @@
members = [
"systeroid-core",
"systeroid-tui",
"systeroid",
"parseit",
"systeroid"
]
[profile.dev]


@@ -1,22 +0,0 @@
[package]
name = "parseit"
version = "0.1.0"
description = "Simple text file parsing library powered by regex and glob patterns"
authors = ["Orhun Parmaksız <orhunparmaksiz@gmail.com>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
homepage = "https://github.com/orhun/systeroid"
repository = "https://github.com/orhun/systeroid"
keywords = ["text", "parser", "regex", "glob"]
categories = ["parsing"]
edition = "2021"
rust-version = "1.56.1"
[features]
gzip = ["flate2"]
[dependencies]
regex = "1.5.5"
globwalk = "0.8.1"
thiserror = "1.0.29"
flate2 = { version = "1.0.22", optional = true }


@@ -1,39 +0,0 @@
# parseit
Simple text file parsing library powered by [regex](https://en.wikipedia.org/wiki/Regular_expression) and [glob patterns](<https://en.wikipedia.org/wiki/Glob_(programming)>).
```rs
// Create a parser to parse sections in Cargo.toml (and optionally Cargo.lock)
let parser = Parser::new(&["Cargo.*"], &["Cargo.toml"], r#"^\[(.*)\]$\n"#).unwrap();
// Parse the files in the manifest directory.
let documents = parser
.parse(&PathBuf::from(env!("CARGO_MANIFEST_DIR")))
.unwrap();
// Print results.
for document in documents {
println!("Path: {}", document.path.to_string_lossy());
for paragraph in document.paragraphs {
println!("Title: {}", paragraph.title);
println!("Contents: {}", paragraph.contents);
println!();
}
}
```
## Examples
See [examples](./examples/).
## License
Licensed under either of [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) or [The MIT License](http://opensource.org/licenses/MIT) at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache 2.0 License, shall be dual licensed as above, without any additional terms or conditions.
## Copyright
Copyright © 2022, [Orhun Parmaksız](mailto:orhunparmaksiz@gmail.com)
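
The snippet below is a minimal sketch of the optional `gzip` feature declared in parseit's manifest (`gzip = ["flate2"]`): when the feature is enabled, files with a `.gz` extension are decoded before the regex is applied. The dependency line, glob pattern, and base path are illustrative assumptions, not taken from this repository.

```rs
// Illustrative Cargo.toml entry (assumed, not from this repo):
// parseit = { version = "0.1.0", features = ["gzip"] }
use parseit::error::Error;
use parseit::parser::Parser;
use std::path::PathBuf;

fn main() -> Result<(), Error> {
    // With the `gzip` feature on, `.gz` files are decompressed transparently
    // before parsing; plain text files are read as usual.
    let parser = Parser::new(&["**/*.gz"], &[], r#"^\[(.*)\]$\n"#)?;
    // `/usr/share/doc` is only an example directory; any base path works.
    let documents = parser.parse(&PathBuf::from("/usr/share/doc"))?;
    println!("parsed {} gzipped files", documents.len());
    Ok(())
}
```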


@@ -1,26 +0,0 @@
use parseit::error::Error;
use parseit::parser::Parser;
use std::path::PathBuf;
// Parse Cargo manifest and print sections.
fn main() -> Result<(), Error> {
// Create a parser.
let parser = Parser::new(&["Cargo.*"], &[], r#"^\[(.*)\]$\n"#)?;
// Parse documents.
let documents = parser.parse(&PathBuf::from(env!("CARGO_MANIFEST_DIR")))?;
// Print results.
println!("Total parsed files: {}", documents.len());
for document in documents {
println!("Contents of {}:", document.path.to_string_lossy());
println!();
for paragraph in document.paragraphs {
println!("[{}]", paragraph.title);
println!("{}", paragraph.contents);
println!();
}
}
Ok(())
}


@@ -1,98 +0,0 @@
use crate::error::Error;
use regex::Captures;
use std::path::PathBuf;
/// Representation of a paragraph in a [`Document`].
#[derive(Clone, Debug, PartialEq)]
pub struct Paragraph {
/// Paragraph title.
pub title: String,
/// Raw contents of a paragraph.
pub contents: String,
}
impl Paragraph {
/// Constructs a new instance.
pub fn new(title: String, contents: String) -> Self {
Self { title, contents }
}
/// Constructs a vector of paragraphs from the given regex capture groups.
pub fn from_captures(
capture_group: Vec<Captures<'_>>,
input: &str,
) -> Result<Vec<Self>, Error> {
let mut paragraphs = Vec::new();
for (i, captures) in capture_group.iter().enumerate() {
let content_capture = captures.get(0).ok_or(Error::CaptureError)?;
let title_capture = captures.get(1).ok_or(Error::CaptureError)?;
paragraphs.push(Paragraph::new(
title_capture.as_str().trim().to_string(),
if let Some(next_capture) = capture_group.get(i + 1) {
let next_capture = next_capture
.iter()
.next()
.flatten()
.ok_or(Error::CaptureError)?;
(input[content_capture.end()..next_capture.start()]).to_string()
} else {
(input[content_capture.end()..]).to_string()
}
.lines()
.map(|v| v.trim_start_matches('\t'))
.collect::<Vec<&str>>()
.join("\n")
.trim()
.to_string(),
));
}
Ok(paragraphs)
}
}
/// Representation of a parsed document which consists of paragraphs.
#[derive(Clone, Debug, PartialEq)]
pub struct Document {
/// Paragraphs in the document.
pub paragraphs: Vec<Paragraph>,
/// Source of the document.
pub path: PathBuf,
}
impl Document {
/// Constructs a new instance.
pub fn new(paragraphs: Vec<Paragraph>, path: PathBuf) -> Self {
Self { paragraphs, path }
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::reader;
use regex::RegexBuilder;
#[test]
fn test_paragraph() -> Result<(), Error> {
let input =
reader::read_to_string(PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("Cargo.toml"))?;
let captures = RegexBuilder::new(r#"^(\[[a-zA-Z]+\])\n"#)
.multi_line(true)
.build()?
.captures_iter(&input)
.collect::<Vec<_>>();
let paragraphs = Paragraph::from_captures(captures, &input)?;
assert!(paragraphs.len() >= 2);
assert_eq!("[package]", paragraphs[0].title);
assert!(paragraphs[0]
.contents
.contains(&format!("version = \"{}\"", env!("CARGO_PKG_VERSION"))));
if let Some(paragraph) = paragraphs.iter().find(|p| p.title == "[dependencies]") {
assert!(paragraph.contents.contains("regex = "));
}
Ok(())
}
}
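
As a rough usage sketch of `Paragraph::from_captures` outside the test above: each paragraph takes its title from capture group 1 and its contents from the text between that match and the next one (or the end of input). The input string below is made up for illustration; the regex mirrors the one used in the test.

```rs
use parseit::document::Paragraph;
use parseit::error::Error;
use parseit::regex::RegexBuilder;

fn main() -> Result<(), Error> {
    // Made-up input: two INI-style sections.
    let input = "[package]\nname = \"demo\"\n[dependencies]\nregex = \"1\"\n";
    let captures = RegexBuilder::new(r#"^(\[[a-zA-Z]+\])\n"#)
        .multi_line(true)
        .build()?
        .captures_iter(input)
        .collect::<Vec<_>>();
    let paragraphs = Paragraph::from_captures(captures, input)?;
    assert_eq!("[package]", paragraphs[0].title);
    assert_eq!("name = \"demo\"", paragraphs[0].contents);
    assert_eq!("[dependencies]", paragraphs[1].title);
    Ok(())
}
```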


@@ -1,27 +0,0 @@
use thiserror::Error as ThisError;
/// Custom error type.
#[derive(Debug, ThisError)]
pub enum Error {
/// Error that may occur during I/O operations.
#[error("IO error: `{0}`")]
IoError(#[from] std::io::Error),
/// Error that may occur due to invalid UTF-8 strings.
#[error("non-UTF-8 string")]
Utf8Error,
/// Error that may occur when the capture group does not exist.
#[error("capture group does not exist")]
CaptureError,
/// Error that may occur when the glob pattern returns zero results.
#[error("could not find any files to parse")]
EmptyFileListError,
/// Error that may occur when a required file for parsing does not exist.
#[error("required file missing: `{0}`")]
MissingFileError(String),
/// Error that may occur while traversing paths using a glob pattern.
#[error("glob error: `{0}`")]
GlobError(#[from] globwalk::GlobError),
/// Error that may occur during the compilation of a regex.
#[error("regex error: `{0}`")]
RegexError(#[from] regex::Error),
}
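
A small sketch of how a caller might branch on these variants; the glob, the required file name, and the base path below are hypothetical and exist only to exercise the error paths.

```rs
use parseit::error::Error;
use parseit::parser::Parser;
use std::path::Path;

fn main() {
    // "definitely-missing.toml" is a hypothetical required file, used to
    // provoke `MissingFileError` when it is not among the glob matches.
    let parser = Parser::new(&["*.toml"], &["definitely-missing.toml"], r#"^\[(.*)\]$\n"#)
        .expect("the regex above is valid");
    match parser.parse(Path::new(".")) {
        Ok(documents) => println!("parsed {} documents", documents.len()),
        Err(Error::EmptyFileListError) => eprintln!("the glob matched no files"),
        Err(Error::MissingFileError(name)) => eprintln!("required file missing: {name}"),
        Err(other) => eprintln!("parse failed: {other}"),
    }
}
```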


@@ -1,21 +0,0 @@
//! Simple text file parsing library powered by [regex](https://en.wikipedia.org/wiki/Regular_expression) and [glob patterns](https://en.wikipedia.org/wiki/Glob_(programming)).
#![warn(missing_docs, clippy::unwrap_used)]
/// Export regex crate.
pub use regex;
/// Export globwalk crate.
pub use globwalk;
/// Document parser.
pub mod parser;
/// Parser results.
pub mod document;
/// Error implementation.
pub mod error;
/// File reader.
pub mod reader;


@@ -1,113 +0,0 @@
use crate::document::{Document, Paragraph};
use crate::error::Error;
use crate::reader;
use globwalk::DirEntry;
use regex::{Captures, Regex, RegexBuilder};
use std::path::Path;
use std::result::Result as StdResult;
/// Parser for text files.
///
/// It is responsible for traversing the path specified with
/// a glob pattern and parsing the contents of the files.
#[derive(Clone, Debug)]
pub struct Parser<'a> {
/// Glob patterns to specify the files to parse.
pub glob_path: &'a [&'a str],
/// Files to check during path traversal.
pub required_files: &'a [&'a str],
/// Regular expression to use for parsing.
pub regex: Regex,
}
impl<'a> Parser<'a> {
/// Constructs a new instance.
pub fn new(
glob_path: &'a [&'a str],
required_files: &'a [&'a str],
regex: &'a str,
) -> Result<Self, Error> {
Ok(Self {
glob_path,
required_files,
regex: RegexBuilder::new(regex).multi_line(true).build()?,
})
}
/// Parses the files in the given base path and returns the documents.
pub fn parse(&self, base_path: &Path) -> Result<Vec<Document>, Error> {
let mut documents = Vec::new();
let mut glob_files = Vec::new();
for glob in self.glob_path {
glob_files.extend(
globwalk::glob(base_path.join(glob).to_str().ok_or(Error::Utf8Error)?)?
.filter_map(StdResult::ok)
.collect::<Vec<DirEntry>>(),
);
}
if glob_files.is_empty() {
return Err(Error::EmptyFileListError);
}
self.required_files
.iter()
.filter(|file_name| !file_name.is_empty())
.try_for_each(|file_name| {
glob_files
.iter()
.find(|file| file.file_name().to_str() == Some(file_name))
.map(drop)
.ok_or_else(|| Error::MissingFileError(file_name.to_string()))
})?;
for file in glob_files {
let input = {
#[cfg(feature = "gzip")]
if file.path().extension().and_then(|ext| ext.to_str()) == Some("gz") {
reader::read_gzip(file.path())
} else {
reader::read_to_string(file.path())
}
#[cfg(not(feature = "gzip"))]
reader::read_to_string(file.path())
}?;
let capture_group = self
.regex
.captures_iter(&input)
.collect::<Vec<Captures<'_>>>();
documents.push(Document::new(
Paragraph::from_captures(capture_group, &input)?,
file.path().to_path_buf(),
));
}
Ok(documents)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
#[test]
fn test_document_parser() -> Result<(), Error> {
let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let parser = Parser::new(&["Cargo.*"], &[], r#"^(\[package\])\n"#)?;
let mut documents = parser.parse(base_path.as_path())?;
assert!(documents[0].paragraphs[0]
.contents
.contains(&format!("name = \"{}\"", env!("CARGO_PKG_NAME"))));
documents[0].paragraphs[0].contents = String::new();
assert_eq!(
Document {
paragraphs: vec![Paragraph {
title: String::from("[package]"),
contents: String::new(),
}],
path: base_path.join("Cargo.toml")
},
documents[0]
);
Ok(())
}
}
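
A sketch of how multiple glob patterns and `required_files` interact, assuming the API shown above: each pattern is expanded under the base path, the matches are pooled, and parsing aborts with `MissingFileError` if a required file name is absent from that pool. The patterns and base path here are illustrative.

```rs
use parseit::error::Error;
use parseit::parser::Parser;
use std::path::PathBuf;

fn main() -> Result<(), Error> {
    // Illustrative patterns: both are joined onto the base path before
    // traversal, and their matches are parsed with the same multi-line regex.
    let parser = Parser::new(
        &["Cargo.*", "systeroid-core/Cargo.*"],
        &["Cargo.toml"], // must appear among the matches, or parsing fails
        r#"^\[(.*)\]$\n"#,
    )?;
    let documents = parser.parse(&PathBuf::from("."))?;
    for document in documents {
        println!(
            "{}: {} section(s)",
            document.path.to_string_lossy(),
            document.paragraphs.len()
        );
    }
    Ok(())
}
```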


@@ -1,107 +0,0 @@
use std::fs::File;
use std::io::{
BufRead, BufReader as IoBufReader, Error as IoError, ErrorKind as IoErrorKind,
Result as IoResult,
};
use std::path::Path;
use std::rc::Rc;
use std::str;
/// Default buffer size of the reader.
const DEFAULT_BUFFER_SIZE: usize = 1024;
/// Buffered reader.
pub struct BufReader {
/// Inner type.
reader: IoBufReader<File>,
/// Buffer.
buffer: Rc<Vec<u8>>,
}
impl BufReader {
/// Opens the given file and initializes the buffered reader with given buffer size.
pub fn open<P: AsRef<Path>>(path: P, buffer_size: Option<usize>) -> IoResult<Self> {
let file = File::open(path)?;
let reader = IoBufReader::new(file);
let buffer = Self::new_buffer(buffer_size);
Ok(Self { reader, buffer })
}
/// Creates a new buffer with the given size.
fn new_buffer(buffer_size: Option<usize>) -> Rc<Vec<u8>> {
Rc::new(Vec::with_capacity(
buffer_size.unwrap_or(DEFAULT_BUFFER_SIZE),
))
}
}
impl Iterator for BufReader {
type Item = IoResult<Rc<Vec<u8>>>;
fn next(&mut self) -> Option<Self::Item> {
let buffer = match Rc::get_mut(&mut self.buffer) {
Some(rc_buffer) => {
rc_buffer.clear();
rc_buffer
}
None => {
self.buffer = Self::new_buffer(None);
Rc::make_mut(&mut self.buffer)
}
};
self.reader
.read_until(b'\n', buffer)
.map(|u| {
if u == 0 {
None
} else {
Some(Rc::clone(&self.buffer))
}
})
.transpose()
}
}
/// Reads the contents of the file into a string.
///
/// Uses [`BufReader`] under the hood.
pub fn read_to_string<P: AsRef<Path>>(path: P) -> IoResult<String> {
let mut lines = Vec::<String>::new();
for line in BufReader::open(path, None)? {
lines.push(
str::from_utf8(&line?)
.map_err(|e| IoError::new(IoErrorKind::Other, e))?
.to_string(),
);
}
Ok(lines.join(""))
}
/// Reads (decodes) the given gzip file into a string.
///
/// Uses [`BufReader`] under the hood.
#[cfg(feature = "gzip")]
pub fn read_gzip<P: AsRef<Path>>(path: P) -> IoResult<String> {
use std::io::Read;
let mut bytes = Vec::<u8>::new();
for read_bytes in BufReader::open(path, None)? {
bytes.extend(read_bytes?.to_vec());
}
let mut gz = flate2::read::GzDecoder::new(&bytes[..]);
let mut data = String::new();
gz.read_to_string(&mut data)?;
Ok(data)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::Error;
use std::path::PathBuf;
#[test]
fn test_file_reader() -> Result<(), Error> {
let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("Cargo.toml");
assert!(read_to_string(path)?.contains(&format!("name = \"{}\"", env!("CARGO_PKG_NAME"))));
Ok(())
}
}
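
For completeness, a sketch of using `BufReader` directly rather than through `read_to_string`: iteration yields one `Rc<Vec<u8>>` per line (trailing newline included), and the internal buffer is reused unless a previously returned `Rc` is still alive. The file path is illustrative.

```rs
use parseit::reader::BufReader;
use std::str;

fn main() -> std::io::Result<()> {
    // "Cargo.toml" stands in for any readable text file.
    for line in BufReader::open("Cargo.toml", None)? {
        let bytes = line?;
        // Each item keeps its trailing newline, so `print!` is enough here.
        print!("{}", str::from_utf8(&bytes).unwrap_or("<non-UTF-8 line>\n"));
    }
    Ok(())
}
```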


@@ -1,35 +0,0 @@
use parseit::error::Error;
use parseit::parser::Parser;
use std::path::PathBuf;
#[test]
fn test_parser() -> Result<(), Error> {
let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let parser = Parser::new(&["src/*.rs"], &["lib.rs"], r#"^(#\[cfg\(test\)\])$\n"#)?;
let documents = parser.parse(base_path.as_path())?;
assert!(documents
.iter()
.find(|d| d.path == PathBuf::from(base_path.join("src").join("lib.rs")))
.unwrap()
.paragraphs
.is_empty());
assert!(documents
.iter()
.find(|d| d.path == PathBuf::from(base_path.join("src").join("reader.rs")))
.unwrap()
.paragraphs[0]
.contents
.contains("fn test_file_reader()"));
documents.iter().for_each(|document| {
document.paragraphs.iter().for_each(|paragraph| {
assert_eq!("#[cfg(test)]", paragraph.title);
assert!(paragraph.contents.contains("mod tests"));
assert!(paragraph.contents.contains("use super::*;"));
});
});
Ok(())
}


@@ -20,8 +20,4 @@ colored = "2.0.0"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.79"
dirs-next = "2.0.0"
[dependencies.parseit]
version = "0.1.0"
path = "../parseit"
features = ["gzip"]
parseit = { version = "0.1.0", features = ["gzip"] }