refactor + add offset arg

JMARyA 2023-10-26 13:09:30 +02:00
parent 168dddd6b1
commit 2bdeae1994
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
5 changed files with 226 additions and 188 deletions

Cargo.lock (generated)

@@ -200,6 +200,27 @@ dependencies = [
"winapi",
]
[[package]]
name = "csv"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
dependencies = [
"memchr",
]
[[package]]
name = "env_logger"
version = "0.10.0"
@@ -347,6 +368,7 @@ dependencies = [
  "chrono",
  "clap",
  "comfy-table",
+ "csv",
  "env_logger",
  "log",
  "regex",

Cargo.toml

@@ -15,3 +15,4 @@ comfy-table = "7.1.0"
 env_logger = "0.10.0"
 log = "0.4.20"
 chrono = "0.4.31"
+csv = "1.3.0"

src/args.rs

@@ -5,7 +5,18 @@ pub fn get_args() -> ArgMatches {
         .about("Query markdown files")
         .arg(arg!([dir] "Directory to scan").required(true))
         .arg(arg!(-j --json "Output result as JSON").required(false))
-        .arg(arg!(-l --limit <LIMIT> "Limit number of results returned").required(false))
+        .arg(
+            arg!(-l --limit <LIMIT> "Limit number of results returned")
+                .required(false)
+                .default_value("0")
+                .allow_negative_numbers(false),
+        )
+        .arg(
+            arg!(--offset <OFFSET> "Offset results by a factor. Useful when used with --limit")
+                .required(false)
+                .allow_negative_numbers(false)
+                .default_value("0"),
+        )
        .arg(arg!(-f --filter <FILTER>... "Filter to apply to the documents").required(false))
        .arg(
            arg!(-c --column <COLUMN>... "Specify output columns")
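
Both new flags default to "0", which the library code below treats as "no limit"; and despite the wording "by a factor" in the help text, the offset is consumed as a plain result count (a skip(offset) applied before the limit, see the select_columns hunk further down). Since main() still round-trips the values through String with .parse().unwrap(), one alternative (a sketch only, not part of this commit) would be to let clap parse and validate the number itself:

    // Sketch: clap 4 can parse the usize at argument level via value_parser,
    // letting main() call args.get_one::<usize>("limit") directly.
    .arg(
        arg!(-l --limit <LIMIT> "Limit number of results returned")
            .required(false)
            .default_value("0")
            .value_parser(clap::value_parser!(usize)),
    )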

src/lib.rs

@@ -5,13 +5,11 @@ use txd::DataType;
 
 pub fn get_frontmatter(markdown: &str) -> Option<String> {
     let frontmatter_regex = regex::Regex::new(r"(?s)^---\s*\n(.*?)\n---").unwrap();
 
-    if let Some(captures) = frontmatter_regex.captures(markdown) {
+    frontmatter_regex.captures(markdown).and_then(|captures| {
         let frontmatter = captures.get(1).map(|m| m.as_str().to_string());
         frontmatter
-    } else {
-        None
-    }
+    })
 }
 
 fn system_time_to_date_time(t: std::time::SystemTime) -> chrono::DateTime<chrono::Utc> {
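
The rewritten get_frontmatter still binds the mapped capture to a temporary before returning it. Since captures.get(1).map(...) already yields the Option<String> the function wants, the closure could collapse further; a follow-up sketch, not part of this commit:

    // Sketch: fold the map into the and_then and drop the intermediate binding.
    frontmatter_regex
        .captures(markdown)
        .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))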
@@ -31,7 +29,7 @@ fn system_time_to_date_time(t: std::time::SystemTime) -> chrono::DateTime<chrono::Utc> {
     chrono::TimeZone::timestamp_opt(&chrono::Utc, sec, nsec).unwrap()
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Document {
     pub path: String,
     pub frontmatter: serde_yaml::Value,
@@ -42,9 +40,12 @@ pub struct Index {
     pub documents: Vec<Document>,
 }
 
-/// Create a markdown document index over `dir`
-pub fn scan_dir(dir: &str) -> Index {
-    let mut i = Index { documents: vec![] };
+type Table = Vec<Vec<String>>;
+
+impl Index {
+    /// Create a markdown document index over `dir`
+    pub fn new(dir: &str) -> Self {
+        let mut i = Self { documents: vec![] };
 
         for e in walkdir::WalkDir::new(dir)
             .into_iter()
@@ -74,119 +75,44 @@ pub fn scan_dir(dir: &str) -> Index {
         }
 
         i
-}
+    }
 
-/// Get a key from document.
-/// This will return internal properties first, then it will search the document frontmatter for the key and return it. If nothing was found an empty string is returned.
-fn get_key(d: &Document, key: &str) -> String {
-    match key {
-        "file.title" => {
-            let path = std::path::Path::new(&d.path);
-            return path.file_stem().unwrap().to_str().unwrap().to_string();
-        }
-        "file.name" => {
-            let path = std::path::Path::new(&d.path);
-            return path.file_name().unwrap().to_str().unwrap().to_string();
-        }
-        "file.parent" => {
-            let path = std::path::Path::new(&d.path);
-            return path
-                .parent()
-                .unwrap()
-                .file_name()
-                .unwrap()
-                .to_str()
-                .unwrap()
-                .to_string();
-        }
-        "file.folder" => {
-            let path = std::path::Path::new(&d.path);
-            return path.parent().unwrap().to_str().unwrap().to_string();
-        }
-        "file.ext" => {
-            let path = std::path::Path::new(&d.path);
-            return path.extension().unwrap().to_str().unwrap().to_string();
-        }
-        "file.size" => {
-            let path = std::path::Path::new(&d.path);
-            return path.metadata().unwrap().len().to_string();
-        }
-        "file.ctime" => {
-            let path = std::path::Path::new(&d.path);
-            return system_time_to_date_time(path.metadata().unwrap().created().unwrap())
-                .to_rfc3339();
-        }
-        "file.cday" => {
-            let path = std::path::Path::new(&d.path);
-            return system_time_to_date_time(path.metadata().unwrap().created().unwrap())
-                .format("%Y-%m-%d")
-                .to_string();
-        }
-        "file.mtime" => {
-            let path = std::path::Path::new(&d.path);
-            return system_time_to_date_time(path.metadata().unwrap().modified().unwrap())
-                .to_rfc3339();
-        }
-        "file.mday" => {
-            let path = std::path::Path::new(&d.path);
-            return system_time_to_date_time(path.metadata().unwrap().modified().unwrap())
-                .format("%Y-%m-%d")
-                .to_string();
-        }
-        "file.path" => {
-            return d.path.clone();
-        }
-        _ => {}
-    }
-
-    if let Some(val) = d.frontmatter.as_mapping().unwrap().get(key) {
-        stringify(val)
-    } else {
-        String::new()
-    }
-}
-
-fn stringify(val: &serde_yaml::Value) -> String {
-    match val {
-        serde_yaml::Value::Null => String::new(),
-        serde_yaml::Value::Bool(b) => b.to_string(),
-        serde_yaml::Value::Number(n) => n.to_string(),
-        serde_yaml::Value::String(s) => s.to_owned(),
-        serde_yaml::Value::Sequence(_) => serde_json::to_string(&val).unwrap(),
-        serde_yaml::Value::Mapping(_o) => todo!(),
-        serde_yaml::Value::Tagged(_) => unimplemented!(),
-    }
-}
-
-type Table = Vec<Vec<String>>;
-
-/// Build a table with specified columns from index
-#[must_use]
-pub fn select_columns(i: &Index, col: &[String]) -> Table {
+    /// Build a table with specified columns from index within specified scope
+    #[must_use]
+    pub fn select_columns(&self, col: &[String], limit: usize, offset: usize) -> Table {
         let mut rows = vec![];
 
-        for doc in &i.documents {
+        let scope: Vec<_> = self.documents.clone().into_iter().skip(offset).collect();
+
+        let scope = if limit == 0 {
+            scope
+        } else {
+            scope.into_iter().take(limit).collect()
+        };
+
+        for doc in scope {
             let mut rcol = vec![];
 
             for c in col {
-                rcol.push(get_key(doc, c));
+                rcol.push(doc.get_key(c));
             }
 
             rows.push(rcol);
         }
 
         rows
-}
+    }
 
-/// Apply filters to the documents of the index returning a new filtered index
-#[must_use]
-pub fn filter_documents(i: Index, filters: &[txd::filter::Filter]) -> Index {
+    /// Apply filters to the documents of the index returning a new filtered index
+    #[must_use]
+    pub fn filter_documents(&self, filters: &[txd::filter::Filter]) -> Self {
         // TODO : Implement option for chaining filters with AND OR
 
-        let docs: Vec<_> = i
+        let docs: Vec<_> = self
             .documents
-            .into_iter()
-            .filter_map(|x| {
+            .iter()
+            .filter(|x| {
                 let mut is_included = true;
 
                 for f in filters {
-                    let a_str = get_key(&x, &f.0);
+                    let a_str = x.get_key(&f.0);
                     let mut a = txd::parse(&a_str);
                     let b = txd::parse(&f.2);
@@ -207,13 +133,95 @@ pub fn filter_documents(i: Index, filters: &[txd::filter::Filter]) -> Index {
                         is_included = false;
                     }
                 }
 
-                if is_included {
-                    Some(x)
-                } else {
-                    None
-                }
+                is_included
             })
+            .cloned()
             .collect();
 
-    Index { documents: docs }
-}
+        Self { documents: docs }
+    }
+}
+
+impl Document {
+    /// Get a key from document.
+    /// This will return internal properties first, then it will search the document frontmatter for the key and return it. If nothing was found an empty string is returned.
+    fn get_key(&self, key: &str) -> String {
+        match key {
+            "file.title" => {
+                let path = std::path::Path::new(&self.path);
+                return path.file_stem().unwrap().to_str().unwrap().to_string();
+            }
+            "file.name" => {
+                let path = std::path::Path::new(&self.path);
+                return path.file_name().unwrap().to_str().unwrap().to_string();
+            }
+            "file.parent" => {
+                let path = std::path::Path::new(&self.path);
+                return path
+                    .parent()
+                    .unwrap()
+                    .file_name()
+                    .unwrap()
+                    .to_str()
+                    .unwrap()
+                    .to_string();
+            }
+            "file.folder" => {
+                let path = std::path::Path::new(&self.path);
+                return path.parent().unwrap().to_str().unwrap().to_string();
+            }
+            "file.ext" => {
+                let path = std::path::Path::new(&self.path);
+                return path.extension().unwrap().to_str().unwrap().to_string();
+            }
+            "file.size" => {
+                let path = std::path::Path::new(&self.path);
+                return path.metadata().unwrap().len().to_string();
+            }
+            "file.ctime" => {
+                let path = std::path::Path::new(&self.path);
+                return system_time_to_date_time(path.metadata().unwrap().created().unwrap())
+                    .to_rfc3339();
+            }
+            "file.cday" => {
+                let path = std::path::Path::new(&self.path);
+                return system_time_to_date_time(path.metadata().unwrap().created().unwrap())
+                    .format("%Y-%m-%d")
+                    .to_string();
+            }
+            "file.mtime" => {
+                let path = std::path::Path::new(&self.path);
+                return system_time_to_date_time(path.metadata().unwrap().modified().unwrap())
+                    .to_rfc3339();
+            }
+            "file.mday" => {
+                let path = std::path::Path::new(&self.path);
+                return system_time_to_date_time(path.metadata().unwrap().modified().unwrap())
+                    .format("%Y-%m-%d")
+                    .to_string();
+            }
+            "file.path" => {
+                return self.path.clone();
+            }
+            _ => {}
+        }
+
+        self.frontmatter
+            .as_mapping()
+            .unwrap()
+            .get(key)
+            .map_or_else(String::new, stringify)
+    }
+}
+
+fn stringify(val: &serde_yaml::Value) -> String {
+    match val {
+        serde_yaml::Value::Null => String::new(),
+        serde_yaml::Value::Bool(b) => b.to_string(),
+        serde_yaml::Value::Number(n) => n.to_string(),
+        serde_yaml::Value::String(s) => s.to_owned(),
+        serde_yaml::Value::Sequence(_) => serde_json::to_string(&val).unwrap(),
+        serde_yaml::Value::Mapping(_o) => todo!(),
+        serde_yaml::Value::Tagged(_) => unimplemented!(),
+    }
+}
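
A note on the new select_columns: it clones the entire document list (self.documents.clone()) just to apply skip/take, which, together with the .cloned() in filter_documents, is why Document now derives Clone. A borrow-based sketch with the same semantics (limit == 0 still meaning "no limit"), not part of this commit:

    /// Sketch: same offset/limit behaviour without cloning every Document.
    pub fn select_columns(&self, col: &[String], limit: usize, offset: usize) -> Table {
        let limit = if limit == 0 { usize::MAX } else { limit };
        self.documents
            .iter()
            .skip(offset) // drop the first `offset` documents
            .take(limit) // then keep at most `limit` of the rest
            .map(|doc| col.iter().map(|c| doc.get_key(c)).collect())
            .collect()
    }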

src/main.rs

@@ -1,6 +1,6 @@
 use std::io::IsTerminal;
 
-use mdq::{filter_documents, scan_dir, select_columns};
+use mdq::Index;
 
 mod args;
@@ -17,11 +17,9 @@ fn main() {
     let output_json = args.get_flag("json");
 
-    let limit: Option<usize> = if let Some(limit_arg) = args.get_one::<String>("limit") {
-        limit_arg.parse().ok()
-    } else {
-        None
-    };
+    let limit: usize = args.get_one::<String>("limit").unwrap().parse().unwrap();
+    let offset: usize = args.get_one::<String>("offset").unwrap().parse().unwrap();
 
     let columns: Vec<_> = args
         .get_many::<String>("column")
@@ -30,41 +28,30 @@ fn main() {
         .collect();
 
     log::info!("selected columns: {columns:?}");
 
-    let columns: Vec<(_, _)> = columns
+    let (columns, headers): (Vec<_>, Vec<_>) = columns
         .into_iter()
         .map(|x| {
             let (column, header_rename) = x.split_once(':').unwrap_or((&x, &x));
             (column.to_owned(), header_rename.to_owned())
         })
-        .collect();
+        .unzip();
 
-    let (columns, headers): (Vec<_>, Vec<_>) = columns.into_iter().unzip();
-
-    let filters: Vec<_> = if let Some(filters) = args.get_many::<String>("filter") {
-        filters.collect()
-    } else {
-        vec![]
-    };
+    let filters = args
+        .get_many::<String>("filter")
+        .map_or_else(std::vec::Vec::new, std::iter::Iterator::collect);
 
     let filters: Vec<_> = filters
         .into_iter()
         .map(|x| txd::filter::parse_condition(x).expect("failed to parse filter"))
         .collect();
 
-    let mut i = scan_dir(root_dir);
+    let mut i = Index::new(root_dir);
 
     if !filters.is_empty() {
-        i = filter_documents(i, &filters);
+        i = i.filter_documents(&filters);
     }
 
-    let data = if let Some(limit) = limit {
-        select_columns(&i, &columns.clone())
-            .into_iter()
-            .take(limit)
-            .collect::<Vec<_>>()
-    } else {
-        select_columns(&i, &columns.clone())
-    };
+    let data = i.select_columns(&columns, limit, offset);
 
     if output_json {
         let mut data = serde_json::json!(
@@ -86,14 +73,23 @@ fn main() {
         return;
     }
 
+    if !std::io::stdout().is_terminal() {
+        let mut writer = csv::WriterBuilder::new().from_writer(vec![]);
+        writer.write_record(headers).unwrap();
+        for e in data {
+            writer.write_record(e).unwrap();
+        }
+        print!(
+            "{}",
+            String::from_utf8(writer.into_inner().unwrap()).unwrap()
+        );
+        return;
+    }
+
     let mut table = comfy_table::Table::new();
     table.set_header(headers);
     table.load_preset(comfy_table::presets::UTF8_FULL_CONDENSED);
 
-    if !std::io::stdout().is_terminal() {
-        // TODO : Output as CSV?
-        table.load_preset(comfy_table::presets::NOTHING);
-    }
-
     table.add_rows(data);
     println!("{table}");
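
Net effect of the last hunk: when stdout is not a terminal (for example, mdq . -c file.title | sort), the tool now emits real CSV through the new csv dependency instead of an undecorated comfy-table; print! rather than println! is correct here because the csv writer already terminates the last record with a newline. The commit buffers the whole output in a Vec<u8> before printing; a streaming variant (a sketch, not part of this commit) would write records straight to stdout:

    // Sketch: stream CSV records to stdout instead of buffering them in a Vec<u8>.
    fn write_csv(headers: &[String], data: &[Vec<String>]) {
        let mut writer = csv::Writer::from_writer(std::io::stdout());
        writer.write_record(headers).unwrap();
        for row in data {
            writer.write_record(row).unwrap();
        }
        writer.flush().unwrap(); // push any buffered bytes out to the pipe
    }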