diff --git a/Cargo.lock b/Cargo.lock index adbd6ae..4aaafb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -200,6 +200,27 @@ dependencies = [ "winapi", ] +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "env_logger" version = "0.10.0" @@ -347,6 +368,7 @@ dependencies = [ "chrono", "clap", "comfy-table", + "csv", "env_logger", "log", "regex", diff --git a/Cargo.toml b/Cargo.toml index fa638a5..b8a09d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ comfy-table = "7.1.0" env_logger = "0.10.0" log = "0.4.20" chrono = "0.4.31" +csv = "1.3.0" diff --git a/src/args.rs b/src/args.rs index 11d75c5..e9b6256 100644 --- a/src/args.rs +++ b/src/args.rs @@ -5,7 +5,18 @@ pub fn get_args() -> ArgMatches { .about("Query markdown files") .arg(arg!([dir] "Directory to scan").required(true)) .arg(arg!(-j --json "Output result as JSON").required(false)) - .arg(arg!(-l --limit "Limit number of results returned").required(false)) + .arg( + arg!(-l --limit "Limit number of results returned") + .required(false) + .default_value("0") + .allow_negative_numbers(false), + ) + .arg( + arg!(--offset "Offset results by a factor. Useful when used with --limit") + .required(false) + .allow_negative_numbers(false) + .default_value("0"), + ) .arg(arg!(-f --filter ... "Filter to apply to the documents").required(false)) .arg( arg!(-c --column ... "Specify output columns") diff --git a/src/lib.rs b/src/lib.rs index 588e202..9e45287 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,13 +5,11 @@ use txd::DataType; pub fn get_frontmatter(markdown: &str) -> Option { let frontmatter_regex = regex::Regex::new(r"(?s)^---\s*\n(.*?)\n---").unwrap(); - if let Some(captures) = frontmatter_regex.captures(markdown) { + frontmatter_regex.captures(markdown).and_then(|captures| { let frontmatter = captures.get(1).map(|m| m.as_str().to_string()); frontmatter - } else { - None - } + }) } fn system_time_to_date_time(t: std::time::SystemTime) -> chrono::DateTime { @@ -31,7 +29,7 @@ fn system_time_to_date_time(t: std::time::SystemTime) -> chrono::DateTime, } -/// Create a markdown document index over `dir` -pub fn scan_dir(dir: &str) -> Index { - let mut i = Index { documents: vec![] }; +type Table = Vec>; - for e in walkdir::WalkDir::new(dir) - .into_iter() - .filter_map(std::result::Result::ok) - { - if e.path().is_dir() { - continue; - } - if e.path().extension().is_none() { - continue; - } - if e.path().extension().unwrap().to_str().unwrap() == "md" { - let path = e.path().to_str().unwrap().to_owned(); - let content = std::fs::read_to_string(&path).unwrap(); - let frontmatter = get_frontmatter(&content); - if let Some(frontmatter) = frontmatter { - let frontmatter = serde_yaml::from_str(&frontmatter).unwrap(); - let doc = Document { path, frontmatter }; - i.documents.push(doc); - } else { - i.documents.push(Document { - path, - frontmatter: serde_yaml::to_value(&serde_yaml::Mapping::new()).unwrap(), - }); +impl Index { + /// Create a markdown document index over `dir` + pub fn new(dir: &str) -> Self { + let mut i = Self { documents: vec![] }; + + for e in walkdir::WalkDir::new(dir) + .into_iter() + .filter_map(std::result::Result::ok) + { + if e.path().is_dir() { + continue; + } + if e.path().extension().is_none() { + continue; + } + if e.path().extension().unwrap().to_str().unwrap() == "md" { + let path = e.path().to_str().unwrap().to_owned(); + let content = std::fs::read_to_string(&path).unwrap(); + let frontmatter = get_frontmatter(&content); + if let Some(frontmatter) = frontmatter { + let frontmatter = serde_yaml::from_str(&frontmatter).unwrap(); + let doc = Document { path, frontmatter }; + i.documents.push(doc); + } else { + i.documents.push(Document { + path, + frontmatter: serde_yaml::to_value(&serde_yaml::Mapping::new()).unwrap(), + }); + } } } + + i } - i + /// Build a table with specified columns from index within specified scope + #[must_use] + pub fn select_columns(&self, col: &[String], limit: usize, offset: usize) -> Table { + let mut rows = vec![]; + + let scope: Vec<_> = self.documents.clone().into_iter().skip(offset).collect(); + + let scope = if limit == 0 { + scope + } else { + scope.into_iter().take(limit).collect() + }; + + for doc in scope { + let mut rcol = vec![]; + for c in col { + rcol.push(doc.get_key(c)); + } + rows.push(rcol); + } + + rows + } + + /// Apply filters to the documents of the index returning a new filtered index + #[must_use] + pub fn filter_documents(&self, filters: &[txd::filter::Filter]) -> Self { + // TODO : Implement option for chaining filters with AND OR + let docs: Vec<_> = self + .documents + .iter() + .filter(|x| { + let mut is_included = true; + + for f in filters { + let a_str = x.get_key(&f.0); + let mut a = txd::parse(&a_str); + let b = txd::parse(&f.2); + + log::debug!("Trying to compare {a:?} and {b:?} with {:?}", f.1); + + if a_str.is_empty() { + // TODO : Maybe add explicit null instead of empty string + is_included = false; + break; + } + + if !a.same_as(&b) && !matches!(a, DataType::List(_)) { + log::debug!("trying to cast a to string because of different types"); + a = txd::DataType::String(a_str); + } + + if !a.compare(f.1, b) { + is_included = false; + } + } + + is_included + }) + .cloned() + .collect(); + + Self { documents: docs } + } } -/// Get a key from document. -/// This will return internal properties first, then it will search the document frontmatter for the key and return it. If nothing was found an empty string is returned. -fn get_key(d: &Document, key: &str) -> String { - match key { - "file.title" => { - let path = std::path::Path::new(&d.path); - return path.file_stem().unwrap().to_str().unwrap().to_string(); +impl Document { + /// Get a key from document. + /// This will return internal properties first, then it will search the document frontmatter for the key and return it. If nothing was found an empty string is returned. + fn get_key(&self, key: &str) -> String { + match key { + "file.title" => { + let path = std::path::Path::new(&self.path); + return path.file_stem().unwrap().to_str().unwrap().to_string(); + } + "file.name" => { + let path = std::path::Path::new(&self.path); + return path.file_name().unwrap().to_str().unwrap().to_string(); + } + "file.parent" => { + let path = std::path::Path::new(&self.path); + return path + .parent() + .unwrap() + .file_name() + .unwrap() + .to_str() + .unwrap() + .to_string(); + } + "file.folder" => { + let path = std::path::Path::new(&self.path); + return path.parent().unwrap().to_str().unwrap().to_string(); + } + "file.ext" => { + let path = std::path::Path::new(&self.path); + return path.extension().unwrap().to_str().unwrap().to_string(); + } + "file.size" => { + let path = std::path::Path::new(&self.path); + return path.metadata().unwrap().len().to_string(); + } + "file.ctime" => { + let path = std::path::Path::new(&self.path); + return system_time_to_date_time(path.metadata().unwrap().created().unwrap()) + .to_rfc3339(); + } + "file.cday" => { + let path = std::path::Path::new(&self.path); + return system_time_to_date_time(path.metadata().unwrap().created().unwrap()) + .format("%Y-%m-%d") + .to_string(); + } + "file.mtime" => { + let path = std::path::Path::new(&self.path); + return system_time_to_date_time(path.metadata().unwrap().modified().unwrap()) + .to_rfc3339(); + } + "file.mday" => { + let path = std::path::Path::new(&self.path); + return system_time_to_date_time(path.metadata().unwrap().modified().unwrap()) + .format("%Y-%m-%d") + .to_string(); + } + "file.path" => { + return self.path.clone(); + } + _ => {} } - "file.name" => { - let path = std::path::Path::new(&d.path); - return path.file_name().unwrap().to_str().unwrap().to_string(); - } - "file.parent" => { - let path = std::path::Path::new(&d.path); - return path - .parent() - .unwrap() - .file_name() - .unwrap() - .to_str() - .unwrap() - .to_string(); - } - "file.folder" => { - let path = std::path::Path::new(&d.path); - return path.parent().unwrap().to_str().unwrap().to_string(); - } - "file.ext" => { - let path = std::path::Path::new(&d.path); - return path.extension().unwrap().to_str().unwrap().to_string(); - } - "file.size" => { - let path = std::path::Path::new(&d.path); - return path.metadata().unwrap().len().to_string(); - } - "file.ctime" => { - let path = std::path::Path::new(&d.path); - return system_time_to_date_time(path.metadata().unwrap().created().unwrap()) - .to_rfc3339(); - } - "file.cday" => { - let path = std::path::Path::new(&d.path); - return system_time_to_date_time(path.metadata().unwrap().created().unwrap()) - .format("%Y-%m-%d") - .to_string(); - } - "file.mtime" => { - let path = std::path::Path::new(&d.path); - return system_time_to_date_time(path.metadata().unwrap().modified().unwrap()) - .to_rfc3339(); - } - "file.mday" => { - let path = std::path::Path::new(&d.path); - return system_time_to_date_time(path.metadata().unwrap().modified().unwrap()) - .format("%Y-%m-%d") - .to_string(); - } - "file.path" => { - return d.path.clone(); - } - _ => {} - } - if let Some(val) = d.frontmatter.as_mapping().unwrap().get(key) { - stringify(val) - } else { - String::new() + self.frontmatter + .as_mapping() + .unwrap() + .get(key) + .map_or_else(String::new, stringify) } } @@ -156,64 +225,3 @@ fn stringify(val: &serde_yaml::Value) -> String { serde_yaml::Value::Tagged(_) => unimplemented!(), } } - -type Table = Vec>; - -/// Build a table with specified columns from index -#[must_use] -pub fn select_columns(i: &Index, col: &[String]) -> Table { - let mut rows = vec![]; - - for doc in &i.documents { - let mut rcol = vec![]; - for c in col { - rcol.push(get_key(doc, c)); - } - rows.push(rcol); - } - - rows -} - -/// Apply filters to the documents of the index returning a new filtered index -#[must_use] -pub fn filter_documents(i: Index, filters: &[txd::filter::Filter]) -> Index { - // TODO : Implement option for chaining filters with AND OR - let docs: Vec<_> = i - .documents - .into_iter() - .filter_map(|x| { - let mut is_included = true; - - for f in filters { - let a_str = get_key(&x, &f.0); - let mut a = txd::parse(&a_str); - let b = txd::parse(&f.2); - - log::debug!("Trying to compare {a:?} and {b:?} with {:?}", f.1); - - if a_str.is_empty() { - // TODO : Maybe add explicit null instead of empty string - is_included = false; - break; - } - - if !a.same_as(&b) && !matches!(a, DataType::List(_)) { - log::debug!("trying to cast a to string because of different types"); - a = txd::DataType::String(a_str); - } - - if !a.compare(f.1, b) { - is_included = false; - } - } - if is_included { - Some(x) - } else { - None - } - }) - .collect(); - - Index { documents: docs } -} diff --git a/src/main.rs b/src/main.rs index e5b11a9..ee65def 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use std::io::IsTerminal; -use mdq::{filter_documents, scan_dir, select_columns}; +use mdq::Index; mod args; @@ -17,11 +17,9 @@ fn main() { let output_json = args.get_flag("json"); - let limit: Option = if let Some(limit_arg) = args.get_one::("limit") { - limit_arg.parse().ok() - } else { - None - }; + let limit: usize = args.get_one::("limit").unwrap().parse().unwrap(); + + let offset: usize = args.get_one::("offset").unwrap().parse().unwrap(); let columns: Vec<_> = args .get_many::("column") @@ -30,41 +28,30 @@ fn main() { .collect(); log::info!("selected columns: {columns:?}"); - let columns: Vec<(_, _)> = columns + let (columns, headers): (Vec<_>, Vec<_>) = columns .into_iter() .map(|x| { let (column, header_rename) = x.split_once(':').unwrap_or((&x, &x)); (column.to_owned(), header_rename.to_owned()) }) - .collect(); + .unzip(); - let (columns, headers): (Vec<_>, Vec<_>) = columns.into_iter().unzip(); - - let filters: Vec<_> = if let Some(filters) = args.get_many::("filter") { - filters.collect() - } else { - vec![] - }; + let filters = args + .get_many::("filter") + .map_or_else(std::vec::Vec::new, std::iter::Iterator::collect); let filters: Vec<_> = filters .into_iter() .map(|x| txd::filter::parse_condition(x).expect("failed to parse filter")) .collect(); - let mut i = scan_dir(root_dir); + let mut i = Index::new(root_dir); if !filters.is_empty() { - i = filter_documents(i, &filters); + i = i.filter_documents(&filters); } - let data = if let Some(limit) = limit { - select_columns(&i, &columns.clone()) - .into_iter() - .take(limit) - .collect::>() - } else { - select_columns(&i, &columns.clone()) - }; + let data = i.select_columns(&columns, limit, offset); if output_json { let mut data = serde_json::json!( @@ -86,14 +73,23 @@ fn main() { return; } + if !std::io::stdout().is_terminal() { + let mut writer = csv::WriterBuilder::new().from_writer(vec![]); + writer.write_record(headers).unwrap(); + for e in data { + writer.write_record(e).unwrap(); + } + print!( + "{}", + String::from_utf8(writer.into_inner().unwrap()).unwrap() + ); + return; + } + let mut table = comfy_table::Table::new(); table.set_header(headers); table.load_preset(comfy_table::presets::UTF8_FULL_CONDENSED); - if !std::io::stdout().is_terminal() { - // TODO : Output as CSV? - table.load_preset(comfy_table::presets::NOTHING); - } table.add_rows(data); println!("{table}");