add jsonfilter

This commit is contained in:
JMARyA 2024-02-09 13:45:15 +01:00
parent 5ff3e9e94f
commit cee90de28f
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
4 changed files with 151 additions and 122 deletions

53
Cargo.lock generated
View file

@ -333,6 +333,15 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "jsonfilter"
version = "0.1.0"
dependencies = [
"regex",
"serde",
"serde_json",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.150" version = "0.2.150"
@ -370,12 +379,12 @@ dependencies = [
"comfy-table", "comfy-table",
"csv", "csv",
"env_logger", "env_logger",
"jsonfilter",
"log", "log",
"regex", "regex",
"serde", "serde",
"serde_json", "serde_json",
"serde_yaml", "serde_yaml",
"txd",
"walkdir", "walkdir",
] ]
@ -425,18 +434,18 @@ dependencies = [
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.70" version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.33" version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
] ]
@ -452,9 +461,9 @@ dependencies = [
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.10.2" version = "1.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
@ -464,9 +473,9 @@ dependencies = [
[[package]] [[package]]
name = "regex-automata" name = "regex-automata"
version = "0.4.3" version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
@ -521,18 +530,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.193" version = "1.0.196"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.193" version = "1.0.196"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -541,9 +550,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.108" version = "1.0.113"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79"
dependencies = [ dependencies = [
"itoa", "itoa",
"ryu", "ryu",
@ -596,9 +605,9 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.39" version = "2.0.48"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -614,16 +623,6 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "txd"
version = "0.1.0"
source = "git+https://git.hydrar.de/jmarya/txd#657b5dc2a76342d1021452ca226da9b3e19f7b82"
dependencies = [
"chrono",
"serde",
"serde_json",
]
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.12" version = "1.0.12"

View file

@ -15,10 +15,10 @@ regex = "1.10.2"
serde = "1.0.189" serde = "1.0.189"
serde_yaml = "0.9.25" serde_yaml = "0.9.25"
walkdir = "2.4.0" walkdir = "2.4.0"
txd = { git = "https://git.hydrar.de/jmarya/txd" }
serde_json = "1.0.107" serde_json = "1.0.107"
comfy-table = "7.1.0" comfy-table = "7.1.0"
env_logger = "0.10.0" env_logger = "0.10.0"
log = "0.4.20" log = "0.4.20"
chrono = "0.4.31" chrono = "0.4.31"
csv = "1.3.0" csv = "1.3.0"
jsonfilter = { path = "../../Gitea/jsonfilter" }

View file

@ -1,6 +1,7 @@
use std::collections::{HashMap, HashSet}; use std::{
cmp::Ordering,
use txd::DataType; collections::{HashMap, HashSet},
};
/// get frontmatter from markdown document /// get frontmatter from markdown document
#[must_use] #[must_use]
@ -14,6 +15,28 @@ pub fn get_frontmatter(markdown: &str) -> Option<String> {
}) })
} }
/// Conversion of a value into its [`serde_yaml::Value`] representation.
trait ToYaml {
    /// Returns the YAML value equivalent to `self`.
    fn to_yaml(&self) -> serde_yaml::Value;
}

impl ToYaml for serde_json::Value {
    /// Converts a JSON value into a YAML value.
    ///
    /// # Panics
    /// Panics if `serde_yaml` reports an error while serializing the value.
    fn to_yaml(&self) -> serde_yaml::Value {
        // Convert value-to-value through serde's data model instead of
        // rendering YAML text and parsing it back again.
        serde_yaml::to_value(self).expect("JSON value should be representable as YAML")
    }
}
/// Conversion of a value into its [`serde_json::Value`] representation.
trait ToJson {
    /// Returns the JSON value equivalent to `self`.
    fn to_json(&self) -> serde_json::Value;
}

impl ToJson for serde_yaml::Value {
    /// Converts a YAML value into a JSON value.
    ///
    /// # Panics
    /// Panics if `serde_json` reports an error while serializing the value
    /// (presumably when a mapping key has no JSON object-key form — confirm
    /// against the `serde_json` version in use).
    fn to_json(&self) -> serde_json::Value {
        // Convert value-to-value through serde's data model instead of
        // rendering JSON text and parsing it back again.
        serde_json::to_value(self).expect("YAML value should be representable as JSON")
    }
}
/// get inline #tags from markdown file /// get inline #tags from markdown file
#[must_use] #[must_use]
pub fn get_inline_tags(markdown: &str) -> Vec<String> { pub fn get_inline_tags(markdown: &str) -> Vec<String> {
@ -118,20 +141,10 @@ impl Index {
if let Some(sort) = sort { if let Some(sort) = sort {
scope.sort_by(|a, b| { scope.sort_by(|a, b| {
let a_str = a.get_key(&sort); let a_str: serde_json::Value = a.get_key(&sort);
let b_str = b.get_key(&sort); let b_str: serde_json::Value = b.get_key(&sort);
let mut a = txd::parse(&a_str);
let mut b = txd::parse(&b_str);
log::debug!("Trying to order {a:?} and {b:?}",); jsonfilter::order(&a_str, &b_str)
if !a.same_as(&b) {
log::debug!("trying to cast a to string because of different types");
a = txd::DataType::String(a_str);
b = txd::DataType::String(b_str);
}
a.order_with(&b).unwrap()
}); });
} }
@ -155,7 +168,10 @@ impl Index {
let mut grouped_items: HashMap<String, Vec<Document>> = HashMap::new(); let mut grouped_items: HashMap<String, Vec<Document>> = HashMap::new();
for doc in self.documents.clone() { for doc in self.documents.clone() {
grouped_items.entry(doc.get_key(key)).or_default().push(doc); grouped_items
.entry(stringify(&doc.get_key(key).to_yaml()))
.or_default()
.push(doc);
} }
grouped_items grouped_items
@ -171,7 +187,7 @@ impl Index {
for doc in &self.documents { for doc in &self.documents {
let mut rcol = vec![]; let mut rcol = vec![];
for c in col { for c in col {
rcol.push(doc.get_key(c)); rcol.push(stringify(&doc.get_key(c).to_yaml()));
} }
rows.push(rcol); rows.push(rcol);
} }
@ -181,42 +197,21 @@ impl Index {
/// Apply filters to the documents of the index returning a new filtered index /// Apply filters to the documents of the index returning a new filtered index
#[must_use] #[must_use]
pub fn filter_documents(&self, filters: &[txd::filter::Filter]) -> Self { pub fn filter_documents(&self, filters: &serde_json::Value) -> Self {
// TODO : Implement option for chaining filters with AND OR
let docs: Vec<_> = self let docs: Vec<_> = self
.documents .documents
.iter() .iter()
.filter(|x| { .filter(|x| {
let mut is_included = true; let res = jsonfilter::try_matches(filters, &x.get_full_frontmatter());
match res {
for f in filters { Ok(valid) => Ok(valid),
let a_str = x.get_key(&f.0); Err(e) => match e {
let mut a = txd::parse(&a_str); jsonfilter::FilterError::InvalidFilter => Err(e),
let b = txd::parse(&f.2); jsonfilter::FilterError::UnknownOperator => Err(e),
jsonfilter::FilterError::KeyNotFound => Ok(false),
log::debug!( },
"Trying to compare '{}' = {a:?} and {b:?} with {:?}",
f.0,
f.1
);
if a_str.is_empty() {
// TODO : Maybe add explicit null instead of empty string
is_included = false;
break;
}
if !a.same_as(&b) && !matches!(a, DataType::List(_)) {
log::debug!("trying to cast a to string because of different types");
a = txd::DataType::String(a_str);
}
if !a.compare(f.1, &b).unwrap() {
is_included = false;
}
} }
.unwrap()
is_included
}) })
.cloned() .cloned()
.collect(); .collect();
@ -228,63 +223,80 @@ impl Index {
impl Document { impl Document {
/// Get a key from document. /// Get a key from document.
/// This will return internal properties first, then it will search the document frontmatter for the key and return it. If nothing was found an empty string is returned. /// This will return internal properties first, then it will search the document frontmatter for the key and return it. If nothing was found, JSON `null` is returned.
fn get_key(&self, key: &str) -> String { fn get_key(&self, key: &str) -> serde_json::Value {
match key { match key {
"file.title" => { "file.title" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return path.file_stem().unwrap().to_str().unwrap().to_string(); return serde_json::Value::String(
path.file_stem().unwrap().to_str().unwrap().to_string(),
);
} }
"file.name" => { "file.name" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return path.file_name().unwrap().to_str().unwrap().to_string(); return serde_json::Value::String(
path.file_name().unwrap().to_str().unwrap().to_string(),
);
} }
"file.parent" => { "file.parent" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return path return serde_json::Value::String(
.parent() path.parent()
.unwrap() .unwrap()
.file_name() .file_name()
.unwrap() .unwrap()
.to_str() .to_str()
.unwrap() .unwrap()
.to_string(); .to_string(),
);
} }
"file.folder" => { "file.folder" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return path.parent().unwrap().to_str().unwrap().to_string(); return serde_json::Value::String(
path.parent().unwrap().to_str().unwrap().to_string(),
);
} }
"file.ext" => { "file.ext" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return path.extension().unwrap().to_str().unwrap().to_string(); return serde_json::Value::String(
path.extension().unwrap().to_str().unwrap().to_string(),
);
} }
"file.size" => { "file.size" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return path.metadata().unwrap().len().to_string(); return serde_json::Value::String(path.metadata().unwrap().len().to_string());
} }
"file.ctime" => { "file.ctime" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return system_time_to_date_time(path.metadata().unwrap().created().unwrap()) return serde_json::Value::String(
.to_rfc3339(); system_time_to_date_time(path.metadata().unwrap().created().unwrap())
.to_rfc3339(),
);
} }
"file.cday" => { "file.cday" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return system_time_to_date_time(path.metadata().unwrap().created().unwrap()) return serde_json::Value::String(
.format("%Y-%m-%d") system_time_to_date_time(path.metadata().unwrap().created().unwrap())
.to_string(); .format("%Y-%m-%d")
.to_string(),
);
} }
"file.mtime" => { "file.mtime" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return system_time_to_date_time(path.metadata().unwrap().modified().unwrap()) return serde_json::Value::String(
.to_rfc3339(); system_time_to_date_time(path.metadata().unwrap().modified().unwrap())
.to_rfc3339(),
);
} }
"file.mday" => { "file.mday" => {
let path = std::path::Path::new(&self.path); let path = std::path::Path::new(&self.path);
return system_time_to_date_time(path.metadata().unwrap().modified().unwrap()) return serde_json::Value::String(
.format("%Y-%m-%d") system_time_to_date_time(path.metadata().unwrap().modified().unwrap())
.to_string(); .format("%Y-%m-%d")
.to_string(),
);
} }
"file.path" => { "file.path" => {
return self.path.clone(); return serde_json::Value::String(self.path.clone());
} }
_ => {} _ => {}
} }
@ -298,27 +310,44 @@ impl Document {
.unwrap() .unwrap()
.get(split_path.first().unwrap()); .get(split_path.first().unwrap());
if data.is_none() { if data.is_none() {
return String::new(); return serde_json::Value::Null;
} }
let mut data = data.unwrap(); let mut data = data.unwrap();
for path in &split_path[1..] { for path in &split_path[1..] {
let data_opt = data.as_mapping().unwrap().get(path); let data_opt = data.as_mapping().unwrap().get(path);
if data_opt.is_none() { if data_opt.is_none() {
return String::new(); return serde_json::Value::Null;
} }
data = data_opt.unwrap(); data = data_opt.unwrap();
} }
stringify(data) data.to_json()
} else { } else {
self.frontmatter self.frontmatter
.as_mapping() .as_mapping()
.unwrap() .unwrap()
.get(key) .get(key)
.map_or_else(String::new, stringify) .map_or_else(|| serde_json::Value::Null, |x| x.to_json())
} }
} }
/// Build a JSON object holding the document frontmatter together with every
/// computed `file.*` property.
///
/// Each `file.*` value is obtained through `get_key` and inserted under its
/// dotted name, so a frontmatter entry using the same key is replaced.
///
/// # Panics
/// Panics if the frontmatter does not convert to a JSON object.
pub fn get_full_frontmatter(&self) -> serde_json::Value {
    // Internal properties, listed in the order they are inserted.
    const FILE_KEYS: [&str; 11] = [
        "file.title",
        "file.name",
        "file.parent",
        "file.folder",
        "file.ext",
        "file.size",
        "file.ctime",
        "file.cday",
        "file.mtime",
        "file.mday",
        "file.path",
    ];

    let mut merged = self.frontmatter.to_json();
    let fields = merged.as_object_mut().unwrap();
    for &key in &FILE_KEYS {
        fields.insert(key.into(), self.get_key(key));
    }
    merged
}
} }
fn stringify(val: &serde_yaml::Value) -> String { fn stringify(val: &serde_yaml::Value) -> String {

View file

@ -55,14 +55,23 @@ fn main() {
.map_or_else(std::vec::Vec::new, std::iter::Iterator::collect); .map_or_else(std::vec::Vec::new, std::iter::Iterator::collect);
log::debug!("raw filters: {filters:?}"); log::debug!("raw filters: {filters:?}");
let filters: Vec<_> = filters
.into_iter() let filters = if filters.len() == 1 {
.map(|x| txd::filter::parse_condition(x).expect("failed to parse filter")) serde_json::from_str(filters.first().unwrap()).unwrap()
.collect(); } else {
let filters: Vec<_> = filters
.iter()
.map(|x| serde_json::from_str::<serde_json::Value>(x).unwrap())
.collect();
serde_json::json!({
"$and": filters
})
};
log::debug!("parsed filters: {filters:?}"); log::debug!("parsed filters: {filters:?}");
let mut i = Index::new(root_dir, ignoretags); let mut i = Index::new(root_dir, ignoretags);
if !filters.is_empty() { if !filters.is_null() {
i = i.filter_documents(&filters); i = i.filter_documents(&filters);
} }
@ -95,18 +104,10 @@ fn main() {
if std::io::stdout().is_terminal() { if std::io::stdout().is_terminal() {
let mut grouped_keys = grouped.iter().map(|(key, _)| key).collect::<Vec<_>>(); let mut grouped_keys = grouped.iter().map(|(key, _)| key).collect::<Vec<_>>();
grouped_keys.sort_by(|a_str, b_str| { grouped_keys.sort_by(|a_str, b_str| {
let mut a = txd::parse(a_str); let a: serde_json::Value = serde_json::from_str(a_str).unwrap();
let mut b = txd::parse(b_str); let b: serde_json::Value = serde_json::from_str(b_str).unwrap();
log::debug!("Trying to order {a:?} and {b:?}",); jsonfilter::order(&a, &b)
if !a.same_as(&b) {
log::debug!("trying to cast a to string because of different types");
a = txd::DataType::String((*a_str).to_string());
b = txd::DataType::String((*b_str).to_string());
}
a.order_with(&b).unwrap()
}); });
for group in grouped_keys { for group in grouped_keys {
println!("# {group}"); println!("# {group}");