From 907ed2a2efe4d14af5bf80b7493dca59a9ca79d1 Mon Sep 17 00:00:00 2001 From: JMARyA Date: Mon, 3 Mar 2025 01:35:14 +0100 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 70 +++++++++++++++++++++++++--------------------- src/archive/mod.rs | 41 +++++++++++++++++++++++++-- src/lib.rs | 2 +- 3 files changed, 78 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6f66167..0f921cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -255,9 +255,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" dependencies = [ "serde", ] @@ -307,9 +307,9 @@ checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" [[package]] name = "cc" -version = "1.2.15" +version = "1.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af" +checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" dependencies = [ "shlex", ] @@ -328,9 +328,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", @@ -338,7 +338,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -583,7 +583,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b035a542cf7abf01f2e3c4d5a7acbaebfefe120ae4efc7bde3df98186e4b8af7" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "proc-macro2", "proc-macro2-diagnostics", "quote", @@ -1567,9 +1567,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "litemap" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" [[package]] name = "lock_api" @@ -1833,9 +1833,9 @@ dependencies = [ [[package]] name = "ollama-rs" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "269d1ec6f5f1b7a7b7413ab7eacb65177462f086293b4039bc43ee8bbed53836" +checksum = "a5df54edb7e1264719be607cd40590d3769b5b35a2623e6e02681e6591aea5b8" dependencies = [ "async-stream", "log", @@ -1860,7 +1860,7 @@ version = "0.10.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "cfg-if", "foreign-types", "libc", @@ -2188,7 +2188,7 @@ version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -2499,7 +2499,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "errno", "libc", "linux-raw-sys", @@ -2586,9 +2586,9 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", "indexmap 1.9.3", @@ -2599,9 +2599,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" dependencies = [ "proc-macro2", "quote", @@ -2627,7 +2627,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "core-foundation", "core-foundation-sys", "libc", @@ -2919,7 +2919,7 @@ checksum = "4560278f0e00ce64938540546f59f590d60beee33fffbd3b9cd47851e5fff233" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.8.0", + "bitflags 2.9.0", "byteorder", "bytes", "chrono", @@ -2963,7 +2963,7 @@ checksum = "c5b98a57f363ed6764d5b3a12bfedf62f07aa16e1856a7ddc2a0bb190a959613" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.8.0", + "bitflags 2.9.0", "byteorder", "chrono", "crc", @@ -3151,7 +3151,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "core-foundation", "system-configuration-sys 0.6.0", ] @@ -3357,9 +3357,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ "rustls", "tokio", @@ -3650,9 +3650,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.14.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d59ca99a559661b96bf898d8fce28ed87935fd2bea9f05983c1464dd6c71b1" +checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587" dependencies = [ "getrandom 0.3.1", "serde", @@ -3886,6 +3886,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-link" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" + [[package]] name = "windows-registry" version = "0.2.0" @@ -4089,7 +4095,7 @@ version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -4171,18 +4177,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", diff --git a/src/archive/mod.rs b/src/archive/mod.rs index 6046881..7a65085 100644 --- a/src/archive/mod.rs +++ b/src/archive/mod.rs @@ -301,6 +301,30 @@ pub async fn index_path(dom: &Domain, path: &str) { pub async fn index_document(doc: &Document) { for version_str in &doc.versions() { + let domain = &doc.domain; + let path = &doc.path; + let version = + if let Ok(version) = chrono::NaiveDate::parse_from_str(&version_str, "%Y-%m-%d") { + version + } else { + log::error!( + "Could not parse version {version_str} as valid date for {} / {}", + domain, + path + ); + continue; + }; + + if DocumentIndex::exists(domain, path, &version).await { + log::info!( + "Document {} / {} @ {} already indexed", + domain, + path, + version + ); + continue; + } + if let Ok(content) = doc .render_local( Some(version_str.to_string()), @@ -309,7 +333,7 @@ pub async fn index_document(doc: &Document) { .await { let size = content.len(); - let mime = get_mime_type(&content).unwrap_or_default(); + let mime = get_mime_type(&content).unwrap_or("text/html".to_string()); if mime.as_str() == "text/html" { // TODO : domain links index @@ -319,7 +343,7 @@ pub async fn index_document(doc: &Document) { for (mime, data) in extract_data_urls(&String::from_utf8_lossy(&content)) { let hash = sha256_hash(&data); - println!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path); + log::info!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path); hashes.push(hash.clone()); sqlx::query("INSERT INTO fragments (id, mime, blob) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING") @@ -370,6 +394,19 @@ pub struct DocumentIndex { } impl DocumentIndex { + pub async fn exists(domain: &str, path: &str, version: &chrono::NaiveDate) -> bool { + let res: Option = sqlx::query_as( + "SELECT * FROM document_index WHERE domain = $1 AND path = $2 AND version = $3", + ) + .bind(domain) + .bind(path) + .bind(version) + .fetch_optional(get_pg!()) + .await + .unwrap(); + res.is_some() + } + pub fn url(&self) -> String { format!( "/s/{}/{}?time={}", diff --git a/src/lib.rs b/src/lib.rs index 0f94acf..8ab20a5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,7 +25,7 @@ pub fn get_mime_type(content: &[u8]) -> std::io::Result { .spawn()?; if let Some(mut stdin) = child.stdin.take() { - stdin.write_all(content)?; + let _ = stdin.write_all(content); } let output = child.wait_with_output()?;