♻️ refactor
All checks were successful
ci/woodpecker/push/build Pipeline was successful

This commit is contained in:
JMARyA 2025-03-03 01:35:14 +01:00
parent 2e7b192ce1
commit 907ed2a2ef
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
3 changed files with 78 additions and 35 deletions

70
Cargo.lock generated
View file

@ -255,9 +255,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.8.0" version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
dependencies = [ dependencies = [
"serde", "serde",
] ]
@ -307,9 +307,9 @@ checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.15" version = "1.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af" checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c"
dependencies = [ dependencies = [
"shlex", "shlex",
] ]
@ -328,9 +328,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "chrono" name = "chrono"
version = "0.4.39" version = "0.4.40"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
dependencies = [ dependencies = [
"android-tzdata", "android-tzdata",
"iana-time-zone", "iana-time-zone",
@ -338,7 +338,7 @@ dependencies = [
"num-traits", "num-traits",
"serde", "serde",
"wasm-bindgen", "wasm-bindgen",
"windows-targets 0.52.6", "windows-link",
] ]
[[package]] [[package]]
@ -583,7 +583,7 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b035a542cf7abf01f2e3c4d5a7acbaebfefe120ae4efc7bde3df98186e4b8af7" checksum = "b035a542cf7abf01f2e3c4d5a7acbaebfefe120ae4efc7bde3df98186e4b8af7"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.0",
"proc-macro2", "proc-macro2",
"proc-macro2-diagnostics", "proc-macro2-diagnostics",
"quote", "quote",
@ -1567,9 +1567,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
[[package]] [[package]]
name = "litemap" name = "litemap"
version = "0.7.4" version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
@ -1833,9 +1833,9 @@ dependencies = [
[[package]] [[package]]
name = "ollama-rs" name = "ollama-rs"
version = "0.2.5" version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "269d1ec6f5f1b7a7b7413ab7eacb65177462f086293b4039bc43ee8bbed53836" checksum = "a5df54edb7e1264719be607cd40590d3769b5b35a2623e6e02681e6591aea5b8"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"log", "log",
@ -1860,7 +1860,7 @@ version = "0.10.71"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.0",
"cfg-if", "cfg-if",
"foreign-types", "foreign-types",
"libc", "libc",
@ -2188,7 +2188,7 @@ version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f" checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.0",
] ]
[[package]] [[package]]
@ -2499,7 +2499,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.0",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
@ -2586,9 +2586,9 @@ dependencies = [
[[package]] [[package]]
name = "schemars" name = "schemars"
version = "0.8.21" version = "0.8.22"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92" checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
dependencies = [ dependencies = [
"dyn-clone", "dyn-clone",
"indexmap 1.9.3", "indexmap 1.9.3",
@ -2599,9 +2599,9 @@ dependencies = [
[[package]] [[package]]
name = "schemars_derive" name = "schemars_derive"
version = "0.8.21" version = "0.8.22"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e" checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -2627,7 +2627,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.0",
"core-foundation", "core-foundation",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
@ -2919,7 +2919,7 @@ checksum = "4560278f0e00ce64938540546f59f590d60beee33fffbd3b9cd47851e5fff233"
dependencies = [ dependencies = [
"atoi", "atoi",
"base64 0.22.1", "base64 0.22.1",
"bitflags 2.8.0", "bitflags 2.9.0",
"byteorder", "byteorder",
"bytes", "bytes",
"chrono", "chrono",
@ -2963,7 +2963,7 @@ checksum = "c5b98a57f363ed6764d5b3a12bfedf62f07aa16e1856a7ddc2a0bb190a959613"
dependencies = [ dependencies = [
"atoi", "atoi",
"base64 0.22.1", "base64 0.22.1",
"bitflags 2.8.0", "bitflags 2.9.0",
"byteorder", "byteorder",
"chrono", "chrono",
"crc", "crc",
@ -3151,7 +3151,7 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.0",
"core-foundation", "core-foundation",
"system-configuration-sys 0.6.0", "system-configuration-sys 0.6.0",
] ]
@ -3357,9 +3357,9 @@ dependencies = [
[[package]] [[package]]
name = "tokio-rustls" name = "tokio-rustls"
version = "0.26.1" version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
dependencies = [ dependencies = [
"rustls", "rustls",
"tokio", "tokio",
@ -3650,9 +3650,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.14.0" version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93d59ca99a559661b96bf898d8fce28ed87935fd2bea9f05983c1464dd6c71b1" checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587"
dependencies = [ dependencies = [
"getrandom 0.3.1", "getrandom 0.3.1",
"serde", "serde",
@ -3886,6 +3886,12 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "windows-link"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3"
[[package]] [[package]]
name = "windows-registry" name = "windows-registry"
version = "0.2.0" version = "0.2.0"
@ -4089,7 +4095,7 @@ version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.0",
] ]
[[package]] [[package]]
@ -4171,18 +4177,18 @@ dependencies = [
[[package]] [[package]]
name = "zerofrom" name = "zerofrom"
version = "0.1.5" version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
dependencies = [ dependencies = [
"zerofrom-derive", "zerofrom-derive",
] ]
[[package]] [[package]]
name = "zerofrom-derive" name = "zerofrom-derive"
version = "0.1.5" version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",

View file

@ -301,6 +301,30 @@ pub async fn index_path(dom: &Domain, path: &str) {
pub async fn index_document(doc: &Document) { pub async fn index_document(doc: &Document) {
for version_str in &doc.versions() { for version_str in &doc.versions() {
let domain = &doc.domain;
let path = &doc.path;
let version =
if let Ok(version) = chrono::NaiveDate::parse_from_str(&version_str, "%Y-%m-%d") {
version
} else {
log::error!(
"Could not parse version {version_str} as valid date for {} / {}",
domain,
path
);
continue;
};
if DocumentIndex::exists(domain, path, &version).await {
log::info!(
"Document {} / {} @ {} already indexed",
domain,
path,
version
);
continue;
}
if let Ok(content) = doc if let Ok(content) = doc
.render_local( .render_local(
Some(version_str.to_string()), Some(version_str.to_string()),
@ -309,7 +333,7 @@ pub async fn index_document(doc: &Document) {
.await .await
{ {
let size = content.len(); let size = content.len();
let mime = get_mime_type(&content).unwrap_or_default(); let mime = get_mime_type(&content).unwrap_or("text/html".to_string());
if mime.as_str() == "text/html" { if mime.as_str() == "text/html" {
// TODO : domain links index // TODO : domain links index
@ -319,7 +343,7 @@ pub async fn index_document(doc: &Document) {
for (mime, data) in extract_data_urls(&String::from_utf8_lossy(&content)) { for (mime, data) in extract_data_urls(&String::from_utf8_lossy(&content)) {
let hash = sha256_hash(&data); let hash = sha256_hash(&data);
println!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path); log::info!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path);
hashes.push(hash.clone()); hashes.push(hash.clone());
sqlx::query("INSERT INTO fragments (id, mime, blob) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING") sqlx::query("INSERT INTO fragments (id, mime, blob) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING")
@ -370,6 +394,19 @@ pub struct DocumentIndex {
} }
impl DocumentIndex { impl DocumentIndex {
/// Reports whether a `document_index` row is already stored for the given
/// `domain`, `path` and `version`.
///
/// Returns `true` when the lookup yields a row and `false` when it yields none.
///
/// # Panics
///
/// Panics if the database query returns an error.
pub async fn exists(domain: &str, path: &str, version: &chrono::NaiveDate) -> bool {
    let lookup = "SELECT * FROM document_index WHERE domain = $1 AND path = $2 AND version = $3";

    // Only the presence of a row matters; the decoded value itself is discarded.
    sqlx::query_as::<_, Self>(lookup)
        .bind(domain)
        .bind(path)
        .bind(version)
        .fetch_optional(get_pg!())
        .await
        .unwrap()
        .is_some()
}
pub fn url(&self) -> String { pub fn url(&self) -> String {
format!( format!(
"/s/{}/{}?time={}", "/s/{}/{}?time={}",

View file

@ -25,7 +25,7 @@ pub fn get_mime_type(content: &[u8]) -> std::io::Result<String> {
.spawn()?; .spawn()?;
if let Some(mut stdin) = child.stdin.take() { if let Some(mut stdin) = child.stdin.take() {
stdin.write_all(content)?; let _ = stdin.write_all(content);
} }
let output = child.wait_with_output()?; let output = child.wait_with_output()?;