This commit is contained in:
parent
2e7b192ce1
commit
907ed2a2ef
3 changed files with 78 additions and 35 deletions
70
Cargo.lock
generated
70
Cargo.lock
generated
|
@ -255,9 +255,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitflags"
|
name = "bitflags"
|
||||||
version = "2.8.0"
|
version = "2.9.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
|
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
@ -307,9 +307,9 @@ checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.2.15"
|
version = "1.2.16"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af"
|
checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"shlex",
|
"shlex",
|
||||||
]
|
]
|
||||||
|
@ -328,9 +328,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "chrono"
|
name = "chrono"
|
||||||
version = "0.4.39"
|
version = "0.4.40"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
|
checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"android-tzdata",
|
"android-tzdata",
|
||||||
"iana-time-zone",
|
"iana-time-zone",
|
||||||
|
@ -338,7 +338,7 @@ dependencies = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"serde",
|
"serde",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
"windows-targets 0.52.6",
|
"windows-link",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -583,7 +583,7 @@ version = "0.4.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b035a542cf7abf01f2e3c4d5a7acbaebfefe120ae4efc7bde3df98186e4b8af7"
|
checksum = "b035a542cf7abf01f2e3c4d5a7acbaebfefe120ae4efc7bde3df98186e4b8af7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"proc-macro2-diagnostics",
|
"proc-macro2-diagnostics",
|
||||||
"quote",
|
"quote",
|
||||||
|
@ -1567,9 +1567,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "litemap"
|
name = "litemap"
|
||||||
version = "0.7.4"
|
version = "0.7.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
|
checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
|
@ -1833,9 +1833,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ollama-rs"
|
name = "ollama-rs"
|
||||||
version = "0.2.5"
|
version = "0.2.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "269d1ec6f5f1b7a7b7413ab7eacb65177462f086293b4039bc43ee8bbed53836"
|
checksum = "a5df54edb7e1264719be607cd40590d3769b5b35a2623e6e02681e6591aea5b8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"log",
|
"log",
|
||||||
|
@ -1860,7 +1860,7 @@ version = "0.10.71"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd"
|
checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"foreign-types",
|
"foreign-types",
|
||||||
"libc",
|
"libc",
|
||||||
|
@ -2188,7 +2188,7 @@ version = "0.5.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f"
|
checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2499,7 +2499,7 @@ version = "0.38.44"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
|
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
"errno",
|
"errno",
|
||||||
"libc",
|
"libc",
|
||||||
"linux-raw-sys",
|
"linux-raw-sys",
|
||||||
|
@ -2586,9 +2586,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "schemars"
|
name = "schemars"
|
||||||
version = "0.8.21"
|
version = "0.8.22"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92"
|
checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"dyn-clone",
|
"dyn-clone",
|
||||||
"indexmap 1.9.3",
|
"indexmap 1.9.3",
|
||||||
|
@ -2599,9 +2599,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "schemars_derive"
|
name = "schemars_derive"
|
||||||
version = "0.8.21"
|
version = "0.8.22"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e"
|
checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
@ -2627,7 +2627,7 @@ version = "2.11.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
|
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
"core-foundation",
|
"core-foundation",
|
||||||
"core-foundation-sys",
|
"core-foundation-sys",
|
||||||
"libc",
|
"libc",
|
||||||
|
@ -2919,7 +2919,7 @@ checksum = "4560278f0e00ce64938540546f59f590d60beee33fffbd3b9cd47851e5fff233"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"atoi",
|
"atoi",
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
@ -2963,7 +2963,7 @@ checksum = "c5b98a57f363ed6764d5b3a12bfedf62f07aa16e1856a7ddc2a0bb190a959613"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"atoi",
|
"atoi",
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"chrono",
|
"chrono",
|
||||||
"crc",
|
"crc",
|
||||||
|
@ -3151,7 +3151,7 @@ version = "0.6.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
|
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
"core-foundation",
|
"core-foundation",
|
||||||
"system-configuration-sys 0.6.0",
|
"system-configuration-sys 0.6.0",
|
||||||
]
|
]
|
||||||
|
@ -3357,9 +3357,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-rustls"
|
name = "tokio-rustls"
|
||||||
version = "0.26.1"
|
version = "0.26.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37"
|
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"rustls",
|
"rustls",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
@ -3650,9 +3650,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "uuid"
|
name = "uuid"
|
||||||
version = "1.14.0"
|
version = "1.15.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "93d59ca99a559661b96bf898d8fce28ed87935fd2bea9f05983c1464dd6c71b1"
|
checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"getrandom 0.3.1",
|
"getrandom 0.3.1",
|
||||||
"serde",
|
"serde",
|
||||||
|
@ -3886,6 +3886,12 @@ dependencies = [
|
||||||
"windows-targets 0.52.6",
|
"windows-targets 0.52.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-link"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-registry"
|
name = "windows-registry"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
|
@ -4089,7 +4095,7 @@ version = "0.33.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
|
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.8.0",
|
"bitflags 2.9.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -4171,18 +4177,18 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zerofrom"
|
name = "zerofrom"
|
||||||
version = "0.1.5"
|
version = "0.1.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
|
checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"zerofrom-derive",
|
"zerofrom-derive",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zerofrom-derive"
|
name = "zerofrom-derive"
|
||||||
version = "0.1.5"
|
version = "0.1.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
|
checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
|
|
@ -301,6 +301,30 @@ pub async fn index_path(dom: &Domain, path: &str) {
|
||||||
|
|
||||||
pub async fn index_document(doc: &Document) {
|
pub async fn index_document(doc: &Document) {
|
||||||
for version_str in &doc.versions() {
|
for version_str in &doc.versions() {
|
||||||
|
let domain = &doc.domain;
|
||||||
|
let path = &doc.path;
|
||||||
|
let version =
|
||||||
|
if let Ok(version) = chrono::NaiveDate::parse_from_str(&version_str, "%Y-%m-%d") {
|
||||||
|
version
|
||||||
|
} else {
|
||||||
|
log::error!(
|
||||||
|
"Could not parse version {version_str} as valid date for {} / {}",
|
||||||
|
domain,
|
||||||
|
path
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
if DocumentIndex::exists(domain, path, &version).await {
|
||||||
|
log::info!(
|
||||||
|
"Document {} / {} @ {} already indexed",
|
||||||
|
domain,
|
||||||
|
path,
|
||||||
|
version
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if let Ok(content) = doc
|
if let Ok(content) = doc
|
||||||
.render_local(
|
.render_local(
|
||||||
Some(version_str.to_string()),
|
Some(version_str.to_string()),
|
||||||
|
@ -309,7 +333,7 @@ pub async fn index_document(doc: &Document) {
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
let size = content.len();
|
let size = content.len();
|
||||||
let mime = get_mime_type(&content).unwrap_or_default();
|
let mime = get_mime_type(&content).unwrap_or("text/html".to_string());
|
||||||
|
|
||||||
if mime.as_str() == "text/html" {
|
if mime.as_str() == "text/html" {
|
||||||
// TODO : domain links index
|
// TODO : domain links index
|
||||||
|
@ -319,7 +343,7 @@ pub async fn index_document(doc: &Document) {
|
||||||
for (mime, data) in extract_data_urls(&String::from_utf8_lossy(&content)) {
|
for (mime, data) in extract_data_urls(&String::from_utf8_lossy(&content)) {
|
||||||
let hash = sha256_hash(&data);
|
let hash = sha256_hash(&data);
|
||||||
|
|
||||||
println!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path);
|
log::info!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path);
|
||||||
|
|
||||||
hashes.push(hash.clone());
|
hashes.push(hash.clone());
|
||||||
sqlx::query("INSERT INTO fragments (id, mime, blob) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING")
|
sqlx::query("INSERT INTO fragments (id, mime, blob) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING")
|
||||||
|
@ -370,6 +394,19 @@ pub struct DocumentIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentIndex {
|
impl DocumentIndex {
|
||||||
|
pub async fn exists(domain: &str, path: &str, version: &chrono::NaiveDate) -> bool {
|
||||||
|
let res: Option<Self> = sqlx::query_as(
|
||||||
|
"SELECT * FROM document_index WHERE domain = $1 AND path = $2 AND version = $3",
|
||||||
|
)
|
||||||
|
.bind(domain)
|
||||||
|
.bind(path)
|
||||||
|
.bind(version)
|
||||||
|
.fetch_optional(get_pg!())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
res.is_some()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn url(&self) -> String {
|
pub fn url(&self) -> String {
|
||||||
format!(
|
format!(
|
||||||
"/s/{}/{}?time={}",
|
"/s/{}/{}?time={}",
|
||||||
|
|
|
@ -25,7 +25,7 @@ pub fn get_mime_type(content: &[u8]) -> std::io::Result<String> {
|
||||||
.spawn()?;
|
.spawn()?;
|
||||||
|
|
||||||
if let Some(mut stdin) = child.stdin.take() {
|
if let Some(mut stdin) = child.stdin.take() {
|
||||||
stdin.write_all(content)?;
|
let _ = stdin.write_all(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
let output = child.wait_with_output()?;
|
let output = child.wait_with_output()?;
|
||||||
|
|
Loading…
Add table
Reference in a new issue