♻️ refactor
All checks were successful
ci/woodpecker/push/build Pipeline was successful

This commit is contained in:
JMARyA 2025-03-03 01:35:14 +01:00
parent 2e7b192ce1
commit 907ed2a2ef
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
3 changed files with 78 additions and 35 deletions

View file

@ -301,6 +301,30 @@ pub async fn index_path(dom: &Domain, path: &str) {
pub async fn index_document(doc: &Document) {
for version_str in &doc.versions() {
let domain = &doc.domain;
let path = &doc.path;
let version =
if let Ok(version) = chrono::NaiveDate::parse_from_str(&version_str, "%Y-%m-%d") {
version
} else {
log::error!(
"Could not parse version {version_str} as valid date for {} / {}",
domain,
path
);
continue;
};
if DocumentIndex::exists(domain, path, &version).await {
log::info!(
"Document {} / {} @ {} already indexed",
domain,
path,
version
);
continue;
}
if let Ok(content) = doc
.render_local(
Some(version_str.to_string()),
@ -309,7 +333,7 @@ pub async fn index_document(doc: &Document) {
.await
{
let size = content.len();
let mime = get_mime_type(&content).unwrap_or_default();
let mime = get_mime_type(&content).unwrap_or("text/html".to_string());
if mime.as_str() == "text/html" {
// TODO : domain links index
@ -319,7 +343,7 @@ pub async fn index_document(doc: &Document) {
for (mime, data) in extract_data_urls(&String::from_utf8_lossy(&content)) {
let hash = sha256_hash(&data);
println!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path);
log::info!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path);
hashes.push(hash.clone());
sqlx::query("INSERT INTO fragments (id, mime, blob) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING")
@ -370,6 +394,19 @@ pub struct DocumentIndex {
}
impl DocumentIndex {
pub async fn exists(domain: &str, path: &str, version: &chrono::NaiveDate) -> bool {
let res: Option<Self> = sqlx::query_as(
"SELECT * FROM document_index WHERE domain = $1 AND path = $2 AND version = $3",
)
.bind(domain)
.bind(path)
.bind(version)
.fetch_optional(get_pg!())
.await
.unwrap();
res.is_some()
}
pub fn url(&self) -> String {
format!(
"/s/{}/{}?time={}",

View file

@ -25,7 +25,7 @@ pub fn get_mime_type(content: &[u8]) -> std::io::Result<String> {
.spawn()?;
if let Some(mut stdin) = child.stdin.take() {
stdin.write_all(content)?;
let _ = stdin.write_all(content);
}
let output = child.wait_with_output()?;