This commit is contained in:
parent
2e7b192ce1
commit
907ed2a2ef
3 changed files with 78 additions and 35 deletions
|
@ -301,6 +301,30 @@ pub async fn index_path(dom: &Domain, path: &str) {
|
|||
|
||||
pub async fn index_document(doc: &Document) {
|
||||
for version_str in &doc.versions() {
|
||||
let domain = &doc.domain;
|
||||
let path = &doc.path;
|
||||
let version =
|
||||
if let Ok(version) = chrono::NaiveDate::parse_from_str(&version_str, "%Y-%m-%d") {
|
||||
version
|
||||
} else {
|
||||
log::error!(
|
||||
"Could not parse version {version_str} as valid date for {} / {}",
|
||||
domain,
|
||||
path
|
||||
);
|
||||
continue;
|
||||
};
|
||||
|
||||
if DocumentIndex::exists(domain, path, &version).await {
|
||||
log::info!(
|
||||
"Document {} / {} @ {} already indexed",
|
||||
domain,
|
||||
path,
|
||||
version
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(content) = doc
|
||||
.render_local(
|
||||
Some(version_str.to_string()),
|
||||
|
@ -309,7 +333,7 @@ pub async fn index_document(doc: &Document) {
|
|||
.await
|
||||
{
|
||||
let size = content.len();
|
||||
let mime = get_mime_type(&content).unwrap_or_default();
|
||||
let mime = get_mime_type(&content).unwrap_or("text/html".to_string());
|
||||
|
||||
if mime.as_str() == "text/html" {
|
||||
// TODO : domain links index
|
||||
|
@ -319,7 +343,7 @@ pub async fn index_document(doc: &Document) {
|
|||
for (mime, data) in extract_data_urls(&String::from_utf8_lossy(&content)) {
|
||||
let hash = sha256_hash(&data);
|
||||
|
||||
println!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path);
|
||||
log::info!("{} / {}: Indexing fragment {hash}", doc.domain, doc.path);
|
||||
|
||||
hashes.push(hash.clone());
|
||||
sqlx::query("INSERT INTO fragments (id, mime, blob) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING")
|
||||
|
@ -370,6 +394,19 @@ pub struct DocumentIndex {
|
|||
}
|
||||
|
||||
impl DocumentIndex {
|
||||
/// Reports whether the `document_index` table already holds a row for the
/// given `domain`, `path` and `version`.
///
/// Returns `true` when the lookup yields a row and `false` when it yields none.
///
/// # Panics
///
/// Panics if the query returns an error, because the result is unwrapped.
pub async fn exists(domain: &str, path: &str, version: &chrono::NaiveDate) -> bool {
|
||||
// At most one row is fetched; only its presence is inspected below.
let res: Option<Self> = sqlx::query_as(
|
||||
"SELECT * FROM document_index WHERE domain = $1 AND path = $2 AND version = $3",
|
||||
)
|
||||
.bind(domain)
|
||||
.bind(path)
|
||||
.bind(version)
|
||||
// NOTE(review): `get_pg!()` presumably yields the shared Postgres pool — confirm.
.fetch_optional(get_pg!())
|
||||
.await
|
||||
// NOTE(review): a failed query panics here instead of being reported to the caller.
.unwrap();
|
||||
res.is_some()
|
||||
}
|
||||
|
||||
pub fn url(&self) -> String {
|
||||
format!(
|
||||
"/s/{}/{}?time={}",
|
||||
|
|
|
@ -25,7 +25,7 @@ pub fn get_mime_type(content: &[u8]) -> std::io::Result<String> {
|
|||
.spawn()?;
|
||||
|
||||
if let Some(mut stdin) = child.stdin.take() {
|
||||
stdin.write_all(content)?;
|
||||
let _ = stdin.write_all(content);
|
||||
}
|
||||
|
||||
let output = child.wait_with_output()?;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue