update
Some checks failed
ci/woodpecker/push/build Pipeline failed

This commit is contained in:
JMARyA 2025-01-02 23:35:41 +01:00
parent 6700d4d817
commit a4a60c86df
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
2 changed files with 11 additions and 6 deletions

View file

@ -122,7 +122,7 @@ pub fn chunked(s: &str) -> Vec<String> {
.collect() .collect()
} }
fn remove_data_urls(input: &str) -> String { pub fn remove_data_urls(input: &str) -> String {
let re = regex::Regex::new("data:(.*?)(;base64)?,(.*)").unwrap(); let re = regex::Regex::new("data:(.*?)(;base64)?,(.*)").unwrap();
// Replace all occurrences of data URLs with an empty string // Replace all occurrences of data URLs with an empty string

View file

@ -14,7 +14,7 @@ use component::*;
use serde_json::json; use serde_json::json;
use webarc::{ use webarc::{
ai::{generate_embedding, EmbedStore, SearchResult}, ai::{generate_embedding, remove_data_urls, EmbedStore, SearchResult},
archive::WebsiteArchive, archive::WebsiteArchive,
conf::get_config, conf::get_config,
render_page, render_page,
@ -139,20 +139,25 @@ pub async fn domain_info_route(
render_page(content, ctx).await render_page(content, ctx).await
} }
#[get("/txt/<domain>/<path..>?<time>")] #[get("/txt/<domain>/<path..>?<time>&<no_data_urls>")]
pub async fn render_txt_website( pub async fn render_txt_website(
domain: &str, domain: &str,
path: PathBuf, path: PathBuf,
time: Option<&str>, time: Option<&str>,
no_data_urls: Option<&str>,
arc: &State<WebsiteArchive>, arc: &State<WebsiteArchive>,
) -> Option<String> { ) -> Option<String> {
let document = arc.get_domain(domain).path(path.to_str().unwrap()); let document = arc.get_domain(domain).path(path.to_str().unwrap());
let content = document let mut content = document
.render_local(time.map(|time| time.to_string())) .render_local(time.map(|time| time.to_string()))
.await; .await?;
content.map(|content_html| html2md::parse_html(&content_html)) if no_data_urls.is_some() {
content = remove_data_urls(&content);
}
Some(html2md::parse_html(&content))
} }
/// Return archived version of `domain` / `path` at `time` /// Return archived version of `domain` / `path` at `time`