update
Some checks failed
ci/woodpecker/push/build Pipeline failed

This commit is contained in:
JMARyA 2025-01-02 23:35:41 +01:00
parent 6700d4d817
commit a4a60c86df
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
2 changed files with 11 additions and 6 deletions

View file

@ -122,7 +122,7 @@ pub fn chunked(s: &str) -> Vec<String> {
.collect()
}
fn remove_data_urls(input: &str) -> String {
pub fn remove_data_urls(input: &str) -> String {
let re = regex::Regex::new("data:(.*?)(;base64)?,(.*)").unwrap();
// Replace all occurrences of data URLs with an empty string

View file

@ -14,7 +14,7 @@ use component::*;
use serde_json::json;
use webarc::{
ai::{generate_embedding, EmbedStore, SearchResult},
ai::{generate_embedding, remove_data_urls, EmbedStore, SearchResult},
archive::WebsiteArchive,
conf::get_config,
render_page,
@ -139,20 +139,25 @@ pub async fn domain_info_route(
render_page(content, ctx).await
}
/// Text rendering of an archived page.
///
/// Looks up `path` under `domain` in the `WebsiteArchive` state, renders the
/// stored document via `render_local` (passing `time` through as an owned
/// `String` when it is given), and returns the result of running that HTML
/// through `html2md::parse_html`.
///
/// Returns `None` when `render_local` yields no content.
///
/// NOTE(review): this span looks like a diff hunk whose +/- markers were
/// lost, so the pre-change and post-change variants of several lines appear
/// back to back (route attribute, `content` binding, `.await`, and the final
/// expression). It is not compilable as shown — confirm against the
/// repository before relying on it.
#[get("/txt/<domain>/<path..>?<time>")]
#[get("/txt/<domain>/<path..>?<time>&<no_data_urls>")]
pub async fn render_txt_website(
domain: &str,
path: PathBuf,
time: Option<&str>,
no_data_urls: Option<&str>,
arc: &State<WebsiteArchive>,
) -> Option<String> {
// NOTE(review): `to_str().unwrap()` panics if `path` is not valid UTF-8.
let document = arc.get_domain(domain).path(path.to_str().unwrap());
let content = document
let mut content = document
.render_local(time.map(|time| time.to_string()))
.await;
.await?;
content.map(|content_html| html2md::parse_html(&content_html))
// Only the presence of `no_data_urls` is checked; its value is ignored, so
// any value (even an empty one) enables the stripping below.
if no_data_urls.is_some() {
// Data URLs are removed from the HTML before it is handed to html2md.
content = remove_data_urls(&content);
}
Some(html2md::parse_html(&content))
}
/// Return archived version of `domain` / `path` at `time`