parent
6700d4d817
commit
a4a60c86df
2 changed files with 11 additions and 6 deletions
|
@ -122,7 +122,7 @@ pub fn chunked(s: &str) -> Vec<String> {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn remove_data_urls(input: &str) -> String {
|
pub fn remove_data_urls(input: &str) -> String {
|
||||||
let re = regex::Regex::new("data:(.*?)(;base64)?,(.*)").unwrap();
|
let re = regex::Regex::new("data:(.*?)(;base64)?,(.*)").unwrap();
|
||||||
|
|
||||||
// Replace all occurrences of data URLs with an empty string
|
// Replace all occurrences of data URLs with an empty string
|
||||||
|
|
|
@ -14,7 +14,7 @@ use component::*;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use webarc::{
|
use webarc::{
|
||||||
ai::{generate_embedding, EmbedStore, SearchResult},
|
ai::{generate_embedding, remove_data_urls, EmbedStore, SearchResult},
|
||||||
archive::WebsiteArchive,
|
archive::WebsiteArchive,
|
||||||
conf::get_config,
|
conf::get_config,
|
||||||
render_page,
|
render_page,
|
||||||
|
@ -139,20 +139,25 @@ pub async fn domain_info_route(
|
||||||
render_page(content, ctx).await
|
render_page(content, ctx).await
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get("/txt/<domain>/<path..>?<time>")]
|
#[get("/txt/<domain>/<path..>?<time>&<no_data_urls>")]
|
||||||
pub async fn render_txt_website(
|
pub async fn render_txt_website(
|
||||||
domain: &str,
|
domain: &str,
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
time: Option<&str>,
|
time: Option<&str>,
|
||||||
|
no_data_urls: Option<&str>,
|
||||||
arc: &State<WebsiteArchive>,
|
arc: &State<WebsiteArchive>,
|
||||||
) -> Option<String> {
|
) -> Option<String> {
|
||||||
let document = arc.get_domain(domain).path(path.to_str().unwrap());
|
let document = arc.get_domain(domain).path(path.to_str().unwrap());
|
||||||
|
|
||||||
let content = document
|
let mut content = document
|
||||||
.render_local(time.map(|time| time.to_string()))
|
.render_local(time.map(|time| time.to_string()))
|
||||||
.await;
|
.await?;
|
||||||
|
|
||||||
content.map(|content_html| html2md::parse_html(&content_html))
|
if no_data_urls.is_some() {
|
||||||
|
content = remove_data_urls(&content);
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(html2md::parse_html(&content))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return archived version of `domain` / `path` at `time`
|
/// Return archived version of `domain` / `path` at `time`
|
||||||
|
|
Loading…
Add table
Reference in a new issue