parent
6700d4d817
commit
a4a60c86df
2 changed files with 11 additions and 6 deletions
|
@ -122,7 +122,7 @@ pub fn chunked(s: &str) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn remove_data_urls(input: &str) -> String {
|
||||
pub fn remove_data_urls(input: &str) -> String {
|
||||
let re = regex::Regex::new("data:(.*?)(;base64)?,(.*)").unwrap();
|
||||
|
||||
// Replace all occurrences of data URLs with an empty string
|
||||
|
|
|
@ -14,7 +14,7 @@ use component::*;
|
|||
use serde_json::json;
|
||||
|
||||
use webarc::{
|
||||
ai::{generate_embedding, EmbedStore, SearchResult},
|
||||
ai::{generate_embedding, remove_data_urls, EmbedStore, SearchResult},
|
||||
archive::WebsiteArchive,
|
||||
conf::get_config,
|
||||
render_page,
|
||||
|
@ -139,20 +139,25 @@ pub async fn domain_info_route(
|
|||
render_page(content, ctx).await
|
||||
}
|
||||
|
||||
#[get("/txt/<domain>/<path..>?<time>")]
|
||||
#[get("/txt/<domain>/<path..>?<time>&<no_data_urls>")]
|
||||
pub async fn render_txt_website(
|
||||
domain: &str,
|
||||
path: PathBuf,
|
||||
time: Option<&str>,
|
||||
no_data_urls: Option<&str>,
|
||||
arc: &State<WebsiteArchive>,
|
||||
) -> Option<String> {
|
||||
let document = arc.get_domain(domain).path(path.to_str().unwrap());
|
||||
|
||||
let content = document
|
||||
let mut content = document
|
||||
.render_local(time.map(|time| time.to_string()))
|
||||
.await;
|
||||
.await?;
|
||||
|
||||
content.map(|content_html| html2md::parse_html(&content_html))
|
||||
if no_data_urls.is_some() {
|
||||
content = remove_data_urls(&content);
|
||||
}
|
||||
|
||||
Some(html2md::parse_html(&content))
|
||||
}
|
||||
|
||||
/// Return archived version of `domain` / `path` at `time`
|
||||
|
|
Loading…
Add table
Reference in a new issue