any mime
Some checks are pending
ci/woodpecker/push/build Pipeline is pending

This commit is contained in:
JMARyA 2025-02-24 19:30:20 +01:00
parent a9f758cd9b
commit 2e5b4fc3d2
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
9 changed files with 141 additions and 89 deletions

105
Cargo.lock generated
View file

@ -202,7 +202,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]] [[package]]
name = "based" name = "based"
version = "0.1.0" version = "0.1.0"
source = "git+https://git.hydrar.de/jmarya/based#9afe75bc8fd961f050ee31fc9e86e37eb6f8ffb6" source = "git+https://git.hydrar.de/jmarya/based#696b34f2f17ef2d86f0bc77993f9b0b8b652c0f6"
dependencies = [ dependencies = [
"bcrypt", "bcrypt",
"chrono", "chrono",
@ -307,9 +307,9 @@ checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.14" version = "1.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af"
dependencies = [ dependencies = [
"shlex", "shlex",
] ]
@ -320,6 +320,17 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cfb"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f"
dependencies = [
"byteorder",
"fnv",
"uuid",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
@ -353,9 +364,9 @@ dependencies = [
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.5.30" version = "4.5.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92b7b18d71fad5313a1e320fa9897994228ce274b60faa4d694fe0ea89cd9e6d" checksum = "027bb0d98429ae334a8698531da7077bdf906419543a35a55c2cb1b66437d767"
dependencies = [ dependencies = [
"clap_builder", "clap_builder",
"clap_derive", "clap_derive",
@ -363,9 +374,9 @@ dependencies = [
[[package]] [[package]]
name = "clap_builder" name = "clap_builder"
version = "4.5.30" version = "4.5.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a35db2071778a7344791a4fb4f95308b5673d219dee3ae348b86642574ecc90c" checksum = "5589e0cba072e0f3d23791efac0fd8627b49c829c196a492e88168e6a669d863"
dependencies = [ dependencies = [
"anstream", "anstream",
"anstyle", "anstyle",
@ -627,9 +638,9 @@ checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35"
[[package]] [[package]]
name = "either" name = "either"
version = "1.13.0" version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d"
dependencies = [ dependencies = [
"serde", "serde",
] ]
@ -950,9 +961,9 @@ dependencies = [
[[package]] [[package]]
name = "h2" name = "h2"
version = "0.4.7" version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" checksum = "5017294ff4bb30944501348f6f8e42e6ad28f42c8bbef7a74029aff064a4e3c2"
dependencies = [ dependencies = [
"atomic-waker", "atomic-waker",
"bytes", "bytes",
@ -1185,7 +1196,7 @@ dependencies = [
"bytes", "bytes",
"futures-channel", "futures-channel",
"futures-util", "futures-util",
"h2 0.4.7", "h2 0.4.8",
"http 1.2.0", "http 1.2.0",
"http-body 1.0.1", "http-body 1.0.1",
"httparse", "httparse",
@ -1445,6 +1456,15 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "infer"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7"
dependencies = [
"cfb",
]
[[package]] [[package]]
name = "inlinable_string" name = "inlinable_string"
version = "0.1.15" version = "0.1.15"
@ -1453,9 +1473,9 @@ checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]] [[package]]
name = "inout" name = "inout"
version = "0.1.3" version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
dependencies = [ dependencies = [
"generic-array", "generic-array",
] ]
@ -1530,9 +1550,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.169" version = "0.2.170"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
[[package]] [[package]]
name = "libm" name = "libm"
@ -1574,9 +1594,9 @@ dependencies = [
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.25" version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
[[package]] [[package]]
name = "loom" name = "loom"
@ -1680,9 +1700,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.8.4" version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
dependencies = [ dependencies = [
"adler2", "adler2",
] ]
@ -1719,9 +1739,9 @@ dependencies = [
[[package]] [[package]]
name = "native-tls" name = "native-tls"
version = "0.2.13" version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dab59f8e050d5df8e4dd87d9206fb6f65a483e20ac9fda365ade4fab353196c" checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
dependencies = [ dependencies = [
"libc", "libc",
"log", "log",
@ -2175,9 +2195,9 @@ dependencies = [
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.5.8" version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.8.0",
] ]
@ -2297,7 +2317,7 @@ dependencies = [
"encoding_rs", "encoding_rs",
"futures-core", "futures-core",
"futures-util", "futures-util",
"h2 0.4.7", "h2 0.4.8",
"http 1.2.0", "http 1.2.0",
"http-body 1.0.1", "http-body 1.0.1",
"http-body-util", "http-body-util",
@ -2347,9 +2367,9 @@ dependencies = [
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.17.9" version = "0.17.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" checksum = "da5349ae27d3887ca812fb375b45a4fbb36d8d12d2df394968cd86e35683fe73"
dependencies = [ dependencies = [
"cc", "cc",
"cfg-if", "cfg-if",
@ -2540,7 +2560,7 @@ version = "0.102.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9"
dependencies = [ dependencies = [
"ring 0.17.9", "ring 0.17.11",
"rustls-pki-types", "rustls-pki-types",
"untrusted 0.9.0", "untrusted 0.9.0",
] ]
@ -2637,18 +2657,18 @@ dependencies = [
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.217" version = "1.0.218"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.217" version = "1.0.218"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -2668,9 +2688,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.138" version = "1.0.139"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6"
dependencies = [ dependencies = [
"itoa", "itoa",
"memchr", "memchr",
@ -3055,9 +3075,9 @@ dependencies = [
[[package]] [[package]]
name = "string_cache_codegen" name = "string_cache_codegen"
version = "0.5.3" version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "244292f3441c89febe5b5bdfbb6863aeaf4f64da810ea3050fd927b27b8d92ce" checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [ dependencies = [
"phf_generator", "phf_generator",
"phf_shared", "phf_shared",
@ -3558,9 +3578,9 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.16" version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
[[package]] [[package]]
name = "unicode-normalization" name = "unicode-normalization"
@ -3641,9 +3661,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.13.2" version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c1f41ffb7cf259f1ecc2876861a17e7142e63ead296f671f81f6ae85903e0d6" checksum = "93d59ca99a559661b96bf898d8fce28ed87935fd2bea9f05983c1464dd6c71b1"
dependencies = [ dependencies = [
"getrandom 0.3.1", "getrandom 0.3.1",
"serde", "serde",
@ -3798,6 +3818,7 @@ dependencies = [
"env_logger", "env_logger",
"futures", "futures",
"html2md", "html2md",
"infer",
"log", "log",
"maud", "maud",
"ollama-rs", "ollama-rs",
@ -4054,9 +4075,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]] [[package]]
name = "winnow" name = "winnow"
version = "0.7.2" version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59690dea168f2198d1a3b0cac23b8063efcd11012f10ae4698f284808c8ef603" checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]

View file

@ -25,3 +25,4 @@ html2md = "0.2.14"
clap = { version = "4.5.23", features = ["cargo", "derive"] } clap = { version = "4.5.23", features = ["cargo", "derive"] }
toml = "0.8.19" toml = "0.8.19"
url-escape = "0.1.1" url-escape = "0.1.1"
infer = "0.19.0"

View file

@ -9,11 +9,11 @@ web_archive/
├─ domain.com/ ├─ domain.com/
│ ├─ sub/ │ ├─ sub/
│ │ ├─ path/ │ │ ├─ path/
│ │ │ ├─ index_YYYY_MM_DD.html │ │ │ ├─ index_YYYY_MM_DD
├─ sub.domain.com/ ├─ sub.domain.com/
``` ```
Every document of this web archive can then be found at `archive/domain/paths.../index_YYYY_MM_DD.html`. Every document of this web archive can then be found at `archive/domain/paths.../index_YYYY_MM_DD`.
## Usage ## Usage
webarc provides a CLI tool to work with the archive structure. webarc provides a CLI tool to work with the archive structure.

View file

@ -140,7 +140,8 @@ impl Embedding for Document {
ver.as_ref().unwrap_or(&latest) ver.as_ref().unwrap_or(&latest)
); );
let content_html = self.render_local(ver.clone(), shell).await?; let content_html = self.render_local(ver.clone(), shell).await.ok()?;
let content_html = String::from_utf8_lossy(&content_html);
let content = remove_data_urls(&html2md::parse_html(&content_html)); let content = remove_data_urls(&html2md::parse_html(&content_html));
let mut embeddings = Vec::new(); let mut embeddings = Vec::new();

View file

@ -3,9 +3,9 @@ use std::{io::Read, path::PathBuf};
use based::{request::RequestContext, ui::components::prelude::Shell}; use based::{request::RequestContext, ui::components::prelude::Shell};
use maud::html; use maud::html;
use crate::{blacklist::check_blacklist, conf::get_config, render_page}; use crate::{blacklist::check_blacklist, render_page};
use super::{internalize_urls, read_dir}; use super::read_dir;
/// Represents a document within a domain /// Represents a document within a domain
pub struct Document { pub struct Document {
@ -52,43 +52,39 @@ impl Document {
/// ///
/// # Returns /// # Returns
/// An `Option` containing the rendered content as a string, or `None` if nothing could be rendered. /// An `Option` containing the rendered content as a string, or `None` if nothing could be rendered.
pub async fn render_local(&self, version: Option<String>, shell: &Shell) -> Option<String> { pub async fn render_local(
&self,
version: Option<String>,
shell: &Shell,
) -> Result<Vec<u8>, String> {
if check_blacklist(&self.domain) { if check_blacklist(&self.domain) {
let content = html! { let content = html! {
h3 { "This site is blacklisted" }; h3 { "This site is blacklisted" };
}; };
return Some( return Err(render_page(content, RequestContext::default(), shell)
render_page(content, RequestContext::default(), shell)
.await .await
.1 .1
.1, .1);
);
} }
let mut file_path = self.doc_dir(); let mut file_path = self.doc_dir();
let latest_version = if let Some(version) = version { let latest_version = if let Some(version) = version {
format!("index_{version}.html") format!("index_{version}")
} else { } else {
let versions = self.versions(); let versions = self.versions();
let version = versions.first().cloned()?; let version = versions.first().cloned().ok_or(String::new())?;
format!("index_{version}.html") format!("index_{version}")
}; };
file_path = file_path.join(latest_version); file_path = file_path.join(latest_version);
let mut buf = Vec::new(); let mut buf = Vec::new();
std::fs::File::open(file_path) std::fs::File::open(file_path)
.ok()? .map_err(|_| String::new())?
.read_to_end(&mut buf) .read_to_end(&mut buf)
.unwrap(); .unwrap();
let content = String::from_utf8_lossy(&buf); Ok(buf)
if get_config().ROUTE_INTERNAL {
Some(internalize_urls(&content, &self.domain))
} else {
Some(content.to_string())
}
} }
/// Determines the directory where the document is stored. /// Determines the directory where the document is stored.
@ -127,12 +123,8 @@ impl Document {
let mut res: Vec<String> = read_dir(&self.doc_dir()) let mut res: Vec<String> = read_dir(&self.doc_dir())
.into_iter() .into_iter()
.filter_map(|x| { .filter_map(|x| {
if x.starts_with("index_") && x.ends_with(".html") { if x.starts_with("index_") {
return Some( return Some(x.trim_start_matches("index_").to_string());
x.trim_start_matches("index_")
.trim_end_matches(".html")
.to_string(),
);
} }
None None

View file

@ -98,7 +98,7 @@ impl Domain {
.filter(|x| !x.is_empty()) .filter(|x| !x.is_empty())
.collect::<Vec<&str>>() .collect::<Vec<&str>>()
.join("/"); .join("/");
if entry.starts_with("index_") && entry.ends_with(".html") { if entry.starts_with("index_") {
is_doc = true; is_doc = true;
continue; continue;
} }

View file

@ -32,7 +32,7 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
} }
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>` /// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
fn internalize_urls(input: &str, base: &str) -> String { pub fn internalize_urls(input: &str, base: &str) -> String {
// todo : fix regex, domains without path are not captured // todo : fix regex, domains without path are not captured
let url_pattern = r#"(\ |"|')(?:(<?)(https?:\/\/([a-zA-Z0-9.-]+))?(\/[\w./-]*))"#; let url_pattern = r#"(\ |"|')(?:(<?)(https?:\/\/([a-zA-Z0-9.-]+))?(\/[\w./-]*))"#;
let re = regex::Regex::new(url_pattern).unwrap(); let re = regex::Regex::new(url_pattern).unwrap();
@ -172,7 +172,7 @@ impl WebsiteArchive {
std::fs::create_dir_all(&folder_name).unwrap(); std::fs::create_dir_all(&folder_name).unwrap();
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string(); let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
let filename = folder_name.join(format!("index_{timestamp}.html")); let filename = folder_name.join(format!("index_{timestamp}"));
log::info!("Archiving {url} to {}", filename.to_str().unwrap()); log::info!("Archiving {url} to {}", filename.to_str().unwrap());
@ -238,10 +238,17 @@ fn run_command(cmd: &[&str]) {
let child = cmd_setup.spawn().unwrap(); let child = cmd_setup.spawn().unwrap();
let status = child.wait_with_output().unwrap(); let status = child.wait_with_output().unwrap();
assert!(status.status.success()); if !status.status.success() {
log::warn!(
"Command {cmd:?} exited with code {}",
status.status.code().unwrap_or_default()
)
}
} }
pub async fn index_archive_db(arc: &WebsiteArchive) { pub async fn index_archive_db(arc: &WebsiteArchive) {
// TODO : more index attrs size,mime
log::info!("Indexing archive"); log::info!("Indexing archive");
for dom in arc.domains() { for dom in arc.domains() {

View file

@ -158,16 +158,17 @@ async fn main() {
let content = doc.render_local(Some(ver), &shell).await; let content = doc.render_local(Some(ver), &shell).await;
if content.is_none() { if content.is_err() {
println!("No document found"); println!("No document found");
std::process::exit(1); std::process::exit(1);
} }
if md { if md {
let markdown = html2md::parse_html(&content.unwrap()); let markdown =
html2md::parse_html(&String::from_utf8_lossy(&content.unwrap()));
println!("{markdown}"); println!("{markdown}");
} else { } else {
println!("{}", content.unwrap()); println!("{}", String::from_utf8_lossy(&content.unwrap()));
} }
} }
Some((&_, _)) => {} Some((&_, _)) => {}

View file

@ -18,7 +18,7 @@ pub mod component;
use component::*; use component::*;
use serde_json::json; use serde_json::json;
use webarc::archive::{Document, DocumentIndex}; use webarc::archive::{internalize_urls, Document, DocumentIndex};
use webarc::{ use webarc::{
ai::{generate_embedding, remove_data_urls, EmbedStore, SearchResult}, ai::{generate_embedding, remove_data_urls, EmbedStore, SearchResult},
archive::{extract_domains, WebsiteArchive}, archive::{extract_domains, WebsiteArchive},
@ -26,6 +26,8 @@ use webarc::{
render_page, render_page,
}; };
// TODO : PDF view
const SEARCH_BAR_STYLE: &str = "w-full px-4 mb-4 py-2 text-white bg-black border-2 border-neon-blue placeholder-neon-blue focus:ring-2 focus:ring-neon-pink focus:outline-none font-mono text-lg"; const SEARCH_BAR_STYLE: &str = "w-full px-4 mb-4 py-2 text-white bg-black border-2 border-neon-blue placeholder-neon-blue focus:ring-2 focus:ring-neon-pink focus:outline-none font-mono text-lg";
#[allow(non_snake_case)] #[allow(non_snake_case)]
@ -226,12 +228,12 @@ pub async fn domain_info_route(
let (path_entries, is_doc) = domain.paths(paths.to_str().unwrap()); let (path_entries, is_doc) = domain.paths(paths.to_str().unwrap());
let path_seperations: Vec<&str> = paths.to_str().unwrap().split('/').collect(); let path_seperations: Vec<&str> = paths.to_str().unwrap().split('/').collect();
let domains = extract_domains( let domains = extract_domains(&String::from_utf8_lossy(
&document &document
.render_local(None, &shell) .render_local(None, &shell)
.await .await
.unwrap_or_default(), .unwrap_or_default(),
); ));
let content = html! { let content = html! {
h2 class="text-xl font-bold mb-4 flex items-center w-fit mx-auto" { h2 class="text-xl font-bold mb-4 flex items-center w-fit mx-auto" {
@ -307,9 +309,12 @@ pub async fn render_txt_website(
) -> Option<String> { ) -> Option<String> {
let document = arc.get_domain(domain).path(path.to_str().unwrap()); let document = arc.get_domain(domain).path(path.to_str().unwrap());
let mut content = document let content = document
.render_local(time.map(|time| time.to_string()), &shell) .render_local(time.map(|time| time.to_string()), &shell)
.await?; .await
.ok()?;
let mut content = String::from_utf8_lossy(&content).to_string();
if no_data_urls.is_some() { if no_data_urls.is_some() {
content = remove_data_urls(&content); content = remove_data_urls(&content);
@ -376,11 +381,23 @@ pub async fn redownload(
arc.archive_url(&format!("https://{domain}/{}", path.to_str())) arc.archive_url(&format!("https://{domain}/{}", path.to_str()))
.await; .await;
let content = document.render_local(None, &shell).await?; let mut content = document.render_local(None, &shell).await.ok()?;
let mime = infer::get(&content)
.map(|x| x.mime_type())
.unwrap_or("text/html");
if mime == "text/html" {
if get_config().ROUTE_INTERNAL {
content = internalize_urls(&String::from_utf8_lossy(&content), &domain)
.as_bytes()
.to_vec();
}
}
return Some(DataResponse::new( return Some(DataResponse::new(
content.as_bytes().to_vec(), content.to_vec(),
"text/html".to_string(), mime.to_string(),
Some(60 * 60 * 24), Some(60 * 60 * 24),
)); ));
} }
@ -414,10 +431,22 @@ pub async fn render_website(
// TODO : keep n versions // TODO : keep n versions
if let Some(content) = content { if let Ok(mut content) = content {
let mime = infer::get(&content)
.map(|x| x.mime_type())
.unwrap_or("text/html");
if mime == "text/html" {
if get_config().ROUTE_INTERNAL {
content = internalize_urls(&String::from_utf8_lossy(&content), &domain)
.as_bytes()
.to_vec();
}
}
return Some(DataResponse::new( return Some(DataResponse::new(
content.as_bytes().to_vec(), content,
"text/html".to_string(), mime.to_string(),
Some(60 * 60 * 24), Some(60 * 60 * 24),
)); ));
} else if get_config().DOWNLOAD_ON_DEMAND && time.is_none() { } else if get_config().DOWNLOAD_ON_DEMAND && time.is_none() {