This commit is contained in:
parent
a9f758cd9b
commit
2e5b4fc3d2
9 changed files with 141 additions and 89 deletions
105
Cargo.lock
generated
105
Cargo.lock
generated
|
@ -202,7 +202,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
|||
[[package]]
|
||||
name = "based"
|
||||
version = "0.1.0"
|
||||
source = "git+https://git.hydrar.de/jmarya/based#9afe75bc8fd961f050ee31fc9e86e37eb6f8ffb6"
|
||||
source = "git+https://git.hydrar.de/jmarya/based#696b34f2f17ef2d86f0bc77993f9b0b8b652c0f6"
|
||||
dependencies = [
|
||||
"bcrypt",
|
||||
"chrono",
|
||||
|
@ -307,9 +307,9 @@ checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
|
|||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.14"
|
||||
version = "1.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9"
|
||||
checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af"
|
||||
dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
@ -320,6 +320,17 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
|
||||
|
||||
[[package]]
|
||||
name = "cfb"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"fnv",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
|
@ -353,9 +364,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.30"
|
||||
version = "4.5.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92b7b18d71fad5313a1e320fa9897994228ce274b60faa4d694fe0ea89cd9e6d"
|
||||
checksum = "027bb0d98429ae334a8698531da7077bdf906419543a35a55c2cb1b66437d767"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
|
@ -363,9 +374,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.5.30"
|
||||
version = "4.5.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a35db2071778a7344791a4fb4f95308b5673d219dee3ae348b86642574ecc90c"
|
||||
checksum = "5589e0cba072e0f3d23791efac0fd8627b49c829c196a492e88168e6a669d863"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
|
@ -627,9 +638,9 @@ checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35"
|
|||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.13.0"
|
||||
version = "1.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
||||
checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
@ -950,9 +961,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.4.7"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e"
|
||||
checksum = "5017294ff4bb30944501348f6f8e42e6ad28f42c8bbef7a74029aff064a4e3c2"
|
||||
dependencies = [
|
||||
"atomic-waker",
|
||||
"bytes",
|
||||
|
@ -1185,7 +1196,7 @@ dependencies = [
|
|||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-util",
|
||||
"h2 0.4.7",
|
||||
"h2 0.4.8",
|
||||
"http 1.2.0",
|
||||
"http-body 1.0.1",
|
||||
"httparse",
|
||||
|
@ -1445,6 +1456,15 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "infer"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7"
|
||||
dependencies = [
|
||||
"cfb",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inlinable_string"
|
||||
version = "0.1.15"
|
||||
|
@ -1453,9 +1473,9 @@ checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
|
|||
|
||||
[[package]]
|
||||
name = "inout"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
|
||||
checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
@ -1530,9 +1550,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.169"
|
||||
version = "0.2.170"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
||||
checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
|
@ -1574,9 +1594,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.25"
|
||||
version = "0.4.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
|
||||
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
|
||||
|
||||
[[package]]
|
||||
name = "loom"
|
||||
|
@ -1680,9 +1700,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
|||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.4"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b"
|
||||
checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
|
||||
dependencies = [
|
||||
"adler2",
|
||||
]
|
||||
|
@ -1719,9 +1739,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.13"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0dab59f8e050d5df8e4dd87d9206fb6f65a483e20ac9fda365ade4fab353196c"
|
||||
checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
|
@ -2175,9 +2195,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.8"
|
||||
version = "0.5.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
|
||||
checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f"
|
||||
dependencies = [
|
||||
"bitflags 2.8.0",
|
||||
]
|
||||
|
@ -2297,7 +2317,7 @@ dependencies = [
|
|||
"encoding_rs",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2 0.4.7",
|
||||
"h2 0.4.8",
|
||||
"http 1.2.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
|
@ -2347,9 +2367,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.9"
|
||||
version = "0.17.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24"
|
||||
checksum = "da5349ae27d3887ca812fb375b45a4fbb36d8d12d2df394968cd86e35683fe73"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
|
@ -2540,7 +2560,7 @@ version = "0.102.8"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9"
|
||||
dependencies = [
|
||||
"ring 0.17.9",
|
||||
"ring 0.17.11",
|
||||
"rustls-pki-types",
|
||||
"untrusted 0.9.0",
|
||||
]
|
||||
|
@ -2637,18 +2657,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.217"
|
||||
version = "1.0.218"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
||||
checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.217"
|
||||
version = "1.0.218"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
||||
checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -2668,9 +2688,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.138"
|
||||
version = "1.0.139"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
|
||||
checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"memchr",
|
||||
|
@ -3055,9 +3075,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.5.3"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "244292f3441c89febe5b5bdfbb6863aeaf4f64da810ea3050fd927b27b8d92ce"
|
||||
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
|
@ -3558,9 +3578,9 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
|
|||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.16"
|
||||
version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
|
||||
checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
|
@ -3641,9 +3661,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
|||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.13.2"
|
||||
version = "1.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c1f41ffb7cf259f1ecc2876861a17e7142e63ead296f671f81f6ae85903e0d6"
|
||||
checksum = "93d59ca99a559661b96bf898d8fce28ed87935fd2bea9f05983c1464dd6c71b1"
|
||||
dependencies = [
|
||||
"getrandom 0.3.1",
|
||||
"serde",
|
||||
|
@ -3798,6 +3818,7 @@ dependencies = [
|
|||
"env_logger",
|
||||
"futures",
|
||||
"html2md",
|
||||
"infer",
|
||||
"log",
|
||||
"maud",
|
||||
"ollama-rs",
|
||||
|
@ -4054,9 +4075,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
|||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.7.2"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59690dea168f2198d1a3b0cac23b8063efcd11012f10ae4698f284808c8ef603"
|
||||
checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
|
|
@ -25,3 +25,4 @@ html2md = "0.2.14"
|
|||
clap = { version = "4.5.23", features = ["cargo", "derive"] }
|
||||
toml = "0.8.19"
|
||||
url-escape = "0.1.1"
|
||||
infer = "0.19.0"
|
||||
|
|
|
@ -9,11 +9,11 @@ web_archive/
|
|||
├─ domain.com/
|
||||
│ ├─ sub/
|
||||
│ │ ├─ path/
|
||||
│ │ │ ├─ index_YYYY_MM_DD.html
|
||||
│ │ │ ├─ index_YYYY_MM_DD
|
||||
├─ sub.domain.com/
|
||||
```
|
||||
|
||||
Every document of this web archive can then be found at `archive/domain/paths.../index_YYYY_MM_DD.html`.
|
||||
Every document of this web archive can then be found at `archive/domain/paths.../index_YYYY_MM_DD`.
|
||||
|
||||
## Usage
|
||||
webarc provides a CLI tool to work with the archive structure.
|
||||
|
|
|
@ -140,7 +140,8 @@ impl Embedding for Document {
|
|||
ver.as_ref().unwrap_or(&latest)
|
||||
);
|
||||
|
||||
let content_html = self.render_local(ver.clone(), shell).await?;
|
||||
let content_html = self.render_local(ver.clone(), shell).await.ok()?;
|
||||
let content_html = String::from_utf8_lossy(&content_html);
|
||||
let content = remove_data_urls(&html2md::parse_html(&content_html));
|
||||
|
||||
let mut embeddings = Vec::new();
|
||||
|
|
|
@ -3,9 +3,9 @@ use std::{io::Read, path::PathBuf};
|
|||
use based::{request::RequestContext, ui::components::prelude::Shell};
|
||||
use maud::html;
|
||||
|
||||
use crate::{blacklist::check_blacklist, conf::get_config, render_page};
|
||||
use crate::{blacklist::check_blacklist, render_page};
|
||||
|
||||
use super::{internalize_urls, read_dir};
|
||||
use super::read_dir;
|
||||
|
||||
/// Represents a document within a domain
|
||||
pub struct Document {
|
||||
|
@ -52,43 +52,39 @@ impl Document {
|
|||
///
|
||||
/// # Returns
|
||||
/// An `Option` containing the rendered content as a string, or `None` if nothing could be rendered.
|
||||
pub async fn render_local(&self, version: Option<String>, shell: &Shell) -> Option<String> {
|
||||
pub async fn render_local(
|
||||
&self,
|
||||
version: Option<String>,
|
||||
shell: &Shell,
|
||||
) -> Result<Vec<u8>, String> {
|
||||
if check_blacklist(&self.domain) {
|
||||
let content = html! {
|
||||
h3 { "This site is blacklisted" };
|
||||
};
|
||||
return Some(
|
||||
render_page(content, RequestContext::default(), shell)
|
||||
.await
|
||||
.1
|
||||
.1,
|
||||
);
|
||||
return Err(render_page(content, RequestContext::default(), shell)
|
||||
.await
|
||||
.1
|
||||
.1);
|
||||
}
|
||||
|
||||
let mut file_path = self.doc_dir();
|
||||
|
||||
let latest_version = if let Some(version) = version {
|
||||
format!("index_{version}.html")
|
||||
format!("index_{version}")
|
||||
} else {
|
||||
let versions = self.versions();
|
||||
let version = versions.first().cloned()?;
|
||||
format!("index_{version}.html")
|
||||
let version = versions.first().cloned().ok_or(String::new())?;
|
||||
format!("index_{version}")
|
||||
};
|
||||
|
||||
file_path = file_path.join(latest_version);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
std::fs::File::open(file_path)
|
||||
.ok()?
|
||||
.map_err(|_| String::new())?
|
||||
.read_to_end(&mut buf)
|
||||
.unwrap();
|
||||
let content = String::from_utf8_lossy(&buf);
|
||||
|
||||
if get_config().ROUTE_INTERNAL {
|
||||
Some(internalize_urls(&content, &self.domain))
|
||||
} else {
|
||||
Some(content.to_string())
|
||||
}
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Determines the directory where the document is stored.
|
||||
|
@ -127,12 +123,8 @@ impl Document {
|
|||
let mut res: Vec<String> = read_dir(&self.doc_dir())
|
||||
.into_iter()
|
||||
.filter_map(|x| {
|
||||
if x.starts_with("index_") && x.ends_with(".html") {
|
||||
return Some(
|
||||
x.trim_start_matches("index_")
|
||||
.trim_end_matches(".html")
|
||||
.to_string(),
|
||||
);
|
||||
if x.starts_with("index_") {
|
||||
return Some(x.trim_start_matches("index_").to_string());
|
||||
}
|
||||
|
||||
None
|
||||
|
|
|
@ -98,7 +98,7 @@ impl Domain {
|
|||
.filter(|x| !x.is_empty())
|
||||
.collect::<Vec<&str>>()
|
||||
.join("/");
|
||||
if entry.starts_with("index_") && entry.ends_with(".html") {
|
||||
if entry.starts_with("index_") {
|
||||
is_doc = true;
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
|
|||
}
|
||||
|
||||
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
|
||||
fn internalize_urls(input: &str, base: &str) -> String {
|
||||
pub fn internalize_urls(input: &str, base: &str) -> String {
|
||||
// todo : fix regex, domains without path are not captured
|
||||
let url_pattern = r#"(\ |"|')(?:(<?)(https?:\/\/([a-zA-Z0-9.-]+))?(\/[\w./-]*))"#;
|
||||
let re = regex::Regex::new(url_pattern).unwrap();
|
||||
|
@ -172,7 +172,7 @@ impl WebsiteArchive {
|
|||
std::fs::create_dir_all(&folder_name).unwrap();
|
||||
|
||||
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
|
||||
let filename = folder_name.join(format!("index_{timestamp}.html"));
|
||||
let filename = folder_name.join(format!("index_{timestamp}"));
|
||||
|
||||
log::info!("Archiving {url} to {}", filename.to_str().unwrap());
|
||||
|
||||
|
@ -238,10 +238,17 @@ fn run_command(cmd: &[&str]) {
|
|||
let child = cmd_setup.spawn().unwrap();
|
||||
|
||||
let status = child.wait_with_output().unwrap();
|
||||
assert!(status.status.success());
|
||||
if !status.status.success() {
|
||||
log::warn!(
|
||||
"Command {cmd:?} exited with code {}",
|
||||
status.status.code().unwrap_or_default()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn index_archive_db(arc: &WebsiteArchive) {
|
||||
// TODO : more index attrs size,mime
|
||||
|
||||
log::info!("Indexing archive");
|
||||
|
||||
for dom in arc.domains() {
|
||||
|
|
|
@ -158,16 +158,17 @@ async fn main() {
|
|||
|
||||
let content = doc.render_local(Some(ver), &shell).await;
|
||||
|
||||
if content.is_none() {
|
||||
if content.is_err() {
|
||||
println!("No document found");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
if md {
|
||||
let markdown = html2md::parse_html(&content.unwrap());
|
||||
let markdown =
|
||||
html2md::parse_html(&String::from_utf8_lossy(&content.unwrap()));
|
||||
println!("{markdown}");
|
||||
} else {
|
||||
println!("{}", content.unwrap());
|
||||
println!("{}", String::from_utf8_lossy(&content.unwrap()));
|
||||
}
|
||||
}
|
||||
Some((&_, _)) => {}
|
||||
|
|
|
@ -18,7 +18,7 @@ pub mod component;
|
|||
use component::*;
|
||||
use serde_json::json;
|
||||
|
||||
use webarc::archive::{Document, DocumentIndex};
|
||||
use webarc::archive::{internalize_urls, Document, DocumentIndex};
|
||||
use webarc::{
|
||||
ai::{generate_embedding, remove_data_urls, EmbedStore, SearchResult},
|
||||
archive::{extract_domains, WebsiteArchive},
|
||||
|
@ -26,6 +26,8 @@ use webarc::{
|
|||
render_page,
|
||||
};
|
||||
|
||||
// TODO : PDF view
|
||||
|
||||
const SEARCH_BAR_STYLE: &str = "w-full px-4 mb-4 py-2 text-white bg-black border-2 border-neon-blue placeholder-neon-blue focus:ring-2 focus:ring-neon-pink focus:outline-none font-mono text-lg";
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
|
@ -226,12 +228,12 @@ pub async fn domain_info_route(
|
|||
let (path_entries, is_doc) = domain.paths(paths.to_str().unwrap());
|
||||
let path_seperations: Vec<&str> = paths.to_str().unwrap().split('/').collect();
|
||||
|
||||
let domains = extract_domains(
|
||||
let domains = extract_domains(&String::from_utf8_lossy(
|
||||
&document
|
||||
.render_local(None, &shell)
|
||||
.await
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
));
|
||||
|
||||
let content = html! {
|
||||
h2 class="text-xl font-bold mb-4 flex items-center w-fit mx-auto" {
|
||||
|
@ -307,9 +309,12 @@ pub async fn render_txt_website(
|
|||
) -> Option<String> {
|
||||
let document = arc.get_domain(domain).path(path.to_str().unwrap());
|
||||
|
||||
let mut content = document
|
||||
let content = document
|
||||
.render_local(time.map(|time| time.to_string()), &shell)
|
||||
.await?;
|
||||
.await
|
||||
.ok()?;
|
||||
|
||||
let mut content = String::from_utf8_lossy(&content).to_string();
|
||||
|
||||
if no_data_urls.is_some() {
|
||||
content = remove_data_urls(&content);
|
||||
|
@ -376,11 +381,23 @@ pub async fn redownload(
|
|||
arc.archive_url(&format!("https://{domain}/{}", path.to_str()))
|
||||
.await;
|
||||
|
||||
let content = document.render_local(None, &shell).await?;
|
||||
let mut content = document.render_local(None, &shell).await.ok()?;
|
||||
|
||||
let mime = infer::get(&content)
|
||||
.map(|x| x.mime_type())
|
||||
.unwrap_or("text/html");
|
||||
|
||||
if mime == "text/html" {
|
||||
if get_config().ROUTE_INTERNAL {
|
||||
content = internalize_urls(&String::from_utf8_lossy(&content), &domain)
|
||||
.as_bytes()
|
||||
.to_vec();
|
||||
}
|
||||
}
|
||||
|
||||
return Some(DataResponse::new(
|
||||
content.as_bytes().to_vec(),
|
||||
"text/html".to_string(),
|
||||
content.to_vec(),
|
||||
mime.to_string(),
|
||||
Some(60 * 60 * 24),
|
||||
));
|
||||
}
|
||||
|
@ -414,10 +431,22 @@ pub async fn render_website(
|
|||
|
||||
// TODO : keep n versions
|
||||
|
||||
if let Some(content) = content {
|
||||
if let Ok(mut content) = content {
|
||||
let mime = infer::get(&content)
|
||||
.map(|x| x.mime_type())
|
||||
.unwrap_or("text/html");
|
||||
|
||||
if mime == "text/html" {
|
||||
if get_config().ROUTE_INTERNAL {
|
||||
content = internalize_urls(&String::from_utf8_lossy(&content), &domain)
|
||||
.as_bytes()
|
||||
.to_vec();
|
||||
}
|
||||
}
|
||||
|
||||
return Some(DataResponse::new(
|
||||
content.as_bytes().to_vec(),
|
||||
"text/html".to_string(),
|
||||
content,
|
||||
mime.to_string(),
|
||||
Some(60 * 60 * 24),
|
||||
));
|
||||
} else if get_config().DOWNLOAD_ON_DEMAND && time.is_none() {
|
||||
|
|
Loading…
Add table
Reference in a new issue