Add on-demand download

This commit is contained in:
JMARyA 2024-12-29 16:58:58 +01:00
parent 319b663694
commit d8ca94bd0b
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
4 changed files with 64 additions and 27 deletions

View file

@ -11,6 +11,10 @@ services:
- "DATABASE_URL=postgres://user:pass@postgres/webarc" - "DATABASE_URL=postgres://user:pass@postgres/webarc"
- "RUST_LOG=info" - "RUST_LOG=info"
- "ROCKET_ADDRESS=0.0.0.0" - "ROCKET_ADDRESS=0.0.0.0"
# Rewrite links to point back to the archive itself
- "ROUTE_INTERNAL=true"
# Download missing routes on demand
- "DOWNLOAD_ON_DEMAND=true"
postgres: postgres:
image: timescale/timescaledb:latest-pg16 image: timescale/timescaledb:latest-pg16

View file

@ -1,18 +1,25 @@
use std::{fmt::format, fs::read_to_string, path::{Path, PathBuf}}; use std::{
fmt::format,
fs::read_to_string,
path::{Path, PathBuf},
};
pub struct WebsiteArchive { pub struct WebsiteArchive {
pub dir: PathBuf pub dir: PathBuf,
} }
pub struct Domain { pub struct Domain {
pub name: String, pub name: String,
dir: PathBuf dir: PathBuf,
} }
impl Domain { impl Domain {
pub fn new(name: &str, dir: PathBuf) -> Self { pub fn new(name: &str, dir: PathBuf) -> Self {
std::fs::create_dir_all(&dir).unwrap(); std::fs::create_dir_all(&dir).unwrap();
Self { name: name.to_string(), dir } Self {
name: name.to_string(),
dir,
}
} }
pub fn path(&self, path: &str) -> Document { pub fn path(&self, path: &str) -> Document {
@ -23,15 +30,19 @@ impl Domain {
pub struct Document { pub struct Document {
pub domain: String, pub domain: String,
pub path: String, pub path: String,
base_dir: PathBuf base_dir: PathBuf,
} }
impl Document { impl Document {
pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self { pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self {
Self { domain: domain.to_string(), path: path.to_string(), base_dir } Self {
domain: domain.to_string(),
path: path.to_string(),
base_dir,
}
} }
pub fn render_local(&self, version: Option<String>) -> String { pub fn render_local(&self, version: Option<String>) -> Option<String> {
let mut file_path = self.base_dir.join(&self.domain); let mut file_path = self.base_dir.join(&self.domain);
for p in self.path.split('/') { for p in self.path.split('/') {
@ -42,18 +53,18 @@ impl Document {
format!("index_{version}.html") format!("index_{version}.html")
} else { } else {
let versions = Self::versions(&file_path); let versions = Self::versions(&file_path);
versions.first().cloned().unwrap() versions.first().cloned()?
}; };
file_path = file_path.join(latest_version); file_path = file_path.join(latest_version);
// TODO : Replace links with local ones // TODO : Replace links with local ones
return std::fs::read_to_string(file_path).unwrap(); return std::fs::read_to_string(file_path).ok();
} }
pub fn versions(path: &PathBuf) -> Vec<String> { pub fn versions(path: &PathBuf) -> Vec<String> {
let mut version_list = Vec::new(); let mut version_list = Vec::new();
if let Ok(entries) = std::fs::read_dir(path) { if let Ok(entries) = std::fs::read_dir(path) {
for entry in entries { for entry in entries {
if let Ok(entry) = entry { if let Ok(entry) = entry {
@ -63,14 +74,16 @@ impl Document {
} }
} }
} }
version_list version_list
} }
} }
impl WebsiteArchive { impl WebsiteArchive {
pub fn new(dir: &str) -> Self { pub fn new(dir: &str) -> Self {
Self { dir: PathBuf::from(dir) } Self {
dir: PathBuf::from(dir),
}
} }
pub fn get_domain(&self, domain: &str) -> Domain { pub fn get_domain(&self, domain: &str) -> Domain {
@ -96,13 +109,13 @@ impl WebsiteArchive {
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string(); let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
let filename = folder_name.join(&format!("index_{timestamp}.html")); let filename = folder_name.join(&format!("index_{timestamp}.html"));
run_command(&vec![ run_command(&vec![
"monolith", "monolith",
"-I", "-I",
"-o", "-o",
filename.to_str().unwrap(), filename.to_str().unwrap(),
&format!("https://{}/{}", domain, path) &format!("https://{}/{}", domain, path),
]); ]);
} }
} }
@ -112,7 +125,6 @@ impl WebsiteArchive {
// transparent auto page downloading // transparent auto page downloading
// redownload after threshold // redownload after threshold
fn run_command(cmd: &[&str]) { fn run_command(cmd: &[&str]) {
let mut cmd_setup = std::process::Command::new(cmd[0].clone()); let mut cmd_setup = std::process::Command::new(cmd[0].clone());
let cmd_setup = cmd_setup.args(cmd.into_iter().skip(1).collect::<Vec<_>>()); let cmd_setup = cmd_setup.args(cmd.into_iter().skip(1).collect::<Vec<_>>());

View file

@ -2,8 +2,8 @@ use archive::WebsiteArchive;
use based::get_pg; use based::get_pg;
use rocket::routes; use rocket::routes;
mod pages;
mod archive; mod archive;
mod pages;
#[rocket::launch] #[rocket::launch]
async fn launch() -> _ { async fn launch() -> _ {
@ -15,12 +15,6 @@ async fn launch() -> _ {
let arc = WebsiteArchive::new("./websites"); let arc = WebsiteArchive::new("./websites");
rocket::build() rocket::build()
.mount( .mount("/", routes![pages::index, pages::render_website])
"/",
routes![
pages::index,
pages::render_website
],
)
.manage(arc) .manage(arc)
} }

View file

@ -13,10 +13,37 @@ pub async fn index() -> StringResponse {
} }
#[get("/s/<domain>/<path..>?<time>")] #[get("/s/<domain>/<path..>?<time>")]
pub async fn render_website(domain: &str, path: PathBuf, time: Option<&str>, arc: &State<WebsiteArchive>) -> StringResponse { pub async fn render_website(
if let Some(time) = time { domain: &str,
respond_html(&arc.get_domain(domain).path(path.to_str().unwrap()).render_local(Some(time.to_string()))) path: PathBuf,
time: Option<&str>,
arc: &State<WebsiteArchive>,
) -> Option<StringResponse> {
let document = arc.get_domain(domain).path(path.to_str().unwrap());
let content = document.render_local(if time.is_some() {
Some(time.unwrap().to_string())
} else { } else {
respond_html(&arc.get_domain(domain).path(path.to_str().unwrap()).render_local(None)) None
});
if let Some(content) = content {
return Some(respond_html(&content));
} else {
if std::env::var("DOWNLOAD_ON_DEMAND")
.unwrap_or("false".to_string())
.as_str()
== "true"
{
arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()));
return Some(respond_html(&document.render_local(if time.is_some() {
Some(time.unwrap().to_string())
} else {
None
})?));
}
} }
None
} }