use std::{fmt::format, fs::read_to_string, path::{Path, PathBuf}}; pub struct WebsiteArchive { pub dir: PathBuf } pub struct Domain { pub name: String, dir: PathBuf } impl Domain { pub fn new(name: &str, dir: PathBuf) -> Self { std::fs::create_dir_all(&dir).unwrap(); Self { name: name.to_string(), dir } } pub fn path(&self, path: &str) -> Document { Document::new(&self.name, path, self.dir.parent().unwrap().to_path_buf()) } } pub struct Document { pub domain: String, pub path: String, base_dir: PathBuf } impl Document { pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self { Self { domain: domain.to_string(), path: path.to_string(), base_dir } } pub fn render_local(&self, version: Option) -> String { let mut file_path = self.base_dir.join(&self.domain); for p in self.path.split('/') { file_path = file_path.join(p); } let latest_version = if let Some(version) = version { format!("index_{version}.html") } else { let versions = Self::versions(&file_path); versions.first().cloned().unwrap() }; file_path = file_path.join(latest_version); // TODO : Replace links with local ones return std::fs::read_to_string(file_path).unwrap(); } pub fn versions(path: &PathBuf) -> Vec { let mut version_list = Vec::new(); if let Ok(entries) = std::fs::read_dir(path) { for entry in entries { if let Ok(entry) = entry { if let Some(file_name) = entry.file_name().to_str() { version_list.push(file_name.to_string()); } } } } version_list } } impl WebsiteArchive { pub fn new(dir: &str) -> Self { Self { dir: PathBuf::from(dir) } } pub fn get_domain(&self, domain: &str) -> Domain { Domain::new(domain, self.dir.join(domain)) } /// Archive a URL pub fn archive_url(&self, url: &str) { let parsed_url = url::Url::parse(url).unwrap(); let domain = parsed_url.domain().unwrap().trim_start_matches("www"); let path = parsed_url.path(); let mut folder_name = self.dir.join(&domain); for paths in path.split('/') { if !paths.is_empty() { folder_name = folder_name.join(paths); } } std::fs::create_dir_all(&folder_name).unwrap(); let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string(); let filename = folder_name.join(&format!("index_{timestamp}.html")); run_command(&vec![ "monolith", "-I", "-o", filename.to_str().unwrap(), &format!("https://{}/{}", domain, path) ]); } } // full text search // add new sites? // transparent auto page downloading // redownload after threshold fn run_command(cmd: &[&str]) { let mut cmd_setup = std::process::Command::new(cmd[0].clone()); let cmd_setup = cmd_setup.args(cmd.into_iter().skip(1).collect::>()); let child = cmd_setup.spawn().unwrap(); let status = child.wait_with_output().unwrap(); assert!(status.status.success()); }