init
This commit is contained in:
commit
319b663694
11 changed files with 3722 additions and 0 deletions
123
src/archive.rs
Normal file
123
src/archive.rs
Normal file
|
@ -0,0 +1,123 @@
|
|||
use std::{fmt::format, fs::read_to_string, path::{Path, PathBuf}};
|
||||
|
||||
/// Handle to an on-disk website archive.
///
/// The archive is a directory tree rooted at [`WebsiteArchive::dir`]; the
/// methods on this type place one sub-directory per domain below that root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WebsiteArchive {
    /// Root directory of the archive.
    pub dir: PathBuf,
}
|
||||
|
||||
pub struct Domain {
|
||||
pub name: String,
|
||||
dir: PathBuf
|
||||
}
|
||||
|
||||
impl Domain {
|
||||
pub fn new(name: &str, dir: PathBuf) -> Self {
|
||||
std::fs::create_dir_all(&dir).unwrap();
|
||||
Self { name: name.to_string(), dir }
|
||||
}
|
||||
|
||||
pub fn path(&self, path: &str) -> Document {
|
||||
Document::new(&self.name, path, self.dir.parent().unwrap().to_path_buf())
|
||||
}
|
||||
}
|
||||
|
||||
/// A single archived page, addressed by domain and URL path.
///
/// Snapshots are read from
/// `<base_dir>/<domain>/<path segments…>/index_<version>.html`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Document {
    /// Domain of the page; first directory level below the base directory.
    pub domain: String,
    /// URL path of the page; every `/`-separated segment is one directory level.
    pub path: String,
    /// Directory that contains the per-domain directories.
    base_dir: PathBuf,
}

impl Document {
    /// Creates a document handle. No filesystem access happens here.
    pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self {
        Self {
            domain: domain.to_string(),
            path: path.to_string(),
            base_dir,
        }
    }

    /// Returns the stored HTML of this document.
    ///
    /// With `Some(version)` the file `index_<version>.html` is read. With
    /// `None` the newest snapshot reported by [`Document::versions`] is used.
    ///
    /// # Panics
    ///
    /// Panics if no snapshot exists for the document or if the snapshot file
    /// cannot be read as UTF-8 text.
    pub fn render_local(&self, version: Option<String>) -> String {
        let mut doc_dir = self.base_dir.join(&self.domain);
        // Empty segments (leading/trailing/double slashes) add nothing to the
        // resulting path, so they are skipped outright.
        // NOTE(review): `..` segments are passed through unchanged; if `path`
        // can come from an untrusted request this allows escaping the archive
        // directory and should be rejected by the caller or here.
        doc_dir.extend(self.path.split('/').filter(|segment| !segment.is_empty()));

        let file_name = match version {
            Some(version) => format!("index_{version}.html"),
            None => Self::versions(&doc_dir)
                .into_iter()
                .next()
                .unwrap_or_else(|| {
                    panic!("no archived version found in {}", doc_dir.display())
                }),
        };

        let file_path = doc_dir.join(file_name);

        // TODO : Replace links with local ones
        std::fs::read_to_string(&file_path).unwrap_or_else(|err| {
            panic!("failed to read snapshot {}: {}", file_path.display(), err)
        })
    }

    /// Lists the snapshot files stored in the directory `path`.
    ///
    /// Returns the file names (not full paths) of all `index_*.html` files,
    /// sorted newest first. The `index_<date>.html` names sort
    /// chronologically, so a reverse lexicographic sort puts the latest
    /// snapshot at index 0. Sub-directories (archived child pages) and
    /// unrelated files are ignored, as are names that are not valid UTF-8.
    ///
    /// An unreadable or missing directory yields an empty list.
    pub fn versions(path: &Path) -> Vec<String> {
        let entries = match std::fs::read_dir(path) {
            Ok(entries) => entries,
            Err(_) => return Vec::new(),
        };

        let mut version_list: Vec<String> = entries
            .flatten()
            .filter(|entry| entry.file_type().map(|kind| !kind.is_dir()).unwrap_or(false))
            .filter_map(|entry| entry.file_name().into_string().ok())
            .filter(|name| name.starts_with("index_") && name.ends_with(".html"))
            .collect();

        // `read_dir` order is platform-dependent; sort so "first" means "latest".
        version_list.sort_unstable_by(|a, b| b.cmp(a));
        version_list
    }
}
|
||||
|
||||
impl WebsiteArchive {
|
||||
pub fn new(dir: &str) -> Self {
|
||||
Self { dir: PathBuf::from(dir) }
|
||||
}
|
||||
|
||||
pub fn get_domain(&self, domain: &str) -> Domain {
|
||||
Domain::new(domain, self.dir.join(domain))
|
||||
}
|
||||
|
||||
/// Archive a URL
|
||||
pub fn archive_url(&self, url: &str) {
|
||||
let parsed_url = url::Url::parse(url).unwrap();
|
||||
|
||||
let domain = parsed_url.domain().unwrap().trim_start_matches("www");
|
||||
let path = parsed_url.path();
|
||||
|
||||
let mut folder_name = self.dir.join(&domain);
|
||||
|
||||
for paths in path.split('/') {
|
||||
if !paths.is_empty() {
|
||||
folder_name = folder_name.join(paths);
|
||||
}
|
||||
}
|
||||
|
||||
std::fs::create_dir_all(&folder_name).unwrap();
|
||||
|
||||
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
|
||||
let filename = folder_name.join(&format!("index_{timestamp}.html"));
|
||||
|
||||
run_command(&vec![
|
||||
"monolith",
|
||||
"-I",
|
||||
"-o",
|
||||
filename.to_str().unwrap(),
|
||||
&format!("https://{}/{}", domain, path)
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
// full text search
|
||||
// add new sites?
|
||||
// transparent auto page downloading
|
||||
// redownload after threshold
|
||||
|
||||
|
||||
/// Runs an external command and waits for it to finish.
///
/// `cmd[0]` is the program, the remaining elements are its arguments. The
/// child inherits this process's stdin/stdout/stderr.
///
/// # Panics
///
/// Panics if `cmd` is empty, if the program cannot be started, or if it
/// exits with a non-success status.
fn run_command(cmd: &[&str]) {
    let (program, args) = cmd
        .split_first()
        .expect("run_command: command must contain at least the program name");

    let status = std::process::Command::new(program)
        .args(args)
        .status()
        .unwrap_or_else(|err| panic!("run_command: failed to start `{}`: {}", program, err));

    assert!(
        status.success(),
        "run_command: `{}` failed with {}",
        cmd.join(" "),
        status
    );
}
|
Loading…
Add table
Add a link
Reference in a new issue