This commit is contained in:
parent
d8ca94bd0b
commit
2f83d5f136
3 changed files with 41 additions and 16 deletions
22
Cargo.lock
generated
22
Cargo.lock
generated
|
@ -3082,7 +3082,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6"
|
checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "watchdogs"
|
name = "web-sys"
|
||||||
|
version = "0.3.76"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc"
|
||||||
|
dependencies = [
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "webarc"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"based",
|
"based",
|
||||||
|
@ -3106,16 +3116,6 @@ dependencies = [
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "web-sys"
|
|
||||||
version = "0.3.76"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc"
|
|
||||||
dependencies = [
|
|
||||||
"js-sys",
|
|
||||||
"wasm-bindgen",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "whoami"
|
name = "whoami"
|
||||||
version = "1.5.2"
|
version = "1.5.2"
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
[package]
|
[package]
|
||||||
name = "watchdogs"
|
name = "webarc"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,20 @@ use std::{
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
fn internalize_urls(input: &str) -> String {
|
||||||
|
let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
|
||||||
|
let re = regex::Regex::new(url_pattern).unwrap();
|
||||||
|
|
||||||
|
re.replace_all(input, |caps: &regex::Captures| {
|
||||||
|
format!(
|
||||||
|
"/s/{}/{}",
|
||||||
|
&caps[1].trim_start_matches("www."), // Domain
|
||||||
|
&caps[2] // Path
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.to_string()
|
||||||
|
}
|
||||||
|
|
||||||
pub struct WebsiteArchive {
|
pub struct WebsiteArchive {
|
||||||
pub dir: PathBuf,
|
pub dir: PathBuf,
|
||||||
}
|
}
|
||||||
|
@ -58,8 +72,13 @@ impl Document {
|
||||||
|
|
||||||
file_path = file_path.join(latest_version);
|
file_path = file_path.join(latest_version);
|
||||||
|
|
||||||
// TODO : Replace links with local ones
|
let content = std::fs::read_to_string(file_path).ok()?;
|
||||||
return std::fs::read_to_string(file_path).ok();
|
|
||||||
|
if std::env::var("ROUTE_INTERNAL").unwrap_or("false".to_string()) == "true" {
|
||||||
|
Some(internalize_urls(&content))
|
||||||
|
} else {
|
||||||
|
Some(content)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn versions(path: &PathBuf) -> Vec<String> {
|
pub fn versions(path: &PathBuf) -> Vec<String> {
|
||||||
|
@ -94,7 +113,7 @@ impl WebsiteArchive {
|
||||||
pub fn archive_url(&self, url: &str) {
|
pub fn archive_url(&self, url: &str) {
|
||||||
let parsed_url = url::Url::parse(url).unwrap();
|
let parsed_url = url::Url::parse(url).unwrap();
|
||||||
|
|
||||||
let domain = parsed_url.domain().unwrap().trim_start_matches("www");
|
let domain = parsed_url.domain().unwrap().trim_start_matches("www.");
|
||||||
let path = parsed_url.path();
|
let path = parsed_url.path();
|
||||||
|
|
||||||
let mut folder_name = self.dir.join(&domain);
|
let mut folder_name = self.dir.join(&domain);
|
||||||
|
@ -110,6 +129,8 @@ impl WebsiteArchive {
|
||||||
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
|
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
|
||||||
let filename = folder_name.join(&format!("index_{timestamp}.html"));
|
let filename = folder_name.join(&format!("index_{timestamp}.html"));
|
||||||
|
|
||||||
|
log::info!("Archiving {url} to {}", filename.to_str().unwrap());
|
||||||
|
|
||||||
run_command(&vec![
|
run_command(&vec![
|
||||||
"monolith",
|
"monolith",
|
||||||
"-I",
|
"-I",
|
||||||
|
@ -127,7 +148,11 @@ impl WebsiteArchive {
|
||||||
|
|
||||||
fn run_command(cmd: &[&str]) {
|
fn run_command(cmd: &[&str]) {
|
||||||
let mut cmd_setup = std::process::Command::new(cmd[0].clone());
|
let mut cmd_setup = std::process::Command::new(cmd[0].clone());
|
||||||
let cmd_setup = cmd_setup.args(cmd.into_iter().skip(1).collect::<Vec<_>>());
|
let cmd_setup = cmd_setup
|
||||||
|
.args(cmd.into_iter().skip(1).collect::<Vec<_>>())
|
||||||
|
.stdout(std::process::Stdio::inherit())
|
||||||
|
.stderr(std::process::Stdio::inherit());
|
||||||
|
|
||||||
let child = cmd_setup.spawn().unwrap();
|
let child = cmd_setup.spawn().unwrap();
|
||||||
|
|
||||||
let status = child.wait_with_output().unwrap();
|
let status = child.wait_with_output().unwrap();
|
||||||
|
|
Loading…
Add table
Reference in a new issue