fix + refactor
CI status: ci/woodpecker/push/build pipeline failed

JMARyA 2025-01-11 16:21:15 +01:00
parent 56f13c6524
commit 3696f61b02
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
8 changed files with 524 additions and 446 deletions

Cargo.lock (generated, 10 changed lines)

@@ -3569,6 +3569,15 @@ dependencies = [
  "percent-encoding",
 ]
 
+[[package]]
+name = "url-escape"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44e0ce4d1246d075ca5abec4b41d33e87a6054d08e2366b63205665e950db218"
+dependencies = [
+ "percent-encoding",
+]
+
 [[package]]
 name = "utf-8"
 version = "0.7.6"
@@ -3752,6 +3761,7 @@ dependencies = [
  "tokio",
  "toml",
  "url",
+ "url-escape",
  "uuid",
 ]

Cargo.toml

@@ -24,3 +24,4 @@ pgvector = { version = "0.4", features = ["sqlx"] }
 html2md = "0.2.14"
 clap = { version = "4.5.23", features = ["cargo", "derive"] }
 toml = "0.8.19"
+url-escape = "0.1.1"

(example configuration file)

@@ -8,7 +8,8 @@ DOWNLOAD_ON_DEMAND=true
 [websites]
 # You can blacklist sites which won't work well
 BLACKLIST_DOMAINS = [
-    "^gitlab" # All domains starting with gitlab
+    "^gitlab", # All domains starting with gitlab
+    "youtube"  # YouTube
 ]
 
 # Domain configuration (Example)
@@ -56,3 +57,17 @@ no_javascript = true
 [[websites.domains]]
 domain = "github.com"
 no_javascript = true
+
+[[websites.domains]]
+domain = "en.wikipedia.org"
+no_javascript = true
+
+[[websites.domains]]
+domain = "api.flutter.dev"
+no_javascript = true
+no_video = true
+
+[[websites.domains]]
+domain = "docs.flutter.dev"
+no_javascript = true
+no_video = true

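For reference, entries in `BLACKLIST_DOMAINS` are regular expressions matched against the domain name. Below is a minimal sketch of how such matching could work with the `regex` crate (an illustration under that assumption, not the project's actual `check_blacklist` implementation):

// Hypothetical sketch of regex-based blacklist matching:
fn matches_blacklist(domain: &str, blacklist: &[String]) -> bool {
    blacklist.iter().any(|pattern| {
        regex::Regex::new(pattern)
            .map(|re| re.is_match(domain))
            .unwrap_or(false) // skip patterns that fail to compile
    })
}

// "^gitlab" matches "gitlab.example.org" but not "my-gitlab.org";
// "youtube" matches anywhere in the name, e.g. "www.youtube.com".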
(deleted file, 441 lines)

@@ -1,441 +0,0 @@
use std::{collections::HashSet, io::Read, path::PathBuf};
use based::{request::RequestContext, result::LogAndIgnore};
use maud::html;
use crate::{
blacklist::{check_blacklist, check_blacklist_path},
conf::get_config,
favicon::download_fav_for,
render_page,
};
/// Read directory entries into `Vec<String>`
pub fn read_dir(dir: &PathBuf) -> Vec<String> {
let mut list = Vec::new();
if let Ok(entries) = std::fs::read_dir(dir) {
for entry in entries.flatten() {
if let Some(file_name) = entry.file_name().to_str() {
list.push(file_name.to_string());
}
}
}
list
}
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
fn internalize_urls(input: &str) -> String {
let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
let re = regex::Regex::new(url_pattern).unwrap();
re.replace_all(input, |caps: &regex::Captures| {
let domain = caps[1].trim_start_matches("www.");
let path = &caps[2];
// Don't transform if in blacklist
if check_blacklist(domain) {
return format!("https://{domain}{path}");
}
format!("/s/{domain}{path}")
})
.to_string()
}
/// Extract all domains
pub fn extract_domains(input: &str) -> Vec<String> {
let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)?";
let re = regex::Regex::new(url_pattern).unwrap();
let mut domains = HashSet::new();
for caps in re.captures_iter(input) {
let domain = caps[1].trim_start_matches("www.");
domains.insert(domain.to_string());
}
let mut domains: Vec<_> = domains.into_iter().collect();
domains.sort();
domains
}
/// Represents a directory containing archived websites
#[derive(Debug, Clone)]
pub struct WebsiteArchive {
pub dir: PathBuf,
}
/// Represents a domain within the website archive
pub struct Domain {
/// Domain name
pub name: String,
dir: PathBuf,
}
impl Domain {
/// Creates a new `Domain` instance.
///
/// If the domain name is not blacklisted, a directory is created.
///
/// # Parameters
/// - `name`: The name of the domain.
/// - `dir`: The directory path for the domain.
///
/// # Returns
/// A new `Domain` instance.
pub fn new(name: &str, dir: PathBuf) -> Self {
if !check_blacklist(name) {
std::fs::create_dir_all(&dir)
.log_err_and_ignore(&format!("Could not create domain dir {name}"));
}
Self {
name: name.to_string(),
dir,
}
}
/// Resolves a specific path within the domain and returns a `Document` representing it.
///
/// # Parameters
/// - `path`: The path to resolve within the domain.
///
/// # Returns
/// A `Document` instance corresponding to the given path.
pub fn path(&self, path: &str) -> Document {
Document::new(&self.name, path, self.dir.parent().unwrap().to_path_buf())
}
/// Get all paths associated with the domain
pub fn all_paths(&self) -> Vec<PathEntry> {
let mut queue = self.paths("/").0;
let mut ret = Vec::new();
ret.push(PathEntry(self.name.clone(), "/".to_string()));
while let Some(el) = queue.pop() {
ret.push(el.clone());
let paths = self.paths(&el.1).0;
queue.extend(paths);
}
ret
}
/// Retrieves entries and metadata for a given path within the domain.
///
/// # Parameters
/// - `path`: The path to inspect.
///
/// # Returns
/// A tuple containing:
/// - A vector of `PathEntry` instances representing the contents of the path.
/// - A boolean indicating whether the path is itself a `Document`
pub fn paths(&self, path: &str) -> (Vec<PathEntry>, bool) {
let mut base_path = self.dir.clone();
for p in path.split('/') {
base_path = base_path.join(p);
}
let path = path
.split("/")
.filter(|x| !x.is_empty())
.collect::<Vec<&str>>()
.join("/");
let dir_content = read_dir(&base_path);
let mut ret = Vec::new();
let mut is_doc = false;
for entry in dir_content {
let url_path = format!("{path}/{entry}");
let url_path = url_path
.split("/")
.filter(|x| !x.is_empty())
.collect::<Vec<&str>>()
.join("/");
if entry.starts_with("index_") && entry.ends_with(".html") {
is_doc = true;
continue;
}
ret.push(PathEntry(self.name.clone(), url_path));
}
(ret, is_doc)
}
}
/// Represents an entry within a domain's path, containing its name and URL path.
#[derive(Debug, Clone)]
pub struct PathEntry(String, String);
impl PathEntry {
pub fn url(&self) -> String {
format!("/d/{}/{}", self.0, self.1)
}
pub fn path(&self) -> &String {
&self.1
}
}
/// Represents a document within a domain
pub struct Document {
/// The domain associated with the document.
pub domain: String,
/// The path of the document within the domain.
pub path: String,
base_dir: PathBuf,
}
impl Document {
/// Creates a new `Document` instance.
///
/// # Parameters
/// - `domain`: The domain to which the document belongs.
/// - `path`: The path of the document within the domain.
/// - `base_dir`: The base directory of the archive storage.
///
/// # Returns
/// A new `Document` instance.
pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self {
let split = path
.split('/')
.filter(|x| !x.is_empty())
.collect::<Vec<&str>>();
Self {
domain: domain.to_string(),
path: if split.is_empty() {
"/".to_string()
} else {
split.join("/")
},
base_dir,
}
}
/// Renders the document, returning its content as a string.
///
/// If the environment variable `$ROUTE_INTERNAL` is set to `true`, all links will be rewritten to point to internal archived routes.
///
/// # Parameters
/// - `version`: An optional version of the document to render in the format `YYYY-MM-DD`.
///
/// # Returns
/// An `Option` containing the rendered content as a string, or `None` if nothing could be rendered.
pub async fn render_local(&self, version: Option<String>) -> Option<String> {
if check_blacklist(&self.domain) {
let content = html! {
h3 { "This site is blacklisted" };
};
return Some(render_page(content, RequestContext::default()).await.1 .1);
}
let mut file_path = self.doc_dir();
let latest_version = if let Some(version) = version {
format!("index_{version}.html")
} else {
let versions = self.versions();
let version = versions.first().cloned()?;
format!("index_{version}.html")
};
file_path = file_path.join(latest_version);
let mut buf = Vec::new();
std::fs::File::open(file_path)
.ok()?
.read_to_end(&mut buf)
.unwrap();
let content = String::from_utf8_lossy(&buf);
if get_config().ROUTE_INTERNAL {
Some(internalize_urls(&content))
} else {
Some(content.to_string())
}
}
/// Determines the directory where the document is stored.
///
/// # Returns
/// A `PathBuf` representing the document directory.
pub fn doc_dir(&self) -> PathBuf {
let mut file_path = self.base_dir.join(&self.domain);
for p in self.path.split('/').filter(|x| !x.is_empty()) {
file_path = file_path.join(p);
}
file_path
}
/// Retrieves available versions of the document.
///
/// # Returns
/// A vector of strings representing the available versions of the document, sorted in descending order.
pub fn versions(&self) -> Vec<String> {
let mut res: Vec<String> = read_dir(&self.doc_dir())
.into_iter()
.filter_map(|x| {
if x.starts_with("index_") && x.ends_with(".html") {
return Some(
x.trim_start_matches("index_")
.trim_end_matches(".html")
.to_string(),
);
}
None
})
.collect();
res.sort();
res.reverse();
res
}
}
impl WebsiteArchive {
/// Creates a new `WebsiteArchive` instance.
///
/// # Parameters
/// - `dir`: The directory path where the archive will be stored.
///
/// # Returns
/// A new `WebsiteArchive` instance.
pub fn new(dir: &str) -> Self {
Self {
dir: PathBuf::from(dir),
}
}
/// Retrieves the list of domain names stored in the archive.
///
/// # Returns
/// A vector of domain names as strings.
pub fn domains(&self) -> Vec<String> {
read_dir(&self.dir)
}
/// Retrieves a `Domain` instance for a specified domain name.
///
/// # Parameters
/// - `domain`: The name of the domain to retrieve.
///
/// # Returns
/// A `Domain` instance corresponding to the specified domain.
pub fn get_domain(&self, domain: &str) -> Domain {
Domain::new(domain, self.dir.join(domain))
}
/// Archives a URL by downloading and storing its content.
///
/// If the URL does not pass the blacklist check, it will not be archived.
///
/// # Parameters
/// - `url`: The URL to archive.
///
/// This function downloads the content of the URL, processes it, and saves it to the archive.
pub async fn archive_url(&self, url: &str) {
let parsed_url = url::Url::parse(url).unwrap();
let domain = parsed_url.domain().unwrap().trim_start_matches("www.");
// Deny blacklist
if check_blacklist(domain) {
return;
}
let path = parsed_url.path();
if check_blacklist_path(domain, path) {
return;
}
let mut folder_name = self.dir.join(domain);
download_fav_for(domain).await;
for paths in path.split('/') {
if !paths.is_empty() {
folder_name = folder_name.join(paths);
}
}
std::fs::create_dir_all(&folder_name).unwrap();
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
let filename = folder_name.join(format!("index_{timestamp}.html"));
log::info!("Archiving {url} to {}", filename.to_str().unwrap());
let conf = get_config()
.get_domain_config(domain)
.cloned()
.unwrap_or_default();
let mut cmd = vec!["monolith", "--isolate", "-o", filename.to_str().unwrap()];
if conf.no_audio.unwrap_or_default() {
cmd.push("--no-audio");
}
if conf.no_css.unwrap_or_default() {
cmd.push("--no-css");
}
if conf.no_frames.unwrap_or_default() {
cmd.push("--no-frames");
}
if conf.no_fonts.unwrap_or_default() {
cmd.push("--no-fonts");
}
if conf.no_image.unwrap_or_default() {
cmd.push("--no-images");
}
if conf.no_javascript.unwrap_or_default() {
cmd.push("--no-js");
cmd.push("--unwrap-noscript");
}
if conf.no_video.unwrap_or_default() {
cmd.push("--no-video");
}
if let Some(ua) = &conf.user_agent {
cmd.push("--user-agent");
cmd.push(ua.as_str());
}
let mut url = url::Url::parse(&format!("https://{domain}")).unwrap();
url = url.join(path).unwrap();
let url = url.to_string();
cmd.push(&url);
run_command(&cmd);
}
}
fn run_command(cmd: &[&str]) {
let mut cmd_setup = std::process::Command::new(cmd[0]);
let cmd_setup = cmd_setup
.args(cmd.iter().skip(1).collect::<Vec<_>>())
.stdout(std::process::Stdio::inherit())
.stderr(std::process::Stdio::inherit());
let child = cmd_setup.spawn().unwrap();
let status = child.wait_with_output().unwrap();
assert!(status.status.success());
}

src/archive/document.rs (new file, 126 lines)

@@ -0,0 +1,126 @@
use std::{io::Read, path::PathBuf};
use based::request::RequestContext;
use maud::html;
use crate::{blacklist::check_blacklist, conf::get_config, render_page};
use super::{internalize_urls, read_dir};
/// Represents a document within a domain
pub struct Document {
/// The domain associated with the document.
pub domain: String,
/// The path of the document within the domain.
pub path: String,
base_dir: PathBuf,
}
impl Document {
/// Creates a new `Document` instance.
///
/// # Parameters
/// - `domain`: The domain to which the document belongs.
/// - `path`: The path of the document within the domain.
/// - `base_dir`: The base directory of the archive storage.
///
/// # Returns
/// A new `Document` instance.
pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self {
let split = path
.split('/')
.filter(|x| !x.is_empty())
.collect::<Vec<&str>>();
Self {
domain: domain.to_string(),
path: if split.is_empty() {
"/".to_string()
} else {
split.join("/")
},
base_dir,
}
}
/// Renders the document, returning its content as a string.
///
/// If the environment variable `$ROUTE_INTERNAL` is set to `true`, all links will be rewritten to point to internal archived routes.
///
/// # Parameters
/// - `version`: An optional version of the document to render in the format `YYYY-MM-DD`.
///
/// # Returns
/// An `Option` containing the rendered content as a string, or `None` if nothing could be rendered.
pub async fn render_local(&self, version: Option<String>) -> Option<String> {
if check_blacklist(&self.domain) {
let content = html! {
h3 { "This site is blacklisted" };
};
return Some(render_page(content, RequestContext::default()).await.1 .1);
}
let mut file_path = self.doc_dir();
let latest_version = if let Some(version) = version {
format!("index_{version}.html")
} else {
let versions = self.versions();
let version = versions.first().cloned()?;
format!("index_{version}.html")
};
file_path = file_path.join(latest_version);
let mut buf = Vec::new();
std::fs::File::open(file_path)
.ok()?
.read_to_end(&mut buf)
.unwrap();
let content = String::from_utf8_lossy(&buf);
if get_config().ROUTE_INTERNAL {
Some(internalize_urls(&content))
} else {
Some(content.to_string())
}
}
/// Determines the directory where the document is stored.
///
/// # Returns
/// A `PathBuf` representing the document directory.
pub fn doc_dir(&self) -> PathBuf {
let mut file_path = self.base_dir.join(&self.domain);
for p in self.path.split('/').filter(|x| !x.is_empty()) {
file_path = file_path.join(p);
}
file_path
}
/// Retrieves available versions of the document.
///
/// # Returns
/// A vector of strings representing the available versions of the document, sorted in descending order.
pub fn versions(&self) -> Vec<String> {
let mut res: Vec<String> = read_dir(&self.doc_dir())
.into_iter()
.filter_map(|x| {
if x.starts_with("index_") && x.ends_with(".html") {
return Some(
x.trim_start_matches("index_")
.trim_end_matches(".html")
.to_string(),
);
}
None
})
.collect();
res.sort();
res.reverse();
res
}
}

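Snapshots follow the `index_<YYYY-MM-DD>.html` naming scheme, which is what ties `versions` and `render_local` together. A hypothetical usage sketch inside an async context (the archive path and date are made up for illustration):

// Assuming /archive/example.com/blog/index_2025-01-11.html exists:
let doc = Document::new("example.com", "/blog", PathBuf::from("/archive"));
assert_eq!(doc.versions(), vec!["2025-01-11".to_string()]);

// render_local(None) serves the newest snapshot; passing a date pins one:
let html = doc.render_local(Some("2025-01-11".to_string())).await;
assert!(html.is_some());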
src/archive/domain.rs (new file, 126 lines)

@@ -0,0 +1,126 @@
use std::path::PathBuf;
use based::result::LogAndIgnore;
use crate::blacklist::check_blacklist;
use super::{read_dir, Document};
/// Represents a domain within the website archive
pub struct Domain {
/// Domain name
pub name: String,
dir: PathBuf,
}
impl Domain {
/// Creates a new `Domain` instance.
///
/// If the domain name is not blacklisted, a directory is created.
///
/// # Parameters
/// - `name`: The name of the domain.
/// - `dir`: The directory path for the domain.
///
/// # Returns
/// A new `Domain` instance.
pub fn new(name: &str, dir: PathBuf) -> Self {
if !check_blacklist(name) {
std::fs::create_dir_all(&dir)
.log_err_and_ignore(&format!("Could not create domain dir {name}"));
}
Self {
name: name.to_string(),
dir,
}
}
/// Resolves a specific path within the domain and returns a `Document` representing it.
///
/// # Parameters
/// - `path`: The path to resolve within the domain.
///
/// # Returns
/// A `Document` instance corresponding to the given path.
pub fn path(&self, path: &str) -> Document {
Document::new(&self.name, path, self.dir.parent().unwrap().to_path_buf())
}
/// Get all paths associated with the domain
pub fn all_paths(&self) -> Vec<PathEntry> {
let mut queue = self.paths("/").0;
let mut ret = Vec::new();
ret.push(PathEntry(self.name.clone(), "/".to_string()));
while let Some(el) = queue.pop() {
ret.push(el.clone());
let paths = self.paths(&el.1).0;
queue.extend(paths);
}
ret
}
/// Retrieves entries and metadata for a given path within the domain.
///
/// # Parameters
/// - `path`: The path to inspect.
///
/// # Returns
/// A tuple containing:
/// - A vector of `PathEntry` instances representing the contents of the path.
/// - A boolean indicating whether the path is itself a `Document`
pub fn paths(&self, path: &str) -> (Vec<PathEntry>, bool) {
let mut base_path = self.dir.clone();
for p in path.split('/') {
base_path = base_path.join(p);
}
let path = path
.split("/")
.filter(|x| !x.is_empty())
.collect::<Vec<&str>>()
.join("/");
let dir_content = read_dir(&base_path);
let mut ret = Vec::new();
let mut is_doc = false;
for entry in dir_content {
let url_path = format!("{path}/{entry}");
let url_path = url_path
.split("/")
.filter(|x| !x.is_empty())
.collect::<Vec<&str>>()
.join("/");
if entry.starts_with("index_") && entry.ends_with(".html") {
is_doc = true;
continue;
}
ret.push(PathEntry(self.name.clone(), url_path));
}
(ret, is_doc)
}
}
/// Represents an entry within a domain's path, containing its name and URL path.
#[derive(Debug, Clone)]
pub struct PathEntry(String, String);
impl PathEntry {
pub fn url(&self) -> String {
format!("/d/{}/{}", self.0, self.1)
}
pub fn path(&self) -> &String {
&self.1
}
}

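To illustrate the `(entries, is_doc)` contract of `paths`: an `index_*.html` snapshot marks a directory as a document and is excluded from the entry list, while subdirectories become `PathEntry` values. A hypothetical layout (not from the repository):

// Given a domain directory like:
//   example.com/blog/index_2025-01-11.html
//   example.com/blog/post1/
let (entries, is_doc) = domain.paths("/blog");
assert!(is_doc); // the snapshot flags /blog itself as a Document
assert_eq!(entries[0].path().as_str(), "blog/post1");
assert_eq!(entries[0].url(), "/d/example.com/blog/post1");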
src/archive/mod.rs (new file, 207 lines)

@@ -0,0 +1,207 @@
use std::{collections::HashSet, path::PathBuf};
use crate::{
blacklist::{check_blacklist, check_blacklist_path},
conf::get_config,
favicon::download_fav_for
};
mod document;
mod domain;
pub use document::Document;
pub use domain::*;
/// Read directory entries into `Vec<String>`
pub fn read_dir(dir: &PathBuf) -> Vec<String> {
let mut list = Vec::new();
if let Ok(entries) = std::fs::read_dir(dir) {
for entry in entries.flatten() {
if let Some(file_name) = entry.file_name().to_str() {
list.push(file_name.to_string());
}
}
}
list
}
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
fn internalize_urls(input: &str) -> String {
let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
let re = regex::Regex::new(url_pattern).unwrap();
re.replace_all(input, |caps: &regex::Captures| {
let domain = caps[1].trim_start_matches("www.");
let path = &caps[2];
// Don't transform if in blacklist
if check_blacklist(domain) {
return format!("https://{domain}{path}");
}
format!("/s/{domain}{path}")
})
.to_string()
}
/// Extract all domains
pub fn extract_domains(input: &str) -> Vec<String> {
let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)?";
let re = regex::Regex::new(url_pattern).unwrap();
let mut domains = HashSet::new();
for caps in re.captures_iter(input) {
let domain = caps[1].trim_start_matches("www.");
domains.insert(domain.to_string());
}
let mut domains: Vec<_> = domains.into_iter().collect();
domains.sort();
domains
}
/// Represents a directory containing archived websites
#[derive(Debug, Clone)]
pub struct WebsiteArchive {
pub dir: PathBuf,
}
impl WebsiteArchive {
/// Creates a new `WebsiteArchive` instance.
///
/// # Parameters
/// - `dir`: The directory path where the archive will be stored.
///
/// # Returns
/// A new `WebsiteArchive` instance.
pub fn new(dir: &str) -> Self {
Self {
dir: PathBuf::from(dir),
}
}
/// Retrieves the list of domain names stored in the archive.
///
/// # Returns
/// A vector of domain names as strings.
pub fn domains(&self) -> Vec<String> {
read_dir(&self.dir)
}
/// Retrieves a `Domain` instance for a specified domain name.
///
/// # Parameters
/// - `domain`: The name of the domain to retrieve.
///
/// # Returns
/// A `Domain` instance corresponding to the specified domain.
pub fn get_domain(&self, domain: &str) -> Domain {
Domain::new(domain, self.dir.join(domain))
}
/// Archives a URL by downloading and storing its content.
///
/// If the URL does not pass the blacklist check, it will not be archived.
///
/// # Parameters
/// - `url`: The URL to archive.
///
/// This function downloads the content of the URL, processes it, and saves it to the archive.
pub async fn archive_url(&self, url: &str) {
let parsed_url = url::Url::parse(url).unwrap();
let domain = parsed_url.domain().unwrap().trim_start_matches("www.");
// Deny blacklist
if check_blacklist(domain) {
return;
}
let path = parsed_url.path();
if check_blacklist_path(domain, path) {
return;
}
let mut folder_name = self.dir.join(domain);
download_fav_for(domain).await;
for paths in path.split('/') {
let paths = url_escape::decode(paths).to_string();
if !paths.is_empty() {
folder_name = folder_name.join(paths);
}
}
std::fs::create_dir_all(&folder_name).unwrap();
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
let filename = folder_name.join(format!("index_{timestamp}.html"));
log::info!("Archiving {url} to {}", filename.to_str().unwrap());
let conf = get_config()
.get_domain_config(domain)
.cloned()
.unwrap_or_default();
let mut cmd = vec!["monolith", "--isolate", "-o", filename.to_str().unwrap()];
if conf.no_audio.unwrap_or_default() {
cmd.push("--no-audio");
}
if conf.no_css.unwrap_or_default() {
cmd.push("--no-css");
}
if conf.no_frames.unwrap_or_default() {
cmd.push("--no-frames");
}
if conf.no_fonts.unwrap_or_default() {
cmd.push("--no-fonts");
}
if conf.no_image.unwrap_or_default() {
cmd.push("--no-images");
}
if conf.no_javascript.unwrap_or_default() {
cmd.push("--no-js");
cmd.push("--unwrap-noscript");
}
if conf.no_video.unwrap_or_default() {
cmd.push("--no-video");
}
if let Some(ua) = &conf.user_agent {
cmd.push("--user-agent");
cmd.push(ua.as_str());
}
let mut url = url::Url::parse(&format!("https://{domain}")).unwrap();
url = url.join(path).unwrap();
let url = url.to_string();
cmd.push(&url);
run_command(&cmd);
}
}
fn run_command(cmd: &[&str]) {
let mut cmd_setup = std::process::Command::new(cmd[0]);
let cmd_setup = cmd_setup
.args(cmd.iter().skip(1).collect::<Vec<_>>())
.stdout(std::process::Stdio::inherit())
.stderr(std::process::Stdio::inherit());
let child = cmd_setup.spawn().unwrap();
let status = child.wait_with_output().unwrap();
assert!(status.status.success());
}

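The `url_escape::decode` call above is the "fix" half of this commit: percent-encoded path segments are decoded before being used as directory names, so a URL path like `/docs/foo%20bar` is stored under a literal `foo bar` directory rather than `foo%20bar`. The crate's `decode` function (the `url-escape` dependency added to Cargo.toml above) returns a `Cow<str>`; a short illustration:

assert_eq!(url_escape::decode("foo%20bar"), "foo bar");
assert_eq!(url_escape::decode("100%25"), "100%");
// Segments without escapes pass through unchanged:
assert_eq!(url_escape::decode("plain"), "plain");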

@@ -7,7 +7,7 @@ use based::{
     },
 };
 use maud::{html, PreEscaped};
-use rocket::{get, State};
+use rocket::{get, request::FromSegments, State};
 
 pub mod component;
 use component::*;
@@ -174,15 +174,49 @@ pub async fn render_txt_website(
     Some(html2md::parse_html(&content))
 }
 
+pub struct PathSegment {
+    segments: Vec<String>,
+}
+
+impl PathSegment {
+    pub fn to_str(&self) -> String {
+        self.segments.join("/")
+    }
+}
+
+impl<'r> FromSegments<'r> for PathSegment {
+    type Error = ();
+
+    fn from_segments(
+        segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>,
+    ) -> Result<Self, Self::Error> {
+        // Drop "." and ".." so a crafted URL cannot traverse out of the archive.
+        let paths: Vec<_> = segments
+            .filter_map(|x| {
+                if x == "." || x == ".." {
+                    return None;
+                }
+                Some(x.to_string())
+            })
+            .collect();
+
+        Ok(PathSegment { segments: paths })
+    }
+}
+
 /// Return archived version of `domain` / `path` at `time`
 #[get("/s/<domain>/<path..>?<time>")]
 pub async fn render_website(
     domain: &str,
-    path: PathBuf,
+    path: PathSegment,
     time: Option<&str>,
     arc: &State<WebsiteArchive>,
 ) -> Option<DataResponse> {
-    let document = arc.get_domain(domain).path(path.to_str().unwrap());
+    let document = arc.get_domain(domain).path(&path.to_str());
 
     let content = document
         .render_local(time.map(|time| time.to_string()))
@@ -195,7 +229,7 @@ pub async fn render_website(
             Some(60 * 60 * 24),
         ));
     } else if get_config().DOWNLOAD_ON_DEMAND && time.is_none() {
-        arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()))
+        arc.archive_url(&format!("https://{domain}/{}", path.to_str()))
             .await;
 
         let content = document.render_local(None).await?;
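
The switch from `PathBuf` to the custom `PathSegment` guard is the other half of the fix: Rocket hands the route its raw URI segments, and `from_segments` drops any `.` or `..` components so a crafted URL cannot escape the archive root. A hypothetical request illustrates the effect (this mirrors the filter above; it is not the live route code):

// "/s/example.com/a/../../etc/passwd" arrives as ["a", "..", "..", "etc", "passwd"]:
let segs = vec!["a", "..", "..", "etc", "passwd"];
let cleaned: Vec<String> = segs
    .into_iter()
    .filter(|s| *s != "." && *s != "..")
    .map(str::to_string)
    .collect();
assert_eq!(cleaned.join("/"), "a/etc/passwd"); // ".." never reaches the filesystem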