From 654d4b9cba31a317b87a3a528dd5e1cc12176dc5 Mon Sep 17 00:00:00 2001
From: JMARyA <jmarya@hydrar.de>
Date: Mon, 30 Dec 2024 09:57:42 +0100
Subject: [PATCH] refactor

---
 Cargo.lock             |   5 --
 Cargo.toml             |   5 --
 src/archive.rs         | 106 ++++++++++++++++++++++++++++++++++------
 src/blacklist.rs       |   4 ++
 src/favicon.rs         |  12 +++++
 src/pages/component.rs |  88 +++++++++++++++++++++++++++++++++
 src/pages/mod.rs       | 107 ++++++++++-------------------------------
 7 files changed, 222 insertions(+), 105 deletions(-)
 create mode 100644 src/pages/component.rs

diff --git a/Cargo.lock b/Cargo.lock
index b95557d..10e9ca6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3384,22 +3384,17 @@ dependencies = [
  "chrono",
  "env_logger",
  "futures",
- "hex",
  "log",
  "maud",
- "rayon",
  "regex",
  "reqwest 0.12.11",
- "ring 0.16.20",
  "rocket",
- "rocket_cors",
  "serde",
  "serde_json",
  "sqlx",
  "tokio",
  "url",
  "uuid",
- "walkdir",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 1d1f20e..cc5a0f8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,16 +5,11 @@ edition = "2021"
 
 [dependencies]
 env_logger = "0.10.0"
-hex = "0.4.3"
-rayon = "1.7.0"
 regex = "1.9.5"
-ring = "0.16.20"
-walkdir = "2.4.0"
 chrono = { version = "0.4.38", features = ["serde"] }
 futures = "0.3.30"
 log = "0.4.20"
 rocket = { version = "0.5.1", features = ["json"] }
-rocket_cors = "0.6.0"
 serde = { version = "1.0.195", features = ["derive"] }
 serde_json = "1.0.111"
 tokio = { version = "1.35.1", features = ["full"] }
diff --git a/src/archive.rs b/src/archive.rs
index e784a50..7fc566c 100644
--- a/src/archive.rs
+++ b/src/archive.rs
@@ -3,17 +3,16 @@ use std::path::PathBuf;
 use based::request::RequestContext;
 use maud::html;
 
-use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::render_page};
+use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::component::render_page};
 
+/// Read directory entries into `Vec<String>`
 pub fn read_dir(dir: &PathBuf) -> Vec<String> {
     let mut list = Vec::new();
 
     if let Ok(entries) = std::fs::read_dir(dir) {
-        for entry in entries {
-            if let Ok(entry) = entry {
-                if let Some(file_name) = entry.file_name().to_str() {
-                    list.push(file_name.to_string());
-                }
+        for entry in entries.flatten() {
+            if let Some(file_name) = entry.file_name().to_str() {
+                list.push(file_name.to_string());
             }
         }
     }
@@ -21,6 +20,7 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
     list
 }
 
+/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
 fn internalize_urls(input: &str) -> String {
     let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
     let re = regex::Regex::new(url_pattern).unwrap();
@@ -35,17 +35,30 @@ fn internalize_urls(input: &str) -> String {
     .to_string()
 }
 
+/// Represents a directory containing archived websites
 #[derive(Debug, Clone)]
 pub struct WebsiteArchive {
     pub dir: PathBuf,
 }
 
+/// Represents a domain within the website archive
 pub struct Domain {
+    /// Domain name
     pub name: String,
     dir: PathBuf,
 }
 
 impl Domain {
+    /// Creates a new `Domain` instance.
+    ///
+    /// If the domain name is not blacklisted, a directory is created.
+    ///
+    /// # Parameters
+    /// - `name`: The name of the domain.
+    /// - `dir`: The directory path for the domain.
+    ///
+    /// # Returns
+    /// A new `Domain` instance.
     pub fn new(name: &str, dir: PathBuf) -> Self {
         if !check_blacklist(name) {
             std::fs::create_dir_all(&dir).unwrap();
@@ -56,10 +69,26 @@ impl Domain {
         }
     }
 
+    /// Resolves a specific path within the domain and returns a `Document` representing it.
+    ///
+    /// # Parameters
+    /// - `path`: The path to resolve within the domain.
+    ///
+    /// # Returns
+    /// A `Document` instance corresponding to the given path.
     pub fn path(&self, path: &str) -> Document {
         Document::new(&self.name, path, self.dir.parent().unwrap().to_path_buf())
     }
 
+    /// Retrieves entries and metadata for a given path within the domain.
+    ///
+    /// # Parameters
+    /// - `path`: The path to inspect.
+    ///
+    /// # Returns
+    /// A tuple containing:
+    /// - A vector of `PathEntry` instances representing the contents of the path.
+    /// - A boolean indicating whether the path is itself a `Document`.
     pub fn paths(&self, path: &str) -> (Vec<PathEntry>, bool) {
         let mut base_path = self.dir.clone();
 
@@ -87,6 +116,7 @@ impl Domain {
     }
 }
 
+/// Represents an entry within a domain's path, containing its name and URL path.
 pub struct PathEntry(String, String);
 
 impl PathEntry {
@@ -99,13 +129,25 @@ impl PathEntry {
     }
 }
 
+/// Represents a document within a domain
 pub struct Document {
+    /// The domain associated with the document.
     pub domain: String,
+    /// The path of the document within the domain.
     pub path: String,
     base_dir: PathBuf,
 }
 
 impl Document {
+    /// Creates a new `Document` instance.
+    ///
+    /// # Parameters
+    /// - `domain`: The domain to which the document belongs.
+    /// - `path`: The path of the document within the domain.
+    /// - `base_dir`: The base directory of the archive storage.
+    ///
+    /// # Returns
+    /// A new `Document` instance.
     pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self {
         Self {
             domain: domain.to_string(),
@@ -114,10 +156,15 @@ impl Document {
         }
     }
 
-    pub fn url(&self) -> String {
-        format!("/s/{}/{}", self.domain, self.path)
-    }
-
+    /// Renders the document, returning its content as a string.
+    ///
+    /// If the environment variable `$ROUTE_INTERNAL` is set to `true`, all links will be rewritten to point to internal archived routes.
+    ///
+    /// # Parameters
+    /// - `version`: An optional version of the document to render in the format `YYYY-MM-DD`.
+    ///
+    /// # Returns
+    /// An `Option` containing the rendered content as a string, or `None` if nothing could be rendered.
     pub async fn render_local(&self, version: Option<String>) -> Option<String> {
         if check_blacklist(&self.domain) {
             let content = html! {
@@ -146,6 +193,10 @@ impl Document {
         }
     }
 
+    /// Determines the directory where the document is stored.
+    ///
+    /// # Returns
+    /// A `PathBuf` representing the document directory.
     pub fn doc_dir(&self) -> PathBuf {
         let mut file_path = self.base_dir.join(&self.domain);
 
@@ -156,6 +207,10 @@ impl Document {
         file_path
     }
 
+    /// Retrieves available versions of the document.
+    ///
+    /// # Returns
+    /// A vector of strings representing the available versions of the document, sorted in descending order.
     pub fn versions(&self) -> Vec<String> {
         let mut res: Vec<String> = read_dir(&self.doc_dir())
             .into_iter()
@@ -168,21 +223,46 @@ impl Document {
 }
 
 impl WebsiteArchive {
+    /// Creates a new `WebsiteArchive` instance.
+    ///
+    /// # Parameters
+    /// - `dir`: The directory path where the archive will be stored.
+    ///
+    /// # Returns
+    /// A new `WebsiteArchive` instance.
     pub fn new(dir: &str) -> Self {
         Self {
             dir: PathBuf::from(dir),
         }
     }
 
+    /// Retrieves the list of domain names stored in the archive.
+    ///
+    /// # Returns
+    /// A vector of domain names as strings.
     pub fn domains(&self) -> Vec<String> {
         read_dir(&self.dir)
     }
 
+    /// Retrieves a `Domain` instance for a specified domain name.
+    ///
+    /// # Parameters
+    /// - `domain`: The name of the domain to retrieve.
+    ///
+    /// # Returns
+    /// A `Domain` instance corresponding to the specified domain.
     pub fn get_domain(&self, domain: &str) -> Domain {
         Domain::new(domain, self.dir.join(domain))
     }
 
-    /// Archive a URL
+    /// Archives a URL by downloading and storing its content.
+    ///
+    /// If the URL does not pass the blacklist check, it will not be archived.
+    ///
+    /// # Parameters
+    /// - `url`: The URL to archive.
+    ///
+    /// This function downloads the content of the URL, processes it, and saves it to the archive.
     pub async fn archive_url(&self, url: &str) {
         let parsed_url = url::Url::parse(url).unwrap();
 
@@ -197,9 +277,7 @@ impl WebsiteArchive {
 
         let mut folder_name = self.dir.join(domain);
 
-        if !std::fs::exists(&folder_name).unwrap() {
-            download_fav_for(domain).await;
-        }
+        download_fav_for(domain).await;
 
         for paths in path.split('/') {
             if !paths.is_empty() {
diff --git a/src/blacklist.rs b/src/blacklist.rs
index 67d480b..84ea10f 100644
--- a/src/blacklist.rs
+++ b/src/blacklist.rs
@@ -1,3 +1,7 @@
+/// Checks if a domain is present in the blacklist of unwanted domains.
+///
+/// This function checks the `$BLACKLIST_DOMAINS` environment variable for a comma-separated list of regular expressions to match against.
+/// If a match is found, it immediately returns `true`. Otherwise, it returns `false`.
 pub fn check_blacklist(domain: &str) -> bool {
     let blacklist_raw = std::env::var("BLACKLIST_DOMAINS").unwrap_or_default();
 
diff --git a/src/favicon.rs b/src/favicon.rs
index bfe0e45..ee5c28c 100644
--- a/src/favicon.rs
+++ b/src/favicon.rs
@@ -1,3 +1,12 @@
+/// Downloads a favicon for the given domain.
+///
+/// # Parameters
+///
+/// * `domain`: The domain for which to download the favicon.
+///
+/// # Returns
+///
+/// A `Vec<u8>` containing the favicon data, or `None` if an error occurred.
 pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
     let mut favicon_url = url::Url::parse(&format!("https://{}", domain)).ok()?;
     favicon_url.set_path("/favicon.ico");
@@ -15,6 +24,9 @@ pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
     Some(favicon_data)
 }
 
+/// Downloads a favicon for `site` and stores it.
+///
+/// This is meant to skip the download if a favicon is already present (NOTE(review): the fetch appears to run first — confirm).
 pub async fn download_fav_for(site: &str) {
     if let Some(fav) = download_favicon(site).await {
         let fav_path = std::path::Path::new("./favicon").join(site);
diff --git a/src/pages/component.rs b/src/pages/component.rs
new file mode 100644
index 0000000..a7931ba
--- /dev/null
+++ b/src/pages/component.rs
@@ -0,0 +1,88 @@
+use based::{
+    page::Shell,
+    request::{RequestContext, StringResponse},
+};
+use maud::{html, PreEscaped};
+
+/// Generates an SVG arrow icon with the specified color.
+///
+/// # Parameters
+/// - `color`: The color of the arrow icon.
+///
+/// # Returns
+/// A `PreEscaped<String>` containing the SVG markup for the arrow icon.
+pub fn arrow_icon(color: &str) -> PreEscaped<String> {
+    html! {
+        svg class=(format!("w-5 h-5 text-{color}-500")) xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor" {
+            path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" {};
+        };
+    }
+}
+
+/// Generates a styled slash separator.
+///
+/// # Returns
+/// A `PreEscaped<String>` containing the HTML markup for a slash separator.
+pub fn slash_seperator() -> PreEscaped<String> {
+    html! {
+        p class="font-bold p-2 text-gray-400" { " / " };
+    }
+}
+
+/// Generates a hyperlink for a specific path within a domain.
+///
+/// # Parameters
+/// - `path`: The path segment to link.
+/// - `index`: The index of the current path segment in the hierarchy.
+/// - `path_seperations`: The array of all path segments in the hierarchy.
+/// - `domain`: The domain to which the path belongs.
+///
+/// # Returns
+/// A `PreEscaped<String>` containing the HTML markup for the hyperlink.
+pub fn gen_path_link(
+    path: &str,
+    index: usize,
+    path_seperations: &[&str],
+    domain: &str,
+) -> PreEscaped<String> {
+    let upto: Vec<&str> = path_seperations.iter().take(index + 1).cloned().collect();
+    html! {
+        a href=(format!("/d/{}/{}", domain, upto.join("/"))) { (path)}
+    }
+}
+
+/// Generates a breadcrumb-like header for a path within a domain.
+///
+/// # Parameters
+/// - `path_seperations`: A vector of path segments representing the hierarchy.
+/// - `domain`: The domain to which the path belongs.
+///
+/// # Returns
+/// A `PreEscaped<String>` containing the HTML markup for the path header.
+pub fn gen_path_header(path_seperations: Vec<&str>, domain: &str) -> PreEscaped<String> {
+    html! {
+        @for (index, path) in path_seperations.iter().enumerate() {
+            (gen_path_link(path, index, &path_seperations, domain))
+            @if index < path_seperations.len()-1 {
+                (slash_seperator())
+            };
+        };
+    }
+}
+
+pub async fn render_page(content: PreEscaped<String>, ctx: RequestContext) -> StringResponse {
+    based::page::render_page(
+        content,
+        "Website Archive",
+        ctx,
+        &Shell::new(
+            html! {
+                script src="https://cdn.tailwindcss.com" {};
+                meta name="viewport" content="width=device-width, initial-scale=1.0" {};
+            },
+            html! {},
+            Some("bg-zinc-950 text-white min-h-screen flex pt-8 justify-center".to_string()),
+        ),
+    )
+    .await
+}
diff --git a/src/pages/mod.rs b/src/pages/mod.rs
index b2809a5..4b1b502 100644
--- a/src/pages/mod.rs
+++ b/src/pages/mod.rs
@@ -1,31 +1,15 @@
 use std::{io::Read, path::PathBuf};
 
-use based::{
-    page::Shell,
-    request::{assets::DataResponse, RequestContext, StringResponse},
-};
-use maud::{html, PreEscaped};
+use based::request::{assets::DataResponse, RequestContext, StringResponse};
+use maud::html;
 use rocket::{get, State};
 
+pub mod component;
+use component::*;
+
 use crate::archive::WebsiteArchive;
 
-pub async fn render_page(content: PreEscaped<String>, ctx: RequestContext) -> StringResponse {
-    based::page::render_page(
-        content,
-        "Website Archive",
-        ctx,
-        &Shell::new(
-            html! {
-                script src="https://cdn.tailwindcss.com" {};
-                meta name="viewport" content="width=device-width, initial-scale=1.0" {};
-            },
-            html! {},
-            Some("bg-zinc-950 text-white min-h-screen flex pt-8 justify-center".to_string()),
-        ),
-    )
-    .await
-}
-
+/// Get the favicon of a domain
 #[get("/favicon/<domain>")]
 pub async fn favicon_route(domain: &str) -> Option<DataResponse> {
     let mut buf = Vec::new();
@@ -41,6 +25,7 @@ pub async fn favicon_route(domain: &str) -> Option<DataResponse> {
     ))
 }
 
+/// Websites Overview
 #[get("/")]
 pub async fn index(ctx: RequestContext, arc: &State<WebsiteArchive>) -> StringResponse {
     let websites = arc.domains();
@@ -48,7 +33,7 @@ pub async fn index(ctx: RequestContext, arc: &State<WebsiteArchive>) -> StringRe
     let content = html! {
         div class="container mx-auto p-4" {
             h1 class="text-5xl font-bold text-center mb-10" { "Websites" };
-            div class="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-4 gap-6 w-screen" {
+            div class="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-5 xl:grid-cols-6 2xl:grid-cols-8 gap-6" {
 
                 @for site in websites {
                     a href=(format!("/d/{site}")) class="bg-neutral-900 shadow-md rounded-lg hover:bg-neutral-800 bg-gray-1 hover:cursor-pointer transition-all duration-300 flex flex-col items-center justify-center aspect-square max-w-60" {
@@ -65,43 +50,7 @@ pub async fn index(ctx: RequestContext, arc: &State<WebsiteArchive>) -> StringRe
     render_page(content, ctx).await
 }
 
-pub fn arrow_icon(color: &str) -> PreEscaped<String> {
-    html! {
-        svg class=(format!("w-5 h-5 text-{color}-500")) xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor" {
-            path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" {};
-        };
-    }
-}
-
-pub fn slash_seperator() -> PreEscaped<String> {
-    html! {
-        p class="font-bold p-2 text-gray-400" { " / " };
-    }
-}
-
-pub fn gen_path_link(
-    path: &str,
-    index: usize,
-    path_seperations: &[&str],
-    domain: &str,
-) -> PreEscaped<String> {
-    let upto: Vec<&str> = path_seperations.iter().take(index + 1).cloned().collect();
-    html! {
-        a href=(format!("/d/{}/{}", domain, upto.join("/"))) { (path)}
-    }
-}
-
-pub fn gen_path_header(path_seperations: Vec<&str>, domain: &str) -> PreEscaped<String> {
-    html! {
-        @for (index, path) in path_seperations.iter().enumerate() {
-            (gen_path_link(path, index, &path_seperations, domain))
-            @if index < path_seperations.len()-1 {
-                (slash_seperator())
-            };
-        };
-    }
-}
-
+/// Overview of `<domain>` / `<path>`
 #[get("/d/<domain>/<paths..>")]
 pub async fn domain_info_route(
     ctx: RequestContext,
@@ -123,6 +72,8 @@ pub async fn domain_info_route(
     let (path_entries, is_doc) = domain.paths(paths.to_str().unwrap());
     let path_seperations: Vec<&str> = paths.to_str().unwrap().split('/').collect();
 
+    // TODO: Show domains being linked on the page
+
     let content = html! {
         h2 class="text-xl font-bold mb-4 flex items-center" {
             img class="p-2" src=(format!("/favicon/{}", &domain.name)) {};
@@ -172,6 +123,7 @@ pub async fn domain_info_route(
     render_page(content, ctx).await
 }
 
+/// Return archived version of `domain` / `path` at `time`
 #[get("/s/<domain>/<path..>?<time>")]
 pub async fn render_website(
     domain: &str,
@@ -191,29 +143,22 @@ pub async fn render_website(
             "text/html".to_string(),
             Some(60 * 60 * 24),
         ));
-    } else {
-        if std::env::var("DOWNLOAD_ON_DEMAND")
-            .unwrap_or("false".to_string())
-            .as_str()
-            == "true"
-        {
-            arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()))
-                .await;
+    } else if std::env::var("DOWNLOAD_ON_DEMAND")
+        .unwrap_or("false".to_string())
+        .as_str()
+        == "true"
+        && time.is_none()
+    {
+        arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()))
+            .await;
 
-            let content = document
-                .render_local(if time.is_some() {
-                    Some(time.unwrap().to_string())
-                } else {
-                    None
-                })
-                .await?;
+        let content = document.render_local(None).await?;
 
-            return Some(DataResponse::new(
-                content.as_bytes().to_vec(),
-                "text/html".to_string(),
-                Some(60 * 60 * 24),
-            ));
-        }
+        return Some(DataResponse::new(
+            content.as_bytes().to_vec(),
+            "text/html".to_string(),
+            Some(60 * 60 * 24),
+        ));
     }
 
     None