diff --git a/Cargo.lock b/Cargo.lock
index b95557d..10e9ca6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3384,22 +3384,17 @@ dependencies = [
  "chrono",
  "env_logger",
  "futures",
- "hex",
  "log",
  "maud",
- "rayon",
  "regex",
  "reqwest 0.12.11",
- "ring 0.16.20",
  "rocket",
- "rocket_cors",
  "serde",
  "serde_json",
  "sqlx",
  "tokio",
  "url",
  "uuid",
- "walkdir",
 ]

 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 1d1f20e..cc5a0f8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,16 +5,11 @@ edition = "2021"

 [dependencies]
 env_logger = "0.10.0"
-hex = "0.4.3"
-rayon = "1.7.0"
 regex = "1.9.5"
-ring = "0.16.20"
-walkdir = "2.4.0"
 chrono = { version = "0.4.38", features = ["serde"] }
 futures = "0.3.30"
 log = "0.4.20"
 rocket = { version = "0.5.1", features = ["json"] }
-rocket_cors = "0.6.0"
 serde = { version = "1.0.195", features = ["derive"] }
 serde_json = "1.0.111"
 tokio = { version = "1.35.1", features = ["full"] }
diff --git a/src/archive.rs b/src/archive.rs
index e784a50..7fc566c 100644
--- a/src/archive.rs
+++ b/src/archive.rs
@@ -3,17 +3,16 @@ use std::path::PathBuf;
 use based::request::RequestContext;
 use maud::html;

-use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::render_page};
+use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::component::render_page};

+/// Read directory entries into a `Vec<String>`
 pub fn read_dir(dir: &PathBuf) -> Vec<String> {
     let mut list = Vec::new();

     if let Ok(entries) = std::fs::read_dir(dir) {
-        for entry in entries {
-            if let Ok(entry) = entry {
-                if let Some(file_name) = entry.file_name().to_str() {
-                    list.push(file_name.to_string());
-                }
+        for entry in entries.flatten() {
+            if let Some(file_name) = entry.file_name().to_str() {
+                list.push(file_name.to_string());
             }
         }
     }
@@ -21,6 +20,7 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
     list
 }

+/// Rewrite all URLs in `input` to the format `/s/<domain>/<path>`
 fn internalize_urls(input: &str) -> String {
     let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
     let re = regex::Regex::new(url_pattern).unwrap();
@@ -35,17 +35,30 @@ fn internalize_urls(input: &str) -> String {
         .to_string()
 }

+/// Represents a directory containing archived websites
 #[derive(Debug, Clone)]
 pub struct WebsiteArchive {
     pub dir: PathBuf,
 }

+/// Represents a domain within the website archive
 pub struct Domain {
+    /// Domain name
     pub name: String,
     dir: PathBuf,
 }

 impl Domain {
+    /// Creates a new `Domain` instance.
+    ///
+    /// If the domain name is not blacklisted, a directory is created.
+    ///
+    /// # Parameters
+    /// - `name`: The name of the domain.
+    /// - `dir`: The directory path for the domain.
+    ///
+    /// # Returns
+    /// A new `Domain` instance.
     pub fn new(name: &str, dir: PathBuf) -> Self {
         if !check_blacklist(name) {
             std::fs::create_dir_all(&dir).unwrap();
@@ -56,10 +69,26 @@ impl Domain {
         }
     }

+    /// Resolves a specific path within the domain and returns a `Document` representing it.
+    ///
+    /// # Parameters
+    /// - `path`: The path to resolve within the domain.
+    ///
+    /// # Returns
+    /// A `Document` instance corresponding to the given path.
     pub fn path(&self, path: &str) -> Document {
         Document::new(&self.name, path, self.dir.parent().unwrap().to_path_buf())
     }

+    /// Retrieves entries and metadata for a given path within the domain.
+    ///
+    /// # Parameters
+    /// - `path`: The path to inspect.
+    ///
+    /// # Returns
+    /// A tuple containing:
+    /// - A vector of `PathEntry` instances representing the contents of the path.
+    /// - A boolean indicating whether the path is itself a `Document`.
     pub fn paths(&self, path: &str) -> (Vec<PathEntry>, bool) {
         let mut base_path = self.dir.clone();
@@ -87,6 +116,7 @@ impl Domain {
     }
 }

+/// Represents an entry within a domain's path, containing its name and URL path.
 pub struct PathEntry(String, String);

 impl PathEntry {
@@ -99,13 +129,25 @@
     }
 }

+/// Represents a document within a domain
 pub struct Document {
+    /// The domain associated with the document.
     pub domain: String,
+    /// The path of the document within the domain.
     pub path: String,
     base_dir: PathBuf,
 }

 impl Document {
+    /// Creates a new `Document` instance.
+    ///
+    /// # Parameters
+    /// - `domain`: The domain to which the document belongs.
+    /// - `path`: The path of the document within the domain.
+    /// - `base_dir`: The base directory of the archive storage.
+    ///
+    /// # Returns
+    /// A new `Document` instance.
     pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self {
         Self {
             domain: domain.to_string(),
@@ -114,10 +156,15 @@
         }
     }

-    pub fn url(&self) -> String {
-        format!("/s/{}/{}", self.domain, self.path)
-    }
-
+    /// Renders the document, returning its content as a string.
+    ///
+    /// If the environment variable `$ROUTE_INTERNAL` is set to `true`, all links will be rewritten to point to internal archived routes.
+    ///
+    /// # Parameters
+    /// - `version`: An optional version of the document to render in the format `YYYY-MM-DD`.
+    ///
+    /// # Returns
+    /// An `Option` containing the rendered content as a string, or `None` if nothing could be rendered.
     pub async fn render_local(&self, version: Option<String>) -> Option<String> {
         if check_blacklist(&self.domain) {
             let content = html! {
@@ -146,6 +193,10 @@
         }
     }

+    /// Determines the directory where the document is stored.
+    ///
+    /// # Returns
+    /// A `PathBuf` representing the document directory.
     pub fn doc_dir(&self) -> PathBuf {
         let mut file_path = self.base_dir.join(&self.domain);

@@ -156,6 +207,10 @@
         file_path
     }

+    /// Retrieves available versions of the document.
+    ///
+    /// # Returns
+    /// A vector of strings representing the available versions of the document, sorted in descending order.
     pub fn versions(&self) -> Vec<String> {
         let mut res: Vec<String> = read_dir(&self.doc_dir())
             .into_iter()
@@ -168,21 +223,46 @@
 }

 impl WebsiteArchive {
+    /// Creates a new `WebsiteArchive` instance.
+    ///
+    /// # Parameters
+    /// - `dir`: The directory path where the archive will be stored.
+    ///
+    /// # Returns
+    /// A new `WebsiteArchive` instance.
     pub fn new(dir: &str) -> Self {
         Self {
             dir: PathBuf::from(dir),
         }
     }

+    /// Retrieves the list of domain names stored in the archive.
+    ///
+    /// # Returns
+    /// A vector of domain names as strings.
    pub fn domains(&self) -> Vec<String> {
        read_dir(&self.dir)
    }

+    /// Retrieves a `Domain` instance for a specified domain name.
+    ///
+    /// # Parameters
+    /// - `domain`: The name of the domain to retrieve.
+    ///
+    /// # Returns
+    /// A `Domain` instance corresponding to the specified domain.
     pub fn get_domain(&self, domain: &str) -> Domain {
         Domain::new(domain, self.dir.join(domain))
     }

-    /// Archive a URL
+    /// Archives a URL by downloading and storing its content.
+    ///
+    /// If the URL does not pass the blacklist check, it will not be archived.
+    ///
+    /// # Parameters
+    /// - `url`: The URL to archive.
+    ///
+    /// This function downloads the content of the URL, processes it, and saves it to the archive.
     pub async fn archive_url(&self, url: &str) {
         let parsed_url = url::Url::parse(url).unwrap();

@@ -197,9 +277,7 @@

         let mut folder_name = self.dir.join(domain);

-        if !std::fs::exists(&folder_name).unwrap() {
-            download_fav_for(domain).await;
-        }
+        download_fav_for(domain).await;

         for paths in path.split('/') {
             if !paths.is_empty() {
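The doc comment added to `internalize_urls` above fixes the rewrite target (`/s/<domain>/<path>`), and the hunk shows the regex the function compiles. A minimal, self-contained sketch of that rewrite — the replacement string below is an assumption, since the body of `internalize_urls` is not touched by this diff:

```rust
use regex::Regex;

fn main() {
    // Pattern taken from src/archive.rs; group 1 is the domain, group 2 the path.
    let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
    let re = Regex::new(url_pattern).unwrap();

    let input = r#"<a href="https://example.com/blog/post.html">post</a>"#;
    // Assumed replacement: rewrite absolute URLs to internal archive routes.
    let rewritten = re.replace_all(input, "/s/${1}${2}");

    assert_eq!(
        rewritten,
        r#"<a href="/s/example.com/blog/post.html">post</a>"#
    );
}
```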
diff --git a/src/blacklist.rs b/src/blacklist.rs
index 67d480b..84ea10f 100644
--- a/src/blacklist.rs
+++ b/src/blacklist.rs
@@ -1,3 +1,7 @@
+/// Checks if a domain is present in the blacklist of unwanted domains.
+///
+/// This function checks the `$BLACKLIST_DOMAINS` environment variable for a comma-separated list of regular expressions to match against.
+/// If a match is found, it immediately returns `true`. Otherwise, it returns `false`.
 pub fn check_blacklist(domain: &str) -> bool {
     let blacklist_raw = std::env::var("BLACKLIST_DOMAINS").unwrap_or_default();

diff --git a/src/favicon.rs b/src/favicon.rs
index bfe0e45..ee5c28c 100644
--- a/src/favicon.rs
+++ b/src/favicon.rs
@@ -1,3 +1,12 @@
+/// Downloads a favicon for the given domain.
+///
+/// # Parameters
+///
+/// * `domain`: The domain for which to download the favicon.
+///
+/// # Returns
+///
+/// A `Vec<u8>` containing the favicon data, or `None` if an error occurred.
 pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
     let mut favicon_url = url::Url::parse(&format!("https://{}", domain)).ok()?;
     favicon_url.set_path("/favicon.ico");
@@ -15,6 +24,9 @@ pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
     Some(favicon_data)
 }

+/// Downloads a favicon for `site` and stores it.
+///
+/// This will not download a favicon if it is already present.
 pub async fn download_fav_for(site: &str) {
     if let Some(fav) = download_favicon(site).await {
         let fav_path = std::path::Path::new("./favicon").join(site);
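A few hunks up, the new doc comment on `check_blacklist` spells out the `$BLACKLIST_DOMAINS` contract: a comma-separated list of regular expressions, with the first match blacklisting the domain. A rough sketch of that contract (an illustration only — the actual body of `src/blacklist.rs` is not part of this diff, and the helper name below is made up):

```rust
use regex::Regex;

// Sketch of the documented behaviour of check_blacklist(); not the real implementation.
fn is_blacklisted(domain: &str, blacklist_raw: &str) -> bool {
    blacklist_raw
        .split(',')
        .filter(|pattern| !pattern.is_empty())
        .any(|pattern| {
            Regex::new(pattern)
                .map(|re| re.is_match(domain))
                .unwrap_or(false) // treat patterns that fail to compile as non-matching
        })
}

fn main() {
    // src/blacklist.rs reads this value from the BLACKLIST_DOMAINS environment variable.
    let blacklist_raw = r"ads\..*,^tracker\.example\.org$";

    assert!(is_blacklisted("ads.example.com", blacklist_raw));
    assert!(!is_blacklisted("example.com", blacklist_raw));
}
```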
diff --git a/src/pages/component.rs b/src/pages/component.rs
new file mode 100644
index 0000000..a7931ba
--- /dev/null
+++ b/src/pages/component.rs
@@ -0,0 +1,88 @@
+use based::{
+    page::Shell,
+    request::{RequestContext, StringResponse},
+};
+use maud::{html, PreEscaped};
+
+/// Generates an SVG arrow icon with the specified color.
+///
+/// # Parameters
+/// - `color`: The color of the arrow icon.
+///
+/// # Returns
+/// A `PreEscaped<String>` containing the SVG markup for the arrow icon.
+pub fn arrow_icon(color: &str) -> PreEscaped<String> {
+    html! {
+        svg class=(format!("w-5 h-5 text-{color}-500")) xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor" {
+            path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" {};
+        };
+    }
+}
+
+/// Generates a styled slash separator.
+///
+/// # Returns
+/// A `PreEscaped<String>` containing the HTML markup for a slash separator.
+pub fn slash_seperator() -> PreEscaped<String> {
+    html! {
+        p class="font-bold p-2 text-gray-400" { " / " };
+    }
+}
+
+/// Generates a hyperlink for a specific path within a domain.
+///
+/// # Parameters
+/// - `path`: The path segment to link.
+/// - `index`: The index of the current path segment in the hierarchy.
+/// - `path_seperations`: The array of all path segments in the hierarchy.
+/// - `domain`: The domain to which the path belongs.
+///
+/// # Returns
+/// A `PreEscaped<String>` containing the HTML markup for the hyperlink.
+pub fn gen_path_link(
+    path: &str,
+    index: usize,
+    path_seperations: &[&str],
+    domain: &str,
+) -> PreEscaped<String> {
+    let upto: Vec<&str> = path_seperations.iter().take(index + 1).cloned().collect();
+    html! {
+        a href=(format!("/d/{}/{}", domain, upto.join("/"))) { (path)}
+    }
+}
+
+/// Generates a breadcrumb-like header for a path within a domain.
+///
+/// # Parameters
+/// - `path_seperations`: A vector of path segments representing the hierarchy.
+/// - `domain`: The domain to which the path belongs.
+///
+/// # Returns
+/// A `PreEscaped<String>` containing the HTML markup for the path header.
+pub fn gen_path_header(path_seperations: Vec<&str>, domain: &str) -> PreEscaped<String> {
+    html! {
+        @for (index, path) in path_seperations.iter().enumerate() {
+            (gen_path_link(path, index, &path_seperations, domain))
+            @if index < path_seperations.len()-1 {
+                (slash_seperator())
+            };
+        };
+    }
+}
+
+pub async fn render_page(content: PreEscaped<String>, ctx: RequestContext) -> StringResponse {
+    based::page::render_page(
+        content,
+        "Website Archive",
+        ctx,
+        &Shell::new(
+            html! {
+                script src="https://cdn.tailwindcss.com" {};
+                meta name="viewport" content="width=device-width, initial-scale=1.0" {};
+            },
+            html! {},
+            Some("bg-zinc-950 text-white min-h-screen flex pt-8 justify-center".to_string()),
+        ),
+    )
+    .await
+}
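`gen_path_link` and `gen_path_header` in the new `src/pages/component.rs` build the breadcrumb by linking every segment to `/d/<domain>/<segments up to and including it>`. Stripped of the maud markup, the href construction boils down to the following sketch (the helper name and the plain-`String` output are illustrative only):

```rust
// Mirrors the `take(index + 1)` + `join("/")` logic of gen_path_link(),
// collecting only the generated href values instead of emitting markup.
fn breadcrumb_hrefs(path_seperations: &[&str], domain: &str) -> Vec<String> {
    path_seperations
        .iter()
        .enumerate()
        .map(|(index, _)| {
            let upto: Vec<&str> = path_seperations.iter().take(index + 1).cloned().collect();
            format!("/d/{}/{}", domain, upto.join("/"))
        })
        .collect()
}

fn main() {
    let hrefs = breadcrumb_hrefs(&["blog", "2024", "post.html"], "example.com");
    assert_eq!(
        hrefs,
        vec![
            "/d/example.com/blog",
            "/d/example.com/blog/2024",
            "/d/example.com/blog/2024/post.html",
        ]
    );
}
```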
diff --git a/src/pages/mod.rs b/src/pages/mod.rs
index b2809a5..4b1b502 100644
--- a/src/pages/mod.rs
+++ b/src/pages/mod.rs
@@ -1,31 +1,15 @@
 use std::{io::Read, path::PathBuf};

-use based::{
-    page::Shell,
-    request::{assets::DataResponse, RequestContext, StringResponse},
-};
-use maud::{html, PreEscaped};
+use based::request::{assets::DataResponse, RequestContext, StringResponse};
+use maud::html;
 use rocket::{get, State};

+pub mod component;
+use component::*;
+
 use crate::archive::WebsiteArchive;

-pub async fn render_page(content: PreEscaped<String>, ctx: RequestContext) -> StringResponse {
-    based::page::render_page(
-        content,
-        "Website Archive",
-        ctx,
-        &Shell::new(
-            html! {
-                script src="https://cdn.tailwindcss.com" {};
-                meta name="viewport" content="width=device-width, initial-scale=1.0" {};
-            },
-            html! {},
-            Some("bg-zinc-950 text-white min-h-screen flex pt-8 justify-center".to_string()),
-        ),
-    )
-    .await
-}
-
+/// Get the favicon of a domain
 #[get("/favicon/<domain>")]
 pub async fn favicon_route(domain: &str) -> Option<DataResponse> {
     let mut buf = Vec::new();
@@ -41,6 +25,7 @@ pub async fn favicon_route(domain: &str) -> Option<DataResponse> {
     ))
 }

+/// Websites Overview
 #[get("/")]
 pub async fn index(ctx: RequestContext, arc: &State<WebsiteArchive>) -> StringResponse {
     let websites = arc.domains();
@@ -48,7 +33,7 @@ pub async fn index(ctx: RequestContext, arc: &State) -> StringRe
     let content = html! {
         div class="container mx-auto p-4" {
             h1 class="text-5xl font-bold text-center mb-10" { "Websites" };
-            div class="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-4 gap-6 w-screen" {
+            div class="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-5 xl:grid-cols-6 2xl:grid-cols-8 gap-6" {
                 @for site in websites {
                     a href=(format!("/d/{site}")) class="bg-neutral-900 shadow-md rounded-lg hover:bg-neutral-800 bg-gray-1 hover:cursor-pointer transition-all duration-300 flex flex-col items-center justify-center aspect-square max-w-60" {
@@ -65,43 +50,7 @@ pub async fn index(ctx: RequestContext, arc: &State) -> StringRe
     render_page(content, ctx).await
 }

-pub fn arrow_icon(color: &str) -> PreEscaped<String> {
-    html! {
-        svg class=(format!("w-5 h-5 text-{color}-500")) xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor" {
-            path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" {};
-        };
-    }
-}
-
-pub fn slash_seperator() -> PreEscaped<String> {
-    html! {
-        p class="font-bold p-2 text-gray-400" { " / " };
-    }
-}
-
-pub fn gen_path_link(
-    path: &str,
-    index: usize,
-    path_seperations: &[&str],
-    domain: &str,
-) -> PreEscaped<String> {
-    let upto: Vec<&str> = path_seperations.iter().take(index + 1).cloned().collect();
-    html! {
-        a href=(format!("/d/{}/{}", domain, upto.join("/"))) { (path)}
-    }
-}
-
-pub fn gen_path_header(path_seperations: Vec<&str>, domain: &str) -> PreEscaped<String> {
-    html! {
-        @for (index, path) in path_seperations.iter().enumerate() {
-            (gen_path_link(path, index, &path_seperations, domain))
-            @if index < path_seperations.len()-1 {
-                (slash_seperator())
-            };
-        };
-    }
-}
-
+/// Overview on <domain> / <path>
 #[get("/d/<domain>/<paths..>")]
 pub async fn domain_info_route(
     ctx: RequestContext,
@@ -123,6 +72,8 @@
     let (path_entries, is_doc) = domain.paths(paths.to_str().unwrap());
     let path_seperations: Vec<&str> = paths.to_str().unwrap().split('/').collect();

+    // TODO : Show domains being linked on the page
+
     let content = html! {
         h2 class="text-xl font-bold mb-4 flex items-center" {
             img class="p-2" src=(format!("/favicon/{}", &domain.name)) {};
@@ -172,6 +123,7 @@
     render_page(content, ctx).await
 }

+/// Return archived version of `domain` / `path` at `time`
 #[get("/s//?