parent
9b96a21906
commit
654d4b9cba
7 changed files with 222 additions and 105 deletions
5
Cargo.lock
generated
5
Cargo.lock
generated
|
@ -3384,22 +3384,17 @@ dependencies = [
|
|||
"chrono",
|
||||
"env_logger",
|
||||
"futures",
|
||||
"hex",
|
||||
"log",
|
||||
"maud",
|
||||
"rayon",
|
||||
"regex",
|
||||
"reqwest 0.12.11",
|
||||
"ring 0.16.20",
|
||||
"rocket",
|
||||
"rocket_cors",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sqlx",
|
||||
"tokio",
|
||||
"url",
|
||||
"uuid",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
@ -5,16 +5,11 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
env_logger = "0.10.0"
|
||||
hex = "0.4.3"
|
||||
rayon = "1.7.0"
|
||||
regex = "1.9.5"
|
||||
ring = "0.16.20"
|
||||
walkdir = "2.4.0"
|
||||
chrono = { version = "0.4.38", features = ["serde"] }
|
||||
futures = "0.3.30"
|
||||
log = "0.4.20"
|
||||
rocket = { version = "0.5.1", features = ["json"] }
|
||||
rocket_cors = "0.6.0"
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = "1.0.111"
|
||||
tokio = { version = "1.35.1", features = ["full"] }
|
||||
|
|
100
src/archive.rs
100
src/archive.rs
|
@ -3,24 +3,24 @@ use std::path::PathBuf;
|
|||
use based::request::RequestContext;
|
||||
use maud::html;
|
||||
|
||||
use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::render_page};
|
||||
use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::component::render_page};
|
||||
|
||||
/// Read directory entries into `Vec<String>`.
///
/// # Parameters
/// - `dir`: The directory whose entries are listed.
///
/// # Returns
/// The file names of the entries found directly inside `dir`, in the order
/// yielded by [`std::fs::read_dir`]. Listing is best-effort: the result is
/// empty when `dir` cannot be read, and entries that fail to load or whose
/// names are not valid UTF-8 are skipped instead of reported.
pub fn read_dir(dir: &std::path::Path) -> Vec<String> {
    std::fs::read_dir(dir)
        .map(|entries| {
            entries
                // `flatten` drops the `Err` items produced for unreadable entries.
                .flatten()
                // `to_str` returns `None` for non-UTF-8 names, which are left out.
                .filter_map(|entry| entry.file_name().to_str().map(str::to_owned))
                .collect()
        })
        // An unreadable or missing directory is treated as an empty one.
        .unwrap_or_default()
}
|
||||
|
||||
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
|
||||
fn internalize_urls(input: &str) -> String {
|
||||
let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
|
||||
let re = regex::Regex::new(url_pattern).unwrap();
|
||||
|
@ -35,17 +35,30 @@ fn internalize_urls(input: &str) -> String {
|
|||
.to_string()
|
||||
}
|
||||
|
||||
/// Represents a directory containing archived websites
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct WebsiteArchive {
|
||||
pub dir: PathBuf,
|
||||
}
|
||||
|
||||
/// Represents a domain within the website archive
|
||||
pub struct Domain {
|
||||
/// Domain name
|
||||
pub name: String,
|
||||
dir: PathBuf,
|
||||
}
|
||||
|
||||
impl Domain {
|
||||
/// Creates a new `Domain` instance.
|
||||
///
|
||||
/// If the domain name is not blacklisted, a directory is created.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `name`: The name of the domain.
|
||||
/// - `dir`: The directory path for the domain.
|
||||
///
|
||||
/// # Returns
|
||||
/// A new `Domain` instance.
|
||||
pub fn new(name: &str, dir: PathBuf) -> Self {
|
||||
if !check_blacklist(name) {
|
||||
std::fs::create_dir_all(&dir).unwrap();
|
||||
|
@ -56,10 +69,26 @@ impl Domain {
|
|||
}
|
||||
}
|
||||
|
||||
/// Resolves a specific path within the domain and returns a `Document` representing it.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `path`: The path to resolve within the domain.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `Document` instance corresponding to the given path.
|
||||
pub fn path(&self, path: &str) -> Document {
|
||||
Document::new(&self.name, path, self.dir.parent().unwrap().to_path_buf())
|
||||
}
|
||||
|
||||
/// Retrieves entries and metadata for a given path within the domain.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `path`: The path to inspect.
|
||||
///
|
||||
/// # Returns
|
||||
/// A tuple containing:
|
||||
/// - A vector of `PathEntry` instances representing the contents of the path.
|
||||
/// - A boolean indicating whether the path is itself a `Document`
|
||||
pub fn paths(&self, path: &str) -> (Vec<PathEntry>, bool) {
|
||||
let mut base_path = self.dir.clone();
|
||||
|
||||
|
@ -87,6 +116,7 @@ impl Domain {
|
|||
}
|
||||
}
|
||||
|
||||
/// Represents an entry within a domain's path, containing its name and URL path.
|
||||
pub struct PathEntry(String, String);
|
||||
|
||||
impl PathEntry {
|
||||
|
@ -99,13 +129,25 @@ impl PathEntry {
|
|||
}
|
||||
}
|
||||
|
||||
/// Represents a document within a domain
|
||||
pub struct Document {
|
||||
/// The domain associated with the document.
|
||||
pub domain: String,
|
||||
/// The path of the document within the domain.
|
||||
pub path: String,
|
||||
base_dir: PathBuf,
|
||||
}
|
||||
|
||||
impl Document {
|
||||
/// Creates a new `Document` instance.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `domain`: The domain to which the document belongs.
|
||||
/// - `path`: The path of the document within the domain.
|
||||
/// - `base_dir`: The base directory of the archive storage.
|
||||
///
|
||||
/// # Returns
|
||||
/// A new `Document` instance.
|
||||
pub fn new(domain: &str, path: &str, base_dir: PathBuf) -> Self {
|
||||
Self {
|
||||
domain: domain.to_string(),
|
||||
|
@ -114,10 +156,15 @@ impl Document {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn url(&self) -> String {
|
||||
format!("/s/{}/{}", self.domain, self.path)
|
||||
}
|
||||
|
||||
/// Renders the document, returning its content as a string.
|
||||
///
|
||||
/// If the environment variable `$ROUTE_INTERNAL` is set to `true`, all links will be rewritten to point to internal archived routes.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `version`: An optional version of the document to render in the format `YYYY-MM-DD`.
|
||||
///
|
||||
/// # Returns
|
||||
/// An `Option` containing the rendered content as a string, or `None` if nothing could be rendered.
|
||||
pub async fn render_local(&self, version: Option<String>) -> Option<String> {
|
||||
if check_blacklist(&self.domain) {
|
||||
let content = html! {
|
||||
|
@ -146,6 +193,10 @@ impl Document {
|
|||
}
|
||||
}
|
||||
|
||||
/// Determines the directory where the document is stored.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `PathBuf` representing the document directory.
|
||||
pub fn doc_dir(&self) -> PathBuf {
|
||||
let mut file_path = self.base_dir.join(&self.domain);
|
||||
|
||||
|
@ -156,6 +207,10 @@ impl Document {
|
|||
file_path
|
||||
}
|
||||
|
||||
/// Retrieves available versions of the document.
|
||||
///
|
||||
/// # Returns
|
||||
/// A vector of strings representing the available versions of the document, sorted in descending order.
|
||||
pub fn versions(&self) -> Vec<String> {
|
||||
let mut res: Vec<String> = read_dir(&self.doc_dir())
|
||||
.into_iter()
|
||||
|
@ -168,21 +223,46 @@ impl Document {
|
|||
}
|
||||
|
||||
impl WebsiteArchive {
|
||||
/// Creates a new `WebsiteArchive` instance.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `dir`: The directory path where the archive will be stored.
|
||||
///
|
||||
/// # Returns
|
||||
/// A new `WebsiteArchive` instance.
|
||||
pub fn new(dir: &str) -> Self {
|
||||
Self {
|
||||
dir: PathBuf::from(dir),
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieves the list of domain names stored in the archive.
|
||||
///
|
||||
/// # Returns
|
||||
/// A vector of domain names as strings.
|
||||
pub fn domains(&self) -> Vec<String> {
|
||||
read_dir(&self.dir)
|
||||
}
|
||||
|
||||
/// Retrieves a `Domain` instance for a specified domain name.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `domain`: The name of the domain to retrieve.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `Domain` instance corresponding to the specified domain.
|
||||
pub fn get_domain(&self, domain: &str) -> Domain {
|
||||
Domain::new(domain, self.dir.join(domain))
|
||||
}
|
||||
|
||||
/// Archive a URL
|
||||
/// Archives a URL by downloading and storing its content.
|
||||
///
|
||||
/// If the URL does not pass the blacklist check, it will not be archived.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `url`: The URL to archive.
|
||||
///
|
||||
/// This function downloads the content of the URL, processes it, and saves it to the archive.
|
||||
pub async fn archive_url(&self, url: &str) {
|
||||
let parsed_url = url::Url::parse(url).unwrap();
|
||||
|
||||
|
@ -197,9 +277,7 @@ impl WebsiteArchive {
|
|||
|
||||
let mut folder_name = self.dir.join(domain);
|
||||
|
||||
if !std::fs::exists(&folder_name).unwrap() {
|
||||
download_fav_for(domain).await;
|
||||
}
|
||||
|
||||
for paths in path.split('/') {
|
||||
if !paths.is_empty() {
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
/// Checks if a domain is present in the blacklist of unwanted domains.
|
||||
///
|
||||
/// This function checks the `$BLACKLIST_DOMAINS` environment variable for a comma-separated list of regular expressions to match against.
|
||||
/// If a match is found, it immediately returns `true`. Otherwise, it returns `false`.
|
||||
pub fn check_blacklist(domain: &str) -> bool {
|
||||
let blacklist_raw = std::env::var("BLACKLIST_DOMAINS").unwrap_or_default();
|
||||
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
/// Downloads a favicon for the given domain.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// * `domain`: The domain for which to download the favicon.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A `Vec<u8>` containing the favicon data, or `None` if an error occurred.
|
||||
pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
|
||||
let mut favicon_url = url::Url::parse(&format!("https://{}", domain)).ok()?;
|
||||
favicon_url.set_path("/favicon.ico");
|
||||
|
@ -15,6 +24,9 @@ pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
|
|||
Some(favicon_data)
|
||||
}
|
||||
|
||||
/// Downloads a favicon for `site` and stores it.
|
||||
///
|
||||
/// This will not download a favicon if it is already present.
|
||||
pub async fn download_fav_for(site: &str) {
|
||||
if let Some(fav) = download_favicon(site).await {
|
||||
let fav_path = std::path::Path::new("./favicon").join(site);
|
||||
|
|
88
src/pages/component.rs
Normal file
88
src/pages/component.rs
Normal file
|
@ -0,0 +1,88 @@
|
|||
use based::{
|
||||
page::Shell,
|
||||
request::{RequestContext, StringResponse},
|
||||
};
|
||||
use maud::{html, PreEscaped};
|
||||
|
||||
/// Generates an SVG arrow icon with the specified color.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `color`: The color of the arrow icon.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `PreEscaped<String>` containing the SVG markup for the arrow icon.
|
||||
pub fn arrow_icon(color: &str) -> PreEscaped<String> {
|
||||
html! {
|
||||
svg class=(format!("w-5 h-5 text-{color}-500")) xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor" {
|
||||
path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" {};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a styled slash separator.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `PreEscaped<String>` containing the HTML markup for a slash separator.
|
||||
pub fn slash_seperator() -> PreEscaped<String> {
|
||||
html! {
|
||||
p class="font-bold p-2 text-gray-400" { " / " };
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a hyperlink for a specific path within a domain.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `path`: The path segment to link.
|
||||
/// - `index`: The index of the current path segment in the hierarchy.
|
||||
/// - `path_seperations`: The array of all path segments in the hierarchy.
|
||||
/// - `domain`: The domain to which the path belongs.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `PreEscaped<String>` containing the HTML markup for the hyperlink.
|
||||
pub fn gen_path_link(
|
||||
path: &str,
|
||||
index: usize,
|
||||
path_seperations: &[&str],
|
||||
domain: &str,
|
||||
) -> PreEscaped<String> {
|
||||
let upto: Vec<&str> = path_seperations.iter().take(index + 1).cloned().collect();
|
||||
html! {
|
||||
a href=(format!("/d/{}/{}", domain, upto.join("/"))) { (path)}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a breadcrumb-like header for a path within a domain.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `path_seperations`: A vector of path segments representing the hierarchy.
|
||||
/// - `domain`: The domain to which the path belongs.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `PreEscaped<String>` containing the HTML markup for the path header.
|
||||
pub fn gen_path_header(path_seperations: Vec<&str>, domain: &str) -> PreEscaped<String> {
|
||||
html! {
|
||||
@for (index, path) in path_seperations.iter().enumerate() {
|
||||
(gen_path_link(path, index, &path_seperations, domain))
|
||||
@if index < path_seperations.len()-1 {
|
||||
(slash_seperator())
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn render_page(content: PreEscaped<String>, ctx: RequestContext) -> StringResponse {
|
||||
based::page::render_page(
|
||||
content,
|
||||
"Website Archive",
|
||||
ctx,
|
||||
&Shell::new(
|
||||
html! {
|
||||
script src="https://cdn.tailwindcss.com" {};
|
||||
meta name="viewport" content="width=device-width, initial-scale=1.0" {};
|
||||
},
|
||||
html! {},
|
||||
Some("bg-zinc-950 text-white min-h-screen flex pt-8 justify-center".to_string()),
|
||||
),
|
||||
)
|
||||
.await
|
||||
}
|
|
@ -1,31 +1,15 @@
|
|||
use std::{io::Read, path::PathBuf};
|
||||
|
||||
use based::{
|
||||
page::Shell,
|
||||
request::{assets::DataResponse, RequestContext, StringResponse},
|
||||
};
|
||||
use maud::{html, PreEscaped};
|
||||
use based::request::{assets::DataResponse, RequestContext, StringResponse};
|
||||
use maud::html;
|
||||
use rocket::{get, State};
|
||||
|
||||
pub mod component;
|
||||
use component::*;
|
||||
|
||||
use crate::archive::WebsiteArchive;
|
||||
|
||||
pub async fn render_page(content: PreEscaped<String>, ctx: RequestContext) -> StringResponse {
|
||||
based::page::render_page(
|
||||
content,
|
||||
"Website Archive",
|
||||
ctx,
|
||||
&Shell::new(
|
||||
html! {
|
||||
script src="https://cdn.tailwindcss.com" {};
|
||||
meta name="viewport" content="width=device-width, initial-scale=1.0" {};
|
||||
},
|
||||
html! {},
|
||||
Some("bg-zinc-950 text-white min-h-screen flex pt-8 justify-center".to_string()),
|
||||
),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Get the favicon of a domain
|
||||
#[get("/favicon/<domain>")]
|
||||
pub async fn favicon_route(domain: &str) -> Option<DataResponse> {
|
||||
let mut buf = Vec::new();
|
||||
|
@ -41,6 +25,7 @@ pub async fn favicon_route(domain: &str) -> Option<DataResponse> {
|
|||
))
|
||||
}
|
||||
|
||||
/// Websites Overview
|
||||
#[get("/")]
|
||||
pub async fn index(ctx: RequestContext, arc: &State<WebsiteArchive>) -> StringResponse {
|
||||
let websites = arc.domains();
|
||||
|
@ -48,7 +33,7 @@ pub async fn index(ctx: RequestContext, arc: &State<WebsiteArchive>) -> StringRe
|
|||
let content = html! {
|
||||
div class="container mx-auto p-4" {
|
||||
h1 class="text-5xl font-bold text-center mb-10" { "Websites" };
|
||||
div class="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-4 gap-6 w-screen" {
|
||||
div class="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-5 xl:grid-cols-6 2xl:grid-cols-8 gap-6" {
|
||||
|
||||
@for site in websites {
|
||||
a href=(format!("/d/{site}")) class="bg-neutral-900 shadow-md rounded-lg hover:bg-neutral-800 bg-gray-1 hover:cursor-pointer transition-all duration-300 flex flex-col items-center justify-center aspect-square max-w-60" {
|
||||
|
@ -65,43 +50,7 @@ pub async fn index(ctx: RequestContext, arc: &State<WebsiteArchive>) -> StringRe
|
|||
render_page(content, ctx).await
|
||||
}
|
||||
|
||||
pub fn arrow_icon(color: &str) -> PreEscaped<String> {
|
||||
html! {
|
||||
svg class=(format!("w-5 h-5 text-{color}-500")) xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor" {
|
||||
path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" {};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
pub fn slash_seperator() -> PreEscaped<String> {
|
||||
html! {
|
||||
p class="font-bold p-2 text-gray-400" { " / " };
|
||||
}
|
||||
}
|
||||
|
||||
pub fn gen_path_link(
|
||||
path: &str,
|
||||
index: usize,
|
||||
path_seperations: &[&str],
|
||||
domain: &str,
|
||||
) -> PreEscaped<String> {
|
||||
let upto: Vec<&str> = path_seperations.iter().take(index + 1).cloned().collect();
|
||||
html! {
|
||||
a href=(format!("/d/{}/{}", domain, upto.join("/"))) { (path)}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn gen_path_header(path_seperations: Vec<&str>, domain: &str) -> PreEscaped<String> {
|
||||
html! {
|
||||
@for (index, path) in path_seperations.iter().enumerate() {
|
||||
(gen_path_link(path, index, &path_seperations, domain))
|
||||
@if index < path_seperations.len()-1 {
|
||||
(slash_seperator())
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Overview on <domain> / <path>
|
||||
#[get("/d/<domain>/<paths..>")]
|
||||
pub async fn domain_info_route(
|
||||
ctx: RequestContext,
|
||||
|
@ -123,6 +72,8 @@ pub async fn domain_info_route(
|
|||
let (path_entries, is_doc) = domain.paths(paths.to_str().unwrap());
|
||||
let path_seperations: Vec<&str> = paths.to_str().unwrap().split('/').collect();
|
||||
|
||||
// TODO : Show domains being linked on the page
|
||||
|
||||
let content = html! {
|
||||
h2 class="text-xl font-bold mb-4 flex items-center" {
|
||||
img class="p-2" src=(format!("/favicon/{}", &domain.name)) {};
|
||||
|
@ -172,6 +123,7 @@ pub async fn domain_info_route(
|
|||
render_page(content, ctx).await
|
||||
}
|
||||
|
||||
/// Return archived version of `domain` / `path` at `time`
|
||||
#[get("/s/<domain>/<path..>?<time>")]
|
||||
pub async fn render_website(
|
||||
domain: &str,
|
||||
|
@ -191,22 +143,16 @@ pub async fn render_website(
|
|||
"text/html".to_string(),
|
||||
Some(60 * 60 * 24),
|
||||
));
|
||||
} else {
|
||||
if std::env::var("DOWNLOAD_ON_DEMAND")
|
||||
} else if std::env::var("DOWNLOAD_ON_DEMAND")
|
||||
.unwrap_or("false".to_string())
|
||||
.as_str()
|
||||
== "true"
|
||||
&& time.is_none()
|
||||
{
|
||||
arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()))
|
||||
.await;
|
||||
|
||||
let content = document
|
||||
.render_local(if time.is_some() {
|
||||
Some(time.unwrap().to_string())
|
||||
} else {
|
||||
None
|
||||
})
|
||||
.await?;
|
||||
let content = document.render_local(None).await?;
|
||||
|
||||
return Some(DataResponse::new(
|
||||
content.as_bytes().to_vec(),
|
||||
|
@ -214,7 +160,6 @@ pub async fn render_website(
|
|||
Some(60 * 60 * 24),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue