update
Some checks failed
ci/woodpecker/push/build Pipeline failed

This commit is contained in:
JMARyA 2024-12-29 20:13:15 +01:00
parent f657a61d55
commit 4ce2a0ceaf
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
7 changed files with 83 additions and 21 deletions

2
Cargo.lock generated
View file

@ -164,7 +164,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]] [[package]]
name = "based" name = "based"
version = "0.1.0" version = "0.1.0"
source = "git+https://git.hydrar.de/jmarya/based#04852f2fbcc301d0c2b4098f613b9450b4474363" source = "git+https://git.hydrar.de/jmarya/based#38373021611149d2ebc6d33a269375ec240527cb"
dependencies = [ dependencies = [
"bcrypt", "bcrypt",
"chrono", "chrono",

View file

@ -13,3 +13,5 @@ services:
- "ROUTE_INTERNAL=true" - "ROUTE_INTERNAL=true"
# Download missing routes on demand # Download missing routes on demand
- "DOWNLOAD_ON_DEMAND=true" - "DOWNLOAD_ON_DEMAND=true"
# Blacklisted domains (comma-separated regex)
- "BLACKLIST_DOMAINS=google.com,.*.youtube.com"

View file

@ -1,5 +1,10 @@
use std::path::PathBuf; use std::path::PathBuf;
use based::request::RequestContext;
use maud::html;
use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::render_page};
pub fn read_dir(dir: &PathBuf) -> Vec<String> { pub fn read_dir(dir: &PathBuf) -> Vec<String> {
let mut list = Vec::new(); let mut list = Vec::new();
@ -30,6 +35,7 @@ fn internalize_urls(input: &str) -> String {
.to_string() .to_string()
} }
#[derive(Debug, Clone)]
pub struct WebsiteArchive { pub struct WebsiteArchive {
pub dir: PathBuf, pub dir: PathBuf,
} }
@ -41,7 +47,9 @@ pub struct Domain {
impl Domain { impl Domain {
pub fn new(name: &str, dir: PathBuf) -> Self { pub fn new(name: &str, dir: PathBuf) -> Self {
std::fs::create_dir_all(&dir).unwrap(); if !check_blacklist(name) {
std::fs::create_dir_all(&dir).unwrap();
}
Self { Self {
name: name.to_string(), name: name.to_string(),
dir, dir,
@ -123,7 +131,14 @@ impl Document {
format!("/s/{}/{}", self.domain, self.path) format!("/s/{}/{}", self.domain, self.path)
} }
pub fn render_local(&self, version: Option<String>) -> Option<String> { pub async fn render_local(&self, version: Option<String>) -> Option<String> {
if check_blacklist(&self.domain) {
let content = html! {
h3 { "This site is blacklisted" };
};
return Some(render_page(content, RequestContext::default()).await.1 .1);
}
let mut file_path = self.doc_dir(); let mut file_path = self.doc_dir();
let latest_version = if let Some(version) = version { let latest_version = if let Some(version) = version {
@ -175,14 +190,24 @@ impl WebsiteArchive {
} }
/// Archive a URL /// Archive a URL
pub fn archive_url(&self, url: &str) { pub async fn archive_url(&self, url: &str) {
let parsed_url = url::Url::parse(url).unwrap(); let parsed_url = url::Url::parse(url).unwrap();
let domain = parsed_url.domain().unwrap().trim_start_matches("www."); let domain = parsed_url.domain().unwrap().trim_start_matches("www.");
// Deny blacklist
if check_blacklist(domain) {
return;
}
let path = parsed_url.path(); let path = parsed_url.path();
let mut folder_name = self.dir.join(&domain); let mut folder_name = self.dir.join(&domain);
if !std::fs::exists(&folder_name).unwrap() {
download_fav_for(domain).await;
}
for paths in path.split('/') { for paths in path.split('/') {
if !paths.is_empty() { if !paths.is_empty() {
folder_name = folder_name.join(paths); folder_name = folder_name.join(paths);

18
src/blacklist.rs Normal file
View file

@ -0,0 +1,18 @@
/// Returns `true` when `domain` matches any pattern listed in the
/// `BLACKLIST_DOMAINS` environment variable.
///
/// The variable is read on every call and is interpreted as a comma-separated
/// list of regular expressions. An unset or empty variable means nothing is
/// blacklisted.
///
/// Each entry is trimmed, and empty entries (for example from a trailing comma)
/// are skipped: an empty pattern is a valid regex that matches every input and
/// would otherwise blacklist all domains. An entry that fails to compile is
/// reported on stderr and ignored instead of panicking.
///
/// Note: patterns are matched with `Regex::is_match`, which is an unanchored
/// search. Use `^`/`$` in the pattern when the whole domain must match.
pub fn check_blacklist(domain: &str) -> bool {
    let blacklist_raw = std::env::var("BLACKLIST_DOMAINS").unwrap_or_default();

    blacklist_raw
        .split(',')
        .map(str::trim)
        // An empty pattern would match everything, so drop it.
        .filter(|pattern| !pattern.is_empty())
        .any(|pattern| match regex::Regex::new(pattern) {
            Ok(rgx) => rgx.is_match(domain),
            Err(err) => {
                // A misconfigured entry must not take the whole service down.
                eprintln!("Ignoring invalid BLACKLIST_DOMAINS pattern {pattern:?}: {err}");
                false
            }
        })
}

View file

@ -15,10 +15,15 @@ pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
Some(favicon_data) Some(favicon_data)
} }
pub async fn download_favicons_for_sites(sites: Vec<String>) { pub async fn download_fav_for(site: &str) {
for site in sites { if let Some(fav) = download_favicon(&site).await {
if let Some(fav) = download_favicon(&site).await { std::fs::write(std::path::Path::new("./favicon").join(site), fav).unwrap();
std::fs::write(std::path::Path::new("./favicon").join(site), fav).unwrap(); log::info!("Writting favicon for {site}");
} }
}
pub async fn download_favicons_for_sites(sites: &[String]) {
for site in sites {
download_fav_for(site).await;
} }
} }

View file

@ -2,6 +2,7 @@ use archive::WebsiteArchive;
use rocket::routes; use rocket::routes;
mod archive; mod archive;
mod blacklist;
mod favicon; mod favicon;
mod pages; mod pages;
@ -11,7 +12,11 @@ async fn launch() -> _ {
let arc = WebsiteArchive::new("./websites"); let arc = WebsiteArchive::new("./websites");
favicon::download_favicons_for_sites(arc.domains()).await; let archive = arc.clone();
tokio::spawn(async move {
favicon::download_favicons_for_sites(&archive.domains()).await;
});
rocket::build() rocket::build()
.mount( .mount(

View file

@ -113,11 +113,13 @@ pub async fn render_website(
) -> Option<StringResponse> { ) -> Option<StringResponse> {
let document = arc.get_domain(domain).path(path.to_str().unwrap()); let document = arc.get_domain(domain).path(path.to_str().unwrap());
let content = document.render_local(if time.is_some() { let content = document
Some(time.unwrap().to_string()) .render_local(if time.is_some() {
} else { Some(time.unwrap().to_string())
None } else {
}); None
})
.await;
if let Some(content) = content { if let Some(content) = content {
return Some(respond_html(&content)); return Some(respond_html(&content));
@ -127,13 +129,18 @@ pub async fn render_website(
.as_str() .as_str()
== "true" == "true"
{ {
arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap())); arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()))
.await;
return Some(respond_html(&document.render_local(if time.is_some() { return Some(respond_html(
Some(time.unwrap().to_string()) &document
} else { .render_local(if time.is_some() {
None Some(time.unwrap().to_string())
})?)); } else {
None
})
.await?,
));
} }
} }