parent f657a61d55
commit 4ce2a0ceaf
7 changed files with 83 additions and 21 deletions
Cargo.lock (generated, 2 changes)

@@ -164,7 +164,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
 [[package]]
 name = "based"
 version = "0.1.0"
-source = "git+https://git.hydrar.de/jmarya/based#04852f2fbcc301d0c2b4098f613b9450b4474363"
+source = "git+https://git.hydrar.de/jmarya/based#38373021611149d2ebc6d33a269375ec240527cb"
 dependencies = [
  "bcrypt",
  "chrono",

@@ -13,3 +13,5 @@ services:
       - "ROUTE_INTERNAL=true"
       # Download missing routes on demand
       - "DOWNLOAD_ON_DEMAND=true"
+      # Blacklisted domains (comma-separated regex)
+      - "BLACKLIST_DOMAINS=google.com,.*.youtube.com"
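Note that each comma-separated entry is treated as a regular expression, not a literal domain, so the unescaped dots in ".*.youtube.com" match any character and the pattern covers every youtube.com subdomain (and slightly more). A standalone sketch of the matching behavior, assuming the regex crate exactly as src/blacklist.rs uses it below:

    fn main() {
        // Each comma-separated pattern is compiled on its own,
        // mirroring the loop in check_blacklist.
        for pattern in "google.com,.*.youtube.com".split(',') {
            let rgx = regex::Regex::new(pattern).unwrap();
            // is_match is an unanchored search, so "google.com" also
            // matches inside e.g. "evil-google.com".
            println!("{pattern}: {}", rgx.is_match("music.youtube.com"));
        }
    }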

@@ -1,5 +1,10 @@
 use std::path::PathBuf;

+use based::request::RequestContext;
+use maud::html;
+
+use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::render_page};
+
 pub fn read_dir(dir: &PathBuf) -> Vec<String> {
     let mut list = Vec::new();

@@ -30,6 +35,7 @@ fn internalize_urls(input: &str) -> String {
         .to_string()
 }

+#[derive(Debug, Clone)]
 pub struct WebsiteArchive {
     pub dir: PathBuf,
 }

@@ -41,7 +47,9 @@ pub struct Domain {

 impl Domain {
     pub fn new(name: &str, dir: PathBuf) -> Self {
-        std::fs::create_dir_all(&dir).unwrap();
+        if !check_blacklist(name) {
+            std::fs::create_dir_all(&dir).unwrap();
+        }
         Self {
             name: name.to_string(),
             dir,

@@ -123,7 +131,14 @@ impl Document {
         format!("/s/{}/{}", self.domain, self.path)
     }

-    pub fn render_local(&self, version: Option<String>) -> Option<String> {
+    pub async fn render_local(&self, version: Option<String>) -> Option<String> {
+        if check_blacklist(&self.domain) {
+            let content = html! {
+                h3 { "This site is blacklisted" };
+            };
+            return Some(render_page(content, RequestContext::default()).await.1 .1);
+        }
+
         let mut file_path = self.doc_dir();

         let latest_version = if let Some(version) = version {

@@ -175,14 +190,24 @@ impl WebsiteArchive {
     }

     /// Archive a URL
-    pub fn archive_url(&self, url: &str) {
+    pub async fn archive_url(&self, url: &str) {
         let parsed_url = url::Url::parse(url).unwrap();

         let domain = parsed_url.domain().unwrap().trim_start_matches("www.");

+        // Deny blacklist
+        if check_blacklist(domain) {
+            return;
+        }
+
         let path = parsed_url.path();

         let mut folder_name = self.dir.join(&domain);

+        if !std::fs::exists(&folder_name).unwrap() {
+            download_fav_for(domain).await;
+        }
+
         for paths in path.split('/') {
             if !paths.is_empty() {
                 folder_name = folder_name.join(paths);
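A toolchain note: std::fs::exists was only stabilized in Rust 1.81. On older compilers, Path::exists covers the same check; the trade-off is that it maps I/O errors to false instead of surfacing them. A sketch:

    use std::path::Path;

    // Equivalent guard without std::fs::exists: Path::exists swallows
    // I/O errors (treating them as "not present") rather than panicking.
    fn needs_favicon(folder_name: &Path) -> bool {
        !folder_name.exists()
    }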

src/blacklist.rs (new file, 18 additions)

@@ -0,0 +1,18 @@
+pub fn check_blacklist(domain: &str) -> bool {
+    let blacklist_raw = std::env::var("BLACKLIST_DOMAINS").unwrap_or_default();
+
+    if blacklist_raw.is_empty() {
+        return false;
+    }
+
+    let blacklist: Vec<&str> = blacklist_raw.split(',').collect();
+
+    for domain_regex in blacklist {
+        let rgx = regex::Regex::new(domain_regex).unwrap();
+        if rgx.is_match(domain) {
+            return true;
+        }
+    }
+
+    return false;
+}
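A hypothetical smoke test for check_blacklist, reusing the BLACKLIST_DOMAINS value from the compose file above:

    fn main() {
        // check_blacklist re-reads this variable on every call.
        std::env::set_var("BLACKLIST_DOMAINS", "google.com,.*.youtube.com");
        assert!(check_blacklist("google.com"));
        assert!(check_blacklist("music.youtube.com"));
        assert!(!check_blacklist("example.org"));
    }

Since every call recompiles each pattern, caching the compiled regexes (for instance in a std::sync::LazyLock) would avoid repeated work on hot request paths.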

@@ -15,10 +15,15 @@ pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
     Some(favicon_data)
 }

-pub async fn download_favicons_for_sites(sites: Vec<String>) {
-    for site in sites {
-        if let Some(fav) = download_favicon(&site).await {
-            std::fs::write(std::path::Path::new("./favicon").join(site), fav).unwrap();
-        }
+pub async fn download_fav_for(site: &str) {
+    if let Some(fav) = download_favicon(&site).await {
+        std::fs::write(std::path::Path::new("./favicon").join(site), fav).unwrap();
+        log::info!("Writing favicon for {site}");
+    }
+}
+
+pub async fn download_favicons_for_sites(sites: &[String]) {
+    for site in sites {
+        download_fav_for(site).await;
     }
 }
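Pulling the single-site path out into download_fav_for is what allows archive_url above to fetch one favicon the first time a domain is archived, instead of re-walking every known site. A call-site sketch (assuming a tokio runtime, as in main below):

    // A failed download is absorbed by download_favicon's Option,
    // so callers can fire and forget.
    download_fav_for("example.org").await;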
@@ -2,6 +2,7 @@ use archive::WebsiteArchive;
 use rocket::routes;

 mod archive;
+mod blacklist;
 mod favicon;
 mod pages;

@@ -11,7 +12,11 @@ async fn launch() -> _ {

     let arc = WebsiteArchive::new("./websites");

-    favicon::download_favicons_for_sites(arc.domains()).await;
+    let archive = arc.clone();
+
+    tokio::spawn(async move {
+        favicon::download_favicons_for_sites(&archive.domains()).await;
+    });

     rocket::build()
         .mount(
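Two details make this work: the new #[derive(Clone)] on WebsiteArchive, and the fact that tokio::spawn needs a 'static future that owns its data. Moving a clone into the task keeps arc available for rocket::build(), and startup no longer blocks on one favicon download per archived domain:

    // The spawned future owns `archive`; `arc` stays usable below.
    let archive = arc.clone();
    tokio::spawn(async move {
        favicon::download_favicons_for_sites(&archive.domains()).await;
    });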

@@ -113,11 +113,13 @@ pub async fn render_website(
 ) -> Option<StringResponse> {
     let document = arc.get_domain(domain).path(path.to_str().unwrap());

-    let content = document.render_local(if time.is_some() {
-        Some(time.unwrap().to_string())
-    } else {
-        None
-    });
+    let content = document
+        .render_local(if time.is_some() {
+            Some(time.unwrap().to_string())
+        } else {
+            None
+        })
+        .await;

     if let Some(content) = content {
         return Some(respond_html(&content));

@@ -127,13 +129,18 @@ pub async fn render_website(
             .as_str()
             == "true"
         {
-            arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()));
+            arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()))
+                .await;

-            return Some(respond_html(&document.render_local(if time.is_some() {
-                Some(time.unwrap().to_string())
-            } else {
-                None
-            })?));
+            return Some(respond_html(
+                &document
+                    .render_local(if time.is_some() {
+                        Some(time.unwrap().to_string())
+                    } else {
+                        None
+                    })
+                    .await?,
+            ));
         }
     }

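Both render_local call sites rebuild the Option by hand; Option::map expresses the same conversion in one step (a sketch, assuming the payload of time implements ToString):

    let content = document
        .render_local(time.map(|t| t.to_string()))
        .await;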