parent f657a61d55
commit 4ce2a0ceaf
7 changed files with 83 additions and 21 deletions
Cargo.lock (generated, 2 changes)
@@ -164,7 +164,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
 [[package]]
 name = "based"
 version = "0.1.0"
-source = "git+https://git.hydrar.de/jmarya/based#04852f2fbcc301d0c2b4098f613b9450b4474363"
+source = "git+https://git.hydrar.de/jmarya/based#38373021611149d2ebc6d33a269375ec240527cb"
 dependencies = [
  "bcrypt",
  "chrono",
docker-compose.yml (2 changes)

@@ -13,3 +13,5 @@ services:
       - "ROUTE_INTERNAL=true"
       # Download missing routes on demand
       - "DOWNLOAD_ON_DEMAND=true"
+      # Blacklisted domains (comma-separated regex)
+      - "BLACKLIST_DOMAINS=google.com,.*.youtube.com"
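One thing worth flagging in this change: the blacklist entries are compiled as regular expressions and matched unanchored (see src/blacklist.rs below), so the unescaped `.` in `google.com` matches any character and a pattern can fire on more domains than intended. A small sketch of the resulting semantics, using made-up test domains:

```rust
// Sketch: how the two example BLACKLIST_DOMAINS patterns behave.
// Unanchored is_match() plus unescaped '.' makes matching loose.
fn main() {
    let patterns = ["google.com", ".*.youtube.com"];
    for domain in ["google.com", "m.youtube.com", "googlexcom", "example.com"] {
        let blocked = patterns
            .iter()
            .any(|p| regex::Regex::new(p).unwrap().is_match(domain));
        println!("{domain}: blocked = {blocked}");
        // -> google.com: true, m.youtube.com: true,
        //    googlexcom: true (the '.' matched 'x'), example.com: false
    }
}
```

Escaping the dots (`google\.com`) and anchoring (`^…$`) would tighten this, if exact-domain matching is the intent.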
src/archive.rs

@@ -1,5 +1,10 @@
 use std::path::PathBuf;

+use based::request::RequestContext;
+use maud::html;
+
+use crate::{blacklist::check_blacklist, favicon::download_fav_for, pages::render_page};
+
 pub fn read_dir(dir: &PathBuf) -> Vec<String> {
     let mut list = Vec::new();
@@ -30,6 +35,7 @@ fn internalize_urls(input: &str) -> String {
         .to_string()
 }

+#[derive(Debug, Clone)]
 pub struct WebsiteArchive {
     pub dir: PathBuf,
 }
@@ -41,7 +47,9 @@ pub struct Domain {

 impl Domain {
     pub fn new(name: &str, dir: PathBuf) -> Self {
-        std::fs::create_dir_all(&dir).unwrap();
+        if !check_blacklist(name) {
+            std::fs::create_dir_all(&dir).unwrap();
+        }
         Self {
             name: name.to_string(),
             dir,
@@ -123,7 +131,14 @@ impl Document {
         format!("/s/{}/{}", self.domain, self.path)
     }

-    pub fn render_local(&self, version: Option<String>) -> Option<String> {
+    pub async fn render_local(&self, version: Option<String>) -> Option<String> {
+        if check_blacklist(&self.domain) {
+            let content = html! {
+                h3 { "This site is blacklisted" };
+            };
+            return Some(render_page(content, RequestContext::default()).await.1 .1);
+        }
+
         let mut file_path = self.doc_dir();

         let latest_version = if let Some(version) = version {
@@ -175,14 +190,24 @@ impl WebsiteArchive {
     }

     /// Archive a URL
-    pub fn archive_url(&self, url: &str) {
+    pub async fn archive_url(&self, url: &str) {
         let parsed_url = url::Url::parse(url).unwrap();

         let domain = parsed_url.domain().unwrap().trim_start_matches("www.");

+        // Deny blacklist
+        if check_blacklist(domain) {
+            return;
+        }
+
         let path = parsed_url.path();

         let mut folder_name = self.dir.join(&domain);

+        if !std::fs::exists(&folder_name).unwrap() {
+            download_fav_for(domain).await;
+        }
+
         for paths in path.split('/') {
             if !paths.is_empty() {
                 folder_name = folder_name.join(paths);
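A portability note on the first-seen check: `std::fs::exists` was only stabilized in Rust 1.81, so this hunk pins the minimum toolchain. On older compilers the same check can be written with `Path::try_exists`; a minimal sketch:

```rust
use std::path::Path;

// Equivalent first-seen check for toolchains without std::fs::exists.
// `folder_name` is the per-domain directory from archive_url above.
fn is_new_domain(folder_name: &Path) -> bool {
    !folder_name.try_exists().unwrap_or(false)
}
```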
src/blacklist.rs (new file, 18 additions)

@@ -0,0 +1,18 @@
+pub fn check_blacklist(domain: &str) -> bool {
+    let blacklist_raw = std::env::var("BLACKLIST_DOMAINS").unwrap_or_default();
+
+    if blacklist_raw.is_empty() {
+        return false;
+    }
+
+    let blacklist: Vec<&str> = blacklist_raw.split(',').collect();
+
+    for domain_regex in blacklist {
+        let rgx = regex::Regex::new(domain_regex).unwrap();
+        if rgx.is_match(domain) {
+            return true;
+        }
+    }
+
+    return false;
+}
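A quick check of the new function's behavior, written as a hypothetical test (mutating the process environment is racy under parallel test runs, so this is illustration only):

```rust
#[cfg(test)]
mod tests {
    use super::check_blacklist;

    #[test]
    fn blacklist_matches_env_patterns() {
        // Hypothetical test mirroring the compose-file example above.
        std::env::set_var("BLACKLIST_DOMAINS", "google.com,.*.youtube.com");
        assert!(check_blacklist("google.com"));
        assert!(check_blacklist("m.youtube.com"));
        assert!(!check_blacklist("example.com"));
    }
}
```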
src/favicon.rs

@@ -15,10 +15,15 @@ pub async fn download_favicon(domain: &str) -> Option<Vec<u8>> {
     Some(favicon_data)
 }

-pub async fn download_favicons_for_sites(sites: Vec<String>) {
-    for site in sites {
+pub async fn download_fav_for(site: &str) {
     if let Some(fav) = download_favicon(&site).await {
         std::fs::write(std::path::Path::new("./favicon").join(site), fav).unwrap();
-        log::info!("Writting favicon for {site}");
+        log::info!("Writing favicon for {site}");
     }
 }
+
+pub async fn download_favicons_for_sites(sites: &[String]) {
+    for site in sites {
+        download_fav_for(site).await;
+    }
+}
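Splitting the single-site path out of the loop is what lets `archive_url` above fetch a favicon the first time a domain is seen, and switching the batch helper to `&[String]` means callers keep ownership of their list. A usage sketch with a hypothetical caller:

```rust
// Hypothetical caller: the slice is borrowed, so `sites`
// remains usable after the batch download.
async fn refresh(sites: Vec<String>) {
    download_favicons_for_sites(&sites).await; // &Vec<String> coerces to &[String]
    download_fav_for("example.com").await;     // single domain, on demand
    println!("{} sites refreshed", sites.len());
}
```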
src/main.rs

@@ -2,6 +2,7 @@ use archive::WebsiteArchive;
 use rocket::routes;

 mod archive;
+mod blacklist;
 mod favicon;
 mod pages;

@@ -11,7 +12,11 @@ async fn launch() -> _ {

     let arc = WebsiteArchive::new("./websites");

-    favicon::download_favicons_for_sites(arc.domains()).await;
+    let archive = arc.clone();
+
+    tokio::spawn(async move {
+        favicon::download_favicons_for_sites(&archive.domains()).await;
+    });

     rocket::build()
         .mount(
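Moving the favicon batch into `tokio::spawn` keeps a slow network loop off the server's startup path. The `arc.clone()` (enabled by the new `#[derive(Clone)]` on `WebsiteArchive`) is needed because the spawned future must own its data. A minimal standalone sketch of the pattern, with illustrative names rather than the project's real types:

```rust
// Minimal sketch of the clone-then-spawn startup pattern.
#[derive(Clone)]
struct State {
    dir: std::path::PathBuf,
}

#[tokio::main]
async fn main() {
    let state = State { dir: "./websites".into() };

    // The task owns its own copy, so startup continues immediately.
    let background = state.clone();
    tokio::spawn(async move {
        println!("warming caches under {:?}", background.dir);
    });

    // `state` is still available here for the web server.
    println!("serving from {:?}", state.dir);
}
```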
src/pages.rs

@@ -113,11 +113,13 @@ pub async fn render_website(
 ) -> Option<StringResponse> {
     let document = arc.get_domain(domain).path(path.to_str().unwrap());

-    let content = document.render_local(if time.is_some() {
-        Some(time.unwrap().to_string())
-    } else {
-        None
-    });
+    let content = document
+        .render_local(if time.is_some() {
+            Some(time.unwrap().to_string())
+        } else {
+            None
+        })
+        .await;

     if let Some(content) = content {
         return Some(respond_html(&content));
@@ -127,13 +129,18 @@ pub async fn render_website(
         .as_str()
         == "true"
     {
-        arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()));
+        arc.archive_url(&format!("https://{domain}/{}", path.to_str().unwrap()))
+            .await;

-        return Some(respond_html(&document.render_local(if time.is_some() {
-            Some(time.unwrap().to_string())
-        } else {
-            None
-        })?));
+        return Some(respond_html(
+            &document
+                .render_local(if time.is_some() {
+                    Some(time.unwrap().to_string())
+                } else {
+                    None
+                })
+                .await?,
+        ));
     }
 }
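Aside: the `if time.is_some() { Some(time.unwrap().to_string()) } else { None }` construction repeated in both hunks is exactly `Option::map`. A sketch, assuming `time` is an `Option` of some `ToString` type:

```rust
// Equivalent, without is_some()/unwrap():
let content = document
    .render_local(time.map(|t| t.to_string()))
    .await;
```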