diff --git a/src/archive/document.rs b/src/archive/document.rs
index aa08772..7cdf0c5 100644
--- a/src/archive/document.rs
+++ b/src/archive/document.rs
@@ -80,7 +80,7 @@ impl Document {
         let content = String::from_utf8_lossy(&buf);
 
         if get_config().ROUTE_INTERNAL {
-            Some(internalize_urls(&content))
+            Some(internalize_urls(&content, &self.domain))
         } else {
             Some(content.to_string())
         }
@@ -123,4 +123,4 @@ impl Document {
         res.reverse();
         res
     }
-}
\ No newline at end of file
+}
diff --git a/src/archive/domain.rs b/src/archive/domain.rs
index 79239aa..98d6f16 100644
--- a/src/archive/domain.rs
+++ b/src/archive/domain.rs
@@ -6,7 +6,6 @@ use crate::blacklist::check_blacklist;
 
 use super::{read_dir, Document};
 
-
 /// Represents a domain within the website archive
 pub struct Domain {
     /// Domain name
@@ -123,4 +122,4 @@ impl PathEntry {
     pub fn path(&self) -> &String {
         &self.1
     }
-}
\ No newline at end of file
+}
diff --git a/src/archive/mod.rs b/src/archive/mod.rs
index 7dcaa04..b6f9d8f 100644
--- a/src/archive/mod.rs
+++ b/src/archive/mod.rs
@@ -3,7 +3,7 @@ use std::{collections::HashSet, path::PathBuf};
 use crate::{
     blacklist::{check_blacklist, check_blacklist_path},
     conf::get_config,
-    favicon::download_fav_for
+    favicon::download_fav_for,
 };
 
 mod document;
@@ -27,20 +27,28 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
 }
 
 /// Rewrite all URLs in `input` to the format `/s/<domain>/<path>`
-fn internalize_urls(input: &str) -> String {
-    let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
+fn internalize_urls(input: &str, base: &str) -> String {
+    let url_pattern = r#"(?:(https?://([a-zA-Z0-9.-]+))?(/[\w./-]*))"#;
     let re = regex::Regex::new(url_pattern).unwrap();
 
     re.replace_all(input, |caps: &regex::Captures| {
-        let domain = caps[1].trim_start_matches("www.");
-        let path = &caps[2];
+        if let Some(domain) = caps.get(2) {
+            let domain = domain.as_str().trim_start_matches("www.");
+            let path = caps.get(3).map_or("", |m| m.as_str());
 
-        // Dont transform if in blacklist
-        if check_blacklist(domain) {
-            return format!("https://{domain}/{path}");
+            // Skip transformation if the domain is in the blacklist
+            if check_blacklist(domain) {
+                format!("https://{domain}{path}")
+            } else {
+                format!("/s/{domain}{path}")
+            }
+        } else if let Some(path) = caps.get(3) {
+            // Handle relative paths
+            format!("/s/{base}{}", path.as_str())
+        } else {
+            // Default fallback
+            caps[0].to_string()
         }
-
-        format!("/s/{domain}/{path}")
     })
     .to_string()
 }
diff --git a/src/pages/mod.rs b/src/pages/mod.rs
index a0cc978..b698af4 100644
--- a/src/pages/mod.rs
+++ b/src/pages/mod.rs
@@ -175,7 +175,7 @@ pub async fn render_txt_website(
 }
 
 pub struct PathSegment {
-    segments: Vec<String>
+    segments: Vec<String>,
 }
 
 impl PathSegment {
@@ -187,24 +187,24 @@ impl PathSegment {
 impl<'r> FromSegments<'r> for PathSegment {
     type Error = ();
 
-    fn from_segments(segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>) -> Result<Self, Self::Error> {
+    fn from_segments(
+        segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>,
+    ) -> Result<Self, Self::Error> {
         let paths: Vec<_> = segments
-            .filter_map(|x| {
-                if x == "." {
-                    return None;
-                }
-
-                if x == ".." {
-                    return None
-                }
+            .filter_map(|x| {
+                if x == "." {
+                    return None;
+                }
 
-                Some(x.to_string())
-            })
-            .collect();
+                if x == ".." {
+                    return None;
+                }
 
-        Ok(PathSegment {
-            segments: paths
-        })
+                Some(x.to_string())
+            })
+            .collect();
+
+        Ok(PathSegment { segments: paths })
     }
 }