parent 3696f61b02
commit 20aeb7edac

4 changed files with 37 additions and 30 deletions

@@ -80,7 +80,7 @@ impl Document {
         let content = String::from_utf8_lossy(&buf);
 
         if get_config().ROUTE_INTERNAL {
-            Some(internalize_urls(&content))
+            Some(internalize_urls(&content, &self.domain))
         } else {
             Some(content.to_string())
         }
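
For context on the flag read above: `ROUTE_INTERNAL` decides whether an archived page is served with its links rewritten to stay inside the archive. The config itself lives in conf.rs and is not touched by this commit, so the sketch below is an assumption that only mirrors the two names used at the call site:

```rust
// Hypothetical sketch: the real Config is defined in conf.rs and is not
// shown in this diff. Only `get_config` and `ROUTE_INTERNAL` are taken
// from the call site above.
#[allow(non_snake_case)]
pub struct Config {
    /// When true, URLs inside archived documents are rewritten to the
    /// internal `/s/<domain>/<path..>` routes instead of the live web.
    pub ROUTE_INTERNAL: bool,
}

pub fn get_config() -> &'static Config {
    static CONFIG: Config = Config {
        ROUTE_INTERNAL: true,
    };
    &CONFIG
}
```
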
@@ -123,4 +123,4 @@ impl Document {
         res.reverse();
         res
     }
-}
\ No newline at end of file
+}

@@ -6,7 +6,6 @@ use crate::blacklist::check_blacklist;
 
 use super::{read_dir, Document};
 
-
 /// Represents a domain within the website archive
 pub struct Domain {
     /// Domain name

@@ -123,4 +122,4 @@ impl PathEntry {
     pub fn path(&self) -> &String {
         &self.1
     }
-}
\ No newline at end of file
+}

@@ -3,7 +3,7 @@ use std::{collections::HashSet, path::PathBuf};
 use crate::{
     blacklist::{check_blacklist, check_blacklist_path},
     conf::get_config,
-    favicon::download_fav_for
+    favicon::download_fav_for,
 };
 
 mod document;

@@ -27,20 +27,28 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
 }
 
 /// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
-fn internalize_urls(input: &str) -> String {
-    let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
+fn internalize_urls(input: &str, base: &str) -> String {
+    let url_pattern = r#"(?:(https?://([a-zA-Z0-9.-]+))?(/[\w./-]*))"#;
     let re = regex::Regex::new(url_pattern).unwrap();
 
     re.replace_all(input, |caps: &regex::Captures| {
-        let domain = caps[1].trim_start_matches("www.");
-        let path = &caps[2];
-
-        // Dont transform if in blacklist
-        if check_blacklist(domain) {
-            return format!("https://{domain}/{path}");
+        if let Some(domain) = caps.get(2) {
+            let domain = domain.as_str().trim_start_matches("www.");
+            let path = caps.get(3).map_or("", |m| m.as_str());
+
+            // Skip transformation if the domain is in the blacklist
+            if check_blacklist(domain) {
+                format!("https://{domain}{path}")
+            } else {
+                format!("/s/{domain}{path}")
+            }
+        } else if let Some(path) = caps.get(3) {
+            // Handle relative paths
+            format!("/s/{base}{}", path.as_str())
+        } else {
+            // Default fallback
+            caps[0].to_string()
         }
-
-        format!("/s/{domain}/{path}")
     })
     .to_string()
 }
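
To see what the rewritten function does, here is a self-contained sketch of the same logic with `check_blacklist` stubbed out so that no domain is exempted; it needs only the `regex` crate, and the input string is illustrative:

```rust
// Stub standing in for crate::blacklist::check_blacklist.
fn check_blacklist(_domain: &str) -> bool {
    false
}

fn internalize_urls(input: &str, base: &str) -> String {
    let url_pattern = r#"(?:(https?://([a-zA-Z0-9.-]+))?(/[\w./-]*))"#;
    let re = regex::Regex::new(url_pattern).unwrap();

    re.replace_all(input, |caps: &regex::Captures| {
        if let Some(domain) = caps.get(2) {
            // Absolute URL: strip "www." and route it through the archive.
            let domain = domain.as_str().trim_start_matches("www.");
            let path = caps.get(3).map_or("", |m| m.as_str());
            if check_blacklist(domain) {
                format!("https://{domain}{path}")
            } else {
                format!("/s/{domain}{path}")
            }
        } else if let Some(path) = caps.get(3) {
            // Relative path: anchor it to the domain this page came from.
            format!("/s/{base}{}", path.as_str())
        } else {
            caps[0].to_string()
        }
    })
    .to_string()
}

fn main() {
    let out = internalize_urls(
        "see https://www.other.org/page and /img/logo.png",
        "example.com",
    );
    assert_eq!(out, "see /s/other.org/page and /s/example.com/img/logo.png");
}
```

Note that making the scheme-and-domain group optional has a side effect: the bare `(/[\w./-]*)` branch matches any slash-prefixed token, so on raw HTML it will also rewrite fragments such as `</a>` or protocol-relative `//host/path` references; the demo input avoids markup for that reason.
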
@@ -175,7 +175,7 @@ pub async fn render_txt_website(
 }
 
 pub struct PathSegment {
-    segments: Vec<String>
+    segments: Vec<String>,
 }
 
 impl PathSegment {

@@ -187,24 +187,24 @@ impl PathSegment {
 impl<'r> FromSegments<'r> for PathSegment {
     type Error = ();
 
-    fn from_segments(segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>) -> Result<Self, Self::Error> {
+    fn from_segments(
+        segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>,
+    ) -> Result<Self, Self::Error> {
         let paths: Vec<_> = segments
-            .filter_map(|x| {
-                if x == "." {
-                    return None;
-                }
+            .filter_map(|x| {
+                if x == "." {
+                    return None;
+                }
 
-                if x == ".." {
-                    return None
-                }
+                if x == ".." {
+                    return None;
+                }
 
-                Some(x.to_string())
-            })
-            .collect();
+                Some(x.to_string())
+            })
+            .collect();
 
-        Ok(PathSegment {
-            segments: paths
-        })
+        Ok(PathSegment { segments: paths })
     }
 }
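
For orientation, this is roughly how a `FromSegments` type is consumed: Rocket hands everything matched by a trailing `<path..>` segment to `from_segments`. The route and handler below are illustrative assumptions rather than part of this commit, though the `/s/<domain>/<path..>` shape matches the doc comment on `internalize_urls`:

```rust
use rocket::get;

// Hypothetical handler; only the /s/<domain>/<path..> shape is taken from
// the internalize_urls doc comment, everything else is illustrative.
#[get("/s/<domain>/<path..>")]
fn archived(domain: &str, path: PathSegment) -> String {
    // from_segments already dropped "." and ".." components, so the joined
    // path cannot climb out of the archive directory.
    format!("{domain}/{}", path.segments.join("/"))
}
```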