parent
3696f61b02
commit
20aeb7edac
4 changed files with 37 additions and 30 deletions
|
@ -80,7 +80,7 @@ impl Document {
|
||||||
let content = String::from_utf8_lossy(&buf);
|
let content = String::from_utf8_lossy(&buf);
|
||||||
|
|
||||||
if get_config().ROUTE_INTERNAL {
|
if get_config().ROUTE_INTERNAL {
|
||||||
Some(internalize_urls(&content))
|
Some(internalize_urls(&content, &self.domain))
|
||||||
} else {
|
} else {
|
||||||
Some(content.to_string())
|
Some(content.to_string())
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,6 @@ use crate::blacklist::check_blacklist;
|
||||||
|
|
||||||
use super::{read_dir, Document};
|
use super::{read_dir, Document};
|
||||||
|
|
||||||
|
|
||||||
/// Represents a domain within the website archive
|
/// Represents a domain within the website archive
|
||||||
pub struct Domain {
|
pub struct Domain {
|
||||||
/// Domain name
|
/// Domain name
|
||||||
|
|
|
@ -3,7 +3,7 @@ use std::{collections::HashSet, path::PathBuf};
|
||||||
use crate::{
|
use crate::{
|
||||||
blacklist::{check_blacklist, check_blacklist_path},
|
blacklist::{check_blacklist, check_blacklist_path},
|
||||||
conf::get_config,
|
conf::get_config,
|
||||||
favicon::download_fav_for
|
favicon::download_fav_for,
|
||||||
};
|
};
|
||||||
|
|
||||||
mod document;
|
mod document;
|
||||||
|
@ -27,20 +27,28 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
|
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
|
||||||
fn internalize_urls(input: &str) -> String {
|
fn internalize_urls(input: &str, base: &str) -> String {
|
||||||
let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
|
let url_pattern = r#"(?:(https?://([a-zA-Z0-9.-]+))?(/[\w./-]*))"#;
|
||||||
let re = regex::Regex::new(url_pattern).unwrap();
|
let re = regex::Regex::new(url_pattern).unwrap();
|
||||||
|
|
||||||
re.replace_all(input, |caps: &regex::Captures| {
|
re.replace_all(input, |caps: &regex::Captures| {
|
||||||
let domain = caps[1].trim_start_matches("www.");
|
if let Some(domain) = caps.get(2) {
|
||||||
let path = &caps[2];
|
let domain = domain.as_str().trim_start_matches("www.");
|
||||||
|
let path = caps.get(3).map_or("", |m| m.as_str());
|
||||||
|
|
||||||
// Dont transform if in blacklist
|
// Skip transformation if the domain is in the blacklist
|
||||||
if check_blacklist(domain) {
|
if check_blacklist(domain) {
|
||||||
return format!("https://{domain}/{path}");
|
format!("https://{domain}{path}")
|
||||||
|
} else {
|
||||||
|
format!("/s/{domain}{path}")
|
||||||
|
}
|
||||||
|
} else if let Some(path) = caps.get(3) {
|
||||||
|
// Handle relative paths
|
||||||
|
format!("/s/{base}{}", path.as_str())
|
||||||
|
} else {
|
||||||
|
// Default fallback
|
||||||
|
caps[0].to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
format!("/s/{domain}/{path}")
|
|
||||||
})
|
})
|
||||||
.to_string()
|
.to_string()
|
||||||
}
|
}
|
||||||
|
|
|
@ -175,7 +175,7 @@ pub async fn render_txt_website(
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct PathSegment {
|
pub struct PathSegment {
|
||||||
segments: Vec<String>
|
segments: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PathSegment {
|
impl PathSegment {
|
||||||
|
@ -187,24 +187,24 @@ impl PathSegment {
|
||||||
impl<'r> FromSegments<'r> for PathSegment {
|
impl<'r> FromSegments<'r> for PathSegment {
|
||||||
type Error = ();
|
type Error = ();
|
||||||
|
|
||||||
fn from_segments(segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>) -> Result<Self, Self::Error> {
|
fn from_segments(
|
||||||
|
segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>,
|
||||||
|
) -> Result<Self, Self::Error> {
|
||||||
let paths: Vec<_> = segments
|
let paths: Vec<_> = segments
|
||||||
.filter_map(|x| {
|
.filter_map(|x| {
|
||||||
if x == "." {
|
if x == "." {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
if x == ".." {
|
if x == ".." {
|
||||||
return None
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(x.to_string())
|
Some(x.to_string())
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Ok(PathSegment {
|
Ok(PathSegment { segments: paths })
|
||||||
segments: paths
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue