fix urls
Some checks failed: ci/woodpecker/push/build pipeline failed

JMARyA 2025-01-14 19:10:49 +01:00
parent 3696f61b02
commit 20aeb7edac
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
4 changed files with 37 additions and 30 deletions

@@ -80,7 +80,7 @@ impl Document {
         let content = String::from_utf8_lossy(&buf);
         if get_config().ROUTE_INTERNAL {
-            Some(internalize_urls(&content))
+            Some(internalize_urls(&content, &self.domain))
         } else {
             Some(content.to_string())
         }

@@ -6,7 +6,6 @@ use crate::blacklist::check_blacklist;
 use super::{read_dir, Document};
 /// Represents a domain within the website archive
 pub struct Domain {
     /// Domain name

@@ -3,7 +3,7 @@ use std::{collections::HashSet, path::PathBuf};
 use crate::{
     blacklist::{check_blacklist, check_blacklist_path},
     conf::get_config,
-    favicon::download_fav_for
+    favicon::download_fav_for,
 };
 mod document;
@@ -27,20 +27,28 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
 }
 /// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
-fn internalize_urls(input: &str) -> String {
-    let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
+fn internalize_urls(input: &str, base: &str) -> String {
+    let url_pattern = r#"(?:(https?://([a-zA-Z0-9.-]+))?(/[\w./-]*))"#;
     let re = regex::Regex::new(url_pattern).unwrap();
     re.replace_all(input, |caps: &regex::Captures| {
-        let domain = caps[1].trim_start_matches("www.");
-        let path = &caps[2];
-        // Dont transform if in blacklist
-        if check_blacklist(domain) {
-            return format!("https://{domain}/{path}");
+        if let Some(domain) = caps.get(2) {
+            let domain = domain.as_str().trim_start_matches("www.");
+            let path = caps.get(3).map_or("", |m| m.as_str());
+            // Skip transformation if the domain is in the blacklist
+            if check_blacklist(domain) {
+                format!("https://{domain}{path}")
+            } else {
+                format!("/s/{domain}{path}")
+            }
+        } else if let Some(path) = caps.get(3) {
+            // Handle relative paths
+            format!("/s/{base}{}", path.as_str())
+        } else {
+            // Default fallback
+            caps[0].to_string()
         }
-        format!("/s/{domain}/{path}")
    })
    .to_string()
 }
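To see what the new matching does, here is a minimal standalone sketch: it copies the rewritten internalize_urls body, stubs check_blacklist with a single made-up domain, and asserts the three cases the commit distinguishes (absolute URL, relative path resolved against base, blacklisted host left as-is). It assumes only the regex crate as a dependency; the domains and HTML snippet are illustrative, not taken from the repository.

// Hypothetical blacklist stub, standing in for the archive's real check_blacklist.
fn check_blacklist(domain: &str) -> bool {
    domain == "tracker.example"
}

// Copy of the rewritten function from the diff, usable on its own.
fn internalize_urls(input: &str, base: &str) -> String {
    let url_pattern = r#"(?:(https?://([a-zA-Z0-9.-]+))?(/[\w./-]*))"#;
    let re = regex::Regex::new(url_pattern).unwrap();
    re.replace_all(input, |caps: &regex::Captures| {
        if let Some(domain) = caps.get(2) {
            let domain = domain.as_str().trim_start_matches("www.");
            let path = caps.get(3).map_or("", |m| m.as_str());
            if check_blacklist(domain) {
                // Blacklisted hosts keep pointing at the original site.
                format!("https://{domain}{path}")
            } else {
                // Absolute URLs are routed through the archive.
                format!("/s/{domain}{path}")
            }
        } else if let Some(path) = caps.get(3) {
            // Relative links resolve against the archived document's own domain.
            format!("/s/{base}{}", path.as_str())
        } else {
            caps[0].to_string()
        }
    })
    .to_string()
}

fn main() {
    // Absolute URL: www. prefix stripped, rewritten under /s/<domain>.
    assert_eq!(
        internalize_urls("https://www.example.com/page", "example.com"),
        "/s/example.com/page"
    );
    // Relative path: prefixed with the base domain passed by the caller.
    assert_eq!(
        internalize_urls(r#"<a href="/about">"#, "example.com"),
        r#"<a href="/s/example.com/about">"#
    );
    // Blacklisted domain: left unchanged.
    assert_eq!(
        internalize_urls("https://tracker.example/pixel", "example.com"),
        "https://tracker.example/pixel"
    );
}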

@@ -175,7 +175,7 @@ pub async fn render_txt_website(
 }
 pub struct PathSegment {
-    segments: Vec<String>
+    segments: Vec<String>,
 }
impl PathSegment { impl PathSegment {
@@ -187,24 +187,24 @@ impl PathSegment {
 impl<'r> FromSegments<'r> for PathSegment {
     type Error = ();
-    fn from_segments(segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>) -> Result<Self, Self::Error> {
+    fn from_segments(
+        segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>,
+    ) -> Result<Self, Self::Error> {
         let paths: Vec<_> = segments
             .filter_map(|x| {
                 if x == "." {
                     return None;
                 }
                 if x == ".." {
-                    return None
+                    return None;
                 }
                 Some(x.to_string())
             })
             .collect();
-        Ok(PathSegment {
-            segments: paths
-        })
+        Ok(PathSegment { segments: paths })
     }
 }
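The from_segments change is only a formatting reflow, but the filtering it performs is easy to exercise on its own. A small sketch, assuming a plain &str iterator in place of Rocket's Segments type and made-up input segments:

struct PathSegment {
    segments: Vec<String>,
}

// Mirrors the filter in the diff: "." and ".." segments are dropped,
// everything else is kept as an owned String.
fn filter_segments<'a>(segments: impl Iterator<Item = &'a str>) -> PathSegment {
    let paths: Vec<_> = segments
        .filter_map(|x| {
            if x == "." || x == ".." {
                return None;
            }
            Some(x.to_string())
        })
        .collect();
    PathSegment { segments: paths }
}

fn main() {
    let p = filter_segments(["..", ".", "css", "style.css"].into_iter());
    assert_eq!(p.segments, vec!["css".to_string(), "style.css".to_string()]);
}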