fix urls
Some checks failed
ci/woodpecker/push/build Pipeline failed

This commit is contained in:
JMARyA 2025-01-14 19:10:49 +01:00
parent 3696f61b02
commit 20aeb7edac
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
4 changed files with 37 additions and 30 deletions

View file

@ -80,7 +80,7 @@ impl Document {
let content = String::from_utf8_lossy(&buf);
if get_config().ROUTE_INTERNAL {
Some(internalize_urls(&content))
Some(internalize_urls(&content, &self.domain))
} else {
Some(content.to_string())
}

View file

@ -6,7 +6,6 @@ use crate::blacklist::check_blacklist;
use super::{read_dir, Document};
/// Represents a domain within the website archive
pub struct Domain {
/// Domain name

View file

@ -3,7 +3,7 @@ use std::{collections::HashSet, path::PathBuf};
use crate::{
blacklist::{check_blacklist, check_blacklist_path},
conf::get_config,
favicon::download_fav_for
favicon::download_fav_for,
};
mod document;
@ -27,20 +27,28 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
}
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
fn internalize_urls(input: &str) -> String {
let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)";
fn internalize_urls(input: &str, base: &str) -> String {
let url_pattern = r#"(?:(https?://([a-zA-Z0-9.-]+))?(/[\w./-]*))"#;
let re = regex::Regex::new(url_pattern).unwrap();
re.replace_all(input, |caps: &regex::Captures| {
let domain = caps[1].trim_start_matches("www.");
let path = &caps[2];
if let Some(domain) = caps.get(2) {
let domain = domain.as_str().trim_start_matches("www.");
let path = caps.get(3).map_or("", |m| m.as_str());
// Don't transform if in blacklist
// Skip transformation if the domain is in the blacklist
if check_blacklist(domain) {
return format!("https://{domain}/{path}");
format!("https://{domain}{path}")
} else {
format!("/s/{domain}{path}")
}
} else if let Some(path) = caps.get(3) {
// Handle relative paths
format!("/s/{base}{}", path.as_str())
} else {
// Default fallback
caps[0].to_string()
}
format!("/s/{domain}/{path}")
})
.to_string()
}

View file

@ -175,7 +175,7 @@ pub async fn render_txt_website(
}
pub struct PathSegment {
segments: Vec<String>
segments: Vec<String>,
}
impl PathSegment {
@ -187,7 +187,9 @@ impl PathSegment {
impl<'r> FromSegments<'r> for PathSegment {
type Error = ();
fn from_segments(segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>) -> Result<Self, Self::Error> {
fn from_segments(
segments: rocket::http::uri::Segments<'r, rocket::http::uri::fmt::Path>,
) -> Result<Self, Self::Error> {
let paths: Vec<_> = segments
.filter_map(|x| {
if x == "." {
@ -195,16 +197,14 @@ impl<'r> FromSegments<'r> for PathSegment {
}
if x == ".." {
return None
return None;
}
Some(x.to_string())
})
.collect();
Ok(PathSegment {
segments: paths
})
Ok(PathSegment { segments: paths })
}
}