From dc10052c1688c5790495a9bb1716e21e64c4f434 Mon Sep 17 00:00:00 2001 From: JMARyA Date: Fri, 3 Jan 2025 00:20:22 +0100 Subject: [PATCH] update --- src/archive.rs | 19 ++++++++++++++++++- src/pages/mod.rs | 18 ++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 9c03191..fdd09ed 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -1,4 +1,4 @@ -use std::{io::Read, path::PathBuf}; +use std::{collections::HashSet, io::Read, path::PathBuf}; use based::{request::RequestContext, result::LogAndIgnore}; use maud::html; @@ -39,6 +39,23 @@ fn internalize_urls(input: &str) -> String { .to_string() } +/// Extract all domains +pub fn extract_domains(input: &str) -> Vec { + let url_pattern = r"https?://([a-zA-Z0-9.-]+)(/[\w./-]*)?"; + let re = regex::Regex::new(url_pattern).unwrap(); + + let mut domains = HashSet::new(); + for caps in re.captures_iter(input) { + let domain = caps[1].trim_start_matches("www."); + domains.insert(domain.to_string()); + } + + let mut domains: Vec<_> = domains.into_iter().collect(); + domains.sort(); + + domains +} + /// Represents a directory containg archived websites #[derive(Debug, Clone)] pub struct WebsiteArchive { diff --git a/src/pages/mod.rs b/src/pages/mod.rs index f3a8680..003325b 100644 --- a/src/pages/mod.rs +++ b/src/pages/mod.rs @@ -15,7 +15,7 @@ use serde_json::json; use webarc::{ ai::{generate_embedding, remove_data_urls, EmbedStore, SearchResult}, - archive::WebsiteArchive, + archive::{extract_domains, WebsiteArchive}, conf::get_config, render_page, }; @@ -88,7 +88,7 @@ pub async fn domain_info_route( let (path_entries, is_doc) = domain.paths(paths.to_str().unwrap()); let path_seperations: Vec<&str> = paths.to_str().unwrap().split('/').collect(); - // TODO : Show domains beeing linked on the page + let domains = extract_domains(&document.render_local(None).await.unwrap_or_default()); let content = html! { h2 class="text-xl font-bold mb-4 flex items-center" { @@ -134,6 +134,20 @@ pub async fn domain_info_route( }; }; }; + + @if !domains.is_empty() { + div class="max-w-md mx-auto p-4 bg-neutral-900 rounded-lg shadow-md" { + h3 class="font-bold mb-2" { "Domains linked on this page:" }; + ul class="space-y-2 p-4" { + @for domain in domains { + a href=(format!("/d/{domain}")) class="flex items-center gap-2 p-3 border bg-neutral-800 rounded hover:shadow-lg transition" { + (favicon(&domain)); + span class="font-medium" { (domain) }; + }; + }; + }; + }; + }; }; render_page(content, ctx).await