any mime
Some checks are pending
ci/woodpecker/push/build Pipeline is pending

This commit is contained in:
JMARyA 2025-02-24 19:30:20 +01:00
parent a9f758cd9b
commit 2e5b4fc3d2
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
9 changed files with 141 additions and 89 deletions

View file

@ -3,9 +3,9 @@ use std::{io::Read, path::PathBuf};
use based::{request::RequestContext, ui::components::prelude::Shell};
use maud::html;
use crate::{blacklist::check_blacklist, conf::get_config, render_page};
use crate::{blacklist::check_blacklist, render_page};
use super::{internalize_urls, read_dir};
use super::read_dir;
/// Represents a document within a domain
pub struct Document {
@ -52,43 +52,39 @@ impl Document {
///
/// # Returns
/// `Ok` with the raw bytes of the stored document, or `Err` with a string: the rendered
/// "blacklisted" notice page, or an empty string if no version exists or the file cannot be opened.
pub async fn render_local(&self, version: Option<String>, shell: &Shell) -> Option<String> {
pub async fn render_local(
&self,
version: Option<String>,
shell: &Shell,
) -> Result<Vec<u8>, String> {
if check_blacklist(&self.domain) {
let content = html! {
h3 { "This site is blacklisted" };
};
return Some(
render_page(content, RequestContext::default(), shell)
.await
.1
.1,
);
return Err(render_page(content, RequestContext::default(), shell)
.await
.1
.1);
}
let mut file_path = self.doc_dir();
let latest_version = if let Some(version) = version {
format!("index_{version}.html")
format!("index_{version}")
} else {
let versions = self.versions();
let version = versions.first().cloned()?;
format!("index_{version}.html")
let version = versions.first().cloned().ok_or(String::new())?;
format!("index_{version}")
};
file_path = file_path.join(latest_version);
let mut buf = Vec::new();
std::fs::File::open(file_path)
.ok()?
.map_err(|_| String::new())?
.read_to_end(&mut buf)
.unwrap();
let content = String::from_utf8_lossy(&buf);
if get_config().ROUTE_INTERNAL {
Some(internalize_urls(&content, &self.domain))
} else {
Some(content.to_string())
}
Ok(buf)
}
/// Determines the directory where the document is stored.
@ -127,12 +123,8 @@ impl Document {
let mut res: Vec<String> = read_dir(&self.doc_dir())
.into_iter()
.filter_map(|x| {
if x.starts_with("index_") && x.ends_with(".html") {
return Some(
x.trim_start_matches("index_")
.trim_end_matches(".html")
.to_string(),
);
if x.starts_with("index_") {
return Some(x.trim_start_matches("index_").to_string());
}
None

View file

@ -98,7 +98,7 @@ impl Domain {
.filter(|x| !x.is_empty())
.collect::<Vec<&str>>()
.join("/");
if entry.starts_with("index_") && entry.ends_with(".html") {
if entry.starts_with("index_") {
is_doc = true;
continue;
}

View file

@ -32,7 +32,7 @@ pub fn read_dir(dir: &PathBuf) -> Vec<String> {
}
/// Rewrite all URLs in `input` to the format `/s/<domain>/<path..>`
fn internalize_urls(input: &str, base: &str) -> String {
pub fn internalize_urls(input: &str, base: &str) -> String {
// TODO: fix the regex; URLs that consist of a domain without a path are not captured
let url_pattern = r#"(\ |"|')(?:(<?)(https?:\/\/([a-zA-Z0-9.-]+))?(\/[\w./-]*))"#;
let re = regex::Regex::new(url_pattern).unwrap();
@ -172,7 +172,7 @@ impl WebsiteArchive {
std::fs::create_dir_all(&folder_name).unwrap();
let timestamp = chrono::Utc::now().format("%Y-%m-%d").to_string();
let filename = folder_name.join(format!("index_{timestamp}.html"));
let filename = folder_name.join(format!("index_{timestamp}"));
log::info!("Archiving {url} to {}", filename.to_str().unwrap());
@ -238,10 +238,17 @@ fn run_command(cmd: &[&str]) {
let child = cmd_setup.spawn().unwrap();
let status = child.wait_with_output().unwrap();
assert!(status.status.success());
if !status.status.success() {
log::warn!(
"Command {cmd:?} exited with code {}",
status.status.code().unwrap_or_default()
)
}
}
pub async fn index_archive_db(arc: &WebsiteArchive) {
// TODO: index more attributes (e.g. size, MIME type)
log::info!("Indexing archive");
for dom in arc.domains() {