mirrord/src/proxy.rs
2024-09-17 15:35:36 +02:00

251 lines
9 KiB
Rust

use actix_web::{HttpRequest, HttpResponse};
use rand::prelude::*;
use std::{
path::{Path, PathBuf},
sync::Arc,
};
use crate::config::Config;
// TODO: download from several mirrors and compare the results to detect a misbehaving mirror
// TODO: better logging
// TODO: better cache management
/// A caching proxy in front of a set of upstream mirrors.
///
/// Requests are answered from an on-disk cache when possible and fetched from
/// one of the configured mirrors otherwise.
pub struct Mirror {
/// Base URLs of the upstream mirrors; the request path is appended to one of
/// them to build the download URL.
mirrors: Vec<Arc<String>>,
/// Root directory under which cached responses are stored.
data_dir: String,
/// Request paths matching this pattern skip the initial cache lookup and are
/// fetched from the network first.
no_cache: regex::Regex,
/// When set, request paths that do not match this pattern are answered with
/// `404 Not Found`.
only_allow: Option<regex::Regex>,
/// Copy of the configuration this mirror was built from (consulted for `ttl`
/// and `index`).
config: Config,
}
impl Mirror {
/// Builds a `Mirror` from the given configuration.
///
/// # Arguments
///
/// * `config` - The configuration to read the mirrors, cache directory and patterns from.
///
/// # Panics
///
/// Panics if `config.no_cache` or `config.only_allow` is not a valid regular expression.
pub fn new(config: &Config) -> Self {
    // Each mirror URL is wrapped in an `Arc` so the list can be cloned cheaply per request.
    let mirrors: Vec<Arc<String>> = config.mirrors.clone().into_iter().map(Arc::new).collect();
    let data_dir = config.cache_dir.clone();
    let no_cache = regex::Regex::new(&config.no_cache).unwrap();
    let only_allow = config
        .only_allow
        .clone()
        .map(|pattern| regex::Regex::new(&pattern).unwrap());
    let config = config.clone();

    Self {
        mirrors,
        data_dir,
        no_cache,
        only_allow,
        config,
    }
}
/// Creates a cache directory at the specified path.
///
/// If the specified path points to a file, the file is preserved: it is moved aside to a
/// temporary sibling (its full name with ".tmp" appended), the directory is created in its
/// place, and the file is moved into the new directory under the name "index".
///
/// If the specified path does not exist or already is a directory, the directory (and any
/// missing parents) is created.
///
/// # Arguments
///
/// * `dir` - A reference to the path where the cache directory should be created.
///
/// # Panics
///
/// This function panics if any of the file system operations fail, such as renaming files or
/// creating directories.
pub fn create_cache_dir(dir: &Path) {
    if !dir.is_file() {
        std::fs::create_dir_all(dir).unwrap();
        return;
    }

    // Append ".tmp" to the complete file name instead of replacing the extension:
    // `with_extension("tmp")` would map "pkg.tar" to "pkg.tmp" and the rename below would
    // silently overwrite an unrelated cached file of that name.
    let mut tmp_name = dir.as_os_str().to_os_string();
    tmp_name.push(".tmp");
    // Never move the file on top of an existing entry; keep extending the name until it is free.
    while Path::new(&tmp_name).exists() {
        tmp_name.push(".tmp");
    }
    let tmp_file_path = PathBuf::from(tmp_name);

    std::fs::rename(dir, &tmp_file_path).expect("Failed to rename file");
    std::fs::create_dir_all(dir).expect("Failed to create directory");
    std::fs::rename(&tmp_file_path, dir.join("index"))
        .expect("Failed to move file into directory");
}
/// Tells whether the cached file at `p` has outlived the configured TTL.
///
/// The TTL (`config.ttl`) is expressed in minutes and compared against the time elapsed since
/// the file's last modification.
///
/// # Arguments
///
/// * `p` - A reference to the path of the cached data file.
///
/// # Returns
///
/// * `true` if the file is older than the TTL.
/// * `false` if no TTL is configured, if the file is still fresh, or if its age cannot be
///   determined (missing file, unreadable metadata, modification time in the future).
///
/// # Panics
///
/// Panics if the TTL converted to seconds does not fit into a `u64`.
pub fn is_cache_invalid(&self, p: &Path) -> bool {
    let ttl = match self.config.ttl {
        Some(ttl) => ttl,
        None => return false,
    };

    let modified = match p.metadata().and_then(|meta| meta.modified()) {
        Ok(modified) => modified,
        Err(_) => return false,
    };
    let age = match std::time::SystemTime::now().duration_since(modified) {
        Ok(age) => age,
        Err(_) => return false,
    };

    // TTL is configured in minutes.
    let limit = std::time::Duration::from_secs((ttl * 60).try_into().unwrap());
    let expired = age > limit;
    if expired {
        log::info!(
            "Cached file is {} minutes old. Older than TTL {}.",
            (age.as_secs() / 60),
            ttl
        );
    }
    expired
}
/// Asynchronously retrieves content for the specified request path, either from cache or mirrors.
///
/// The lookup proceeds as follows:
///
/// 1. If `only_allow` is configured and `path` does not match it, `404 Not Found` is returned.
/// 2. If `path` would resolve outside of the cache directory (it contains `..` components or
///    is absolute after stripping the leading `/`), `404 Not Found` is returned.
/// 3. Unless `path` matches `no_cache` or the cached file is older than the TTL, the cached
///    copy is returned when present.
/// 4. Otherwise the content is fetched from the mirrors; if no mirror delivers a successful
///    response (e.g. when offline), the cached copy is used as a fallback.
///
/// # Arguments
///
/// * `path` - The request path (normally starting with `/`) to retrieve content for.
/// * `req` - The HTTP request associated with the retrieval operation.
///
/// # Returns
///
/// An `Option` containing an `HttpResponse` if content is successfully retrieved (or the
/// request was rejected with `404`), or `None` if the content could not be found or fetched
/// from any source.
///
/// # Panics
///
/// Panics if the cache directory for `path` cannot be created (see `create_cache_dir`).
pub async fn get(&self, path: &str, req: &HttpRequest) -> Option<HttpResponse> {
    // check if path is in scope
    if let Some(only_allow) = &self.only_allow {
        if !only_allow.is_match(path) {
            return Some(HttpResponse::NotFound().finish());
        }
    }

    // Map the request path onto the cache directory. `strip_prefix` (instead of slicing with
    // `[1..]`) cannot panic on an empty path.
    let relative = Path::new(path.strip_prefix('/').unwrap_or(path));
    // The path comes straight from the client: refuse anything that is not a plain relative
    // path. `..` would climb out of the cache directory and an absolute remainder (e.g. from
    // "//etc/passwd") would make `join` discard the cache directory altogether.
    let stays_in_cache = relative.components().all(|component| {
        matches!(
            component,
            std::path::Component::Normal(_) | std::path::Component::CurDir
        )
    });
    if !stays_in_cache {
        log::info!("Rejecting {path}: resolves outside of the cache directory");
        return Some(HttpResponse::NotFound().finish());
    }
    let p = Path::new(&self.data_dir).join(relative);

    // make sure the directory that will hold the cached file exists
    if let Some(parent) = p.parent() {
        Self::create_cache_dir(parent);
    }

    // check if cache should be used
    if !self.no_cache.is_match(path) && !self.is_cache_invalid(&p) {
        // use cache if present
        if let Some(resp) = self.fetch_cache(&p, req).await {
            log::info!("Returning {path} from cache");
            return Some(resp);
        }
    }

    // fetch from network, if no response (offline) then use cache
    if let Some(resp) = self.fetch_network(path, &p).await {
        return Some(resp);
    }
    let cached = self.fetch_cache(&p, req).await;
    // Only claim a cache hit when there actually is one.
    if cached.is_some() {
        log::info!("Returning {path} from cache");
    }
    cached
}
/// Asynchronously serves a cached entry from disk, if there is one.
///
/// When `p` is a directory, the file named "index" inside of it is served; otherwise `p`
/// itself is served.
///
/// # Arguments
///
/// * `p` - A reference to the path of the cached data file.
/// * `req` - A reference to the HTTP request associated with the fetch operation.
///
/// # Returns
///
/// An optional HTTP response built from the cached file, or `None` if `p` does not exist or
/// the file could not be opened.
pub async fn fetch_cache(&self, p: &PathBuf, req: &HttpRequest) -> Option<HttpResponse> {
    if !p.exists() {
        return None;
    }

    let opened = if p.is_dir() {
        actix_files::NamedFile::open_async(p.join("index")).await
    } else {
        actix_files::NamedFile::open_async(&p).await
    };

    let cached_file = opened.ok()?;
    Some(cached_file.into_response(req))
}
/// Asynchronously tries the configured mirrors in random order until one answers successfully.
///
/// For every mirror the download URL is the mirror's base URL followed by `path`. The first
/// response with a success status is returned; failed requests and non-success responses make
/// the search continue with the next mirror.
///
/// # Arguments
///
/// * `path` - The path of the resource to fetch from network mirrors.
/// * `local` - A reference to the local path where the fetched data will be stored.
///
/// # Returns
///
/// An optional HTTP response containing the fetched data if successful, or `None` if no mirror
/// delivered a successful response.
pub async fn fetch_network(&self, path: &str, local: &PathBuf) -> Option<HttpResponse> {
    let mut candidates = self.mirrors.clone();
    candidates.shuffle(&mut rand::thread_rng());
    log::info!("Fetching {path} from mirrors");

    for base in candidates {
        let url = format!("{base}{path}");
        match self.get_url(&url, local).await {
            Some(response) if response.status().is_success() => return Some(response),
            _ => continue,
        }
    }

    None
}
pub fn index_page(&self) -> Option<HttpResponse> {
if let Some(index) = &self.config.index {
return match index.as_str() {
"no" => None,
file => Some(
HttpResponse::Ok()
.content_type("text/html")
.body(std::fs::read_to_string(file).unwrap()),
),
};
}
return None;
}
/// Asynchronously fetches content from the specified URL and saves it to the provided file path.
///
/// This function sends an HTTP GET request to the URL specified by `path`, retrieves the response,
/// and saves the response body to the file specified by `save`. If the HTTP request is successful
/// (status code 2xx), the response body is saved to the file.
///
/// # Arguments
///
/// * `path` - The URL from which to fetch content.
/// * `save` - The file path where the fetched content should be saved.
///
/// # Returns
///
/// An `Option` containing an `HttpResponse` if the request was successful and the response was received,
/// or `None` if there was an error during the request or response retrieval.
pub async fn get_url(&self, path: &str, save: &PathBuf) -> Option<HttpResponse> {
log::info!("Fetching {path}");
let response = reqwest::get(path).await.ok()?;
let status_code = response.status();
let body_bytes = response.bytes().await.ok()?;
if status_code.is_success() {
log::debug!("Writing request to {save:?}");
std::fs::write(save, &body_bytes).unwrap();
}
let mut http_response = HttpResponse::build(
actix_web::http::StatusCode::from_u16(status_code.as_u16()).unwrap(),
);
let http_response = http_response.body(body_bytes);
Some(http_response)
}
}