Compare commits

...

10 commits

Author SHA1 Message Date
0f35d34bcb
add custom index 2024-09-17 15:35:36 +02:00
e4005b38ce
update action 2024-07-25 23:55:13 +02:00
704530ee41
refactor 2024-07-10 09:17:09 +02:00
fd581bf9d9
refactor 2024-05-10 07:51:30 +02:00
e5a18ae047
merge fix 2024-04-26 18:54:05 +02:00
193163bca6
docs 2024-04-16 16:39:08 +02:00
ef14646507
limit path scope 2024-04-16 16:26:18 +02:00
2cc0bfbb09
cache ttl 2024-04-15 10:23:38 +02:00
b0ce294ec8
refactor 2024-04-15 09:18:29 +02:00
d1a688d7d7
fix cache 2024-04-13 22:22:17 +02:00
10 changed files with 697 additions and 458 deletions

View file

@ -0,0 +1,35 @@
name: deploy
on:
push:
branches:
- main
jobs:
deploy:
runs-on: host
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Log in to Docker registry
uses: docker/login-action@v2
with:
registry: git.hydrar.de
username: ${{ secrets.registry_user }}
password: ${{ secrets.registry_password }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
platforms: linux/amd64,linux/arm64
push: true
tags: git.hydrar.de/jmarya/mirrord:latest

View file

@ -1,25 +0,0 @@
name: build
on:
push:
branches:
- main
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Install Docker
run: curl -fsSL https://get.docker.com | sh
- name: Log in to Docker registry
run: echo "${{ secrets.registry_password }}" | docker login -u "jmarya" --password-stdin git.hydrar.de
- name: Build and push Docker image
run: |
docker build -t git.hydrar.de/jmarya/mirrord:latest .
docker push git.hydrar.de/jmarya/mirrord:latest

885
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,15 +1,15 @@
[package]
name = "mirrord"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
[dependencies]
actix-files = "0.6.5"
actix-web = "4.5.1"
env_logger = "0.11.3"
log = "0.4.21"
rand = "0.8.5"
regex = "1.10.4"
reqwest = "0.12.3"
serde = { version = "1.0.197", features = ["derive"] }
toml = "0.8.12"
actix-files = "0.6"
actix-web = "4.5"
env_logger = "0.11"
log = "0.4"
rand = "0.8"
regex = "1.10"
reqwest = "0.12"
serde = { version = "1.0", features = ["derive"] }
toml = "0.8"

View file

@ -12,6 +12,7 @@ mirrors = [
cache_dir = "./data"
no_cache = '.*(?:db|db\.sig)$'
only_allow = '^\/archlinux'
```
Add this to your mirrorlist:

View file

@ -7,3 +7,4 @@ services:
volumes:
- ./data:/data
- ./sample.conf:/mirrord.conf
# - ./static/:/static # Custom Index

View file

@ -9,7 +9,18 @@ mirrors = [
# Local cache directory
cache_dir = "./data"
#ttl = "3h"
# Time in minutes before a file is counted as out of date
ttl = 180
# Regex for paths which will never be served from cache
no_cache = '.*(?:db|db\.sig)$'
# Redirect only paths matching this regex to the mirrors, return 404 otherwise
only_allow = '^\/archlinux'
# Show an index page on root path /
# Options:
# - `no` // disable index page
# - `<path>` // custom index page html file
index = "no"

View file

@ -2,15 +2,25 @@ use serde::Deserialize;
use crate::proxy::Mirror;
#[derive(Debug, Deserialize)]
/// Mirrord configuration
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
/// List of mirror hosts to fetch data from.
pub mirrors: Vec<String>,
/// Directory path where cached data will be stored.
pub cache_dir: String,
/// Regex for which paths should never use cache
pub no_cache: String,
/// Time-to-live (TTL) duration for cached data in minutes
pub ttl: Option<usize>,
/// Regex for allowing only specific path requests
pub only_allow: Option<String>,
/// Index Page
pub index: Option<String>,
}
impl Config {
pub fn to_proxy(&self) -> Mirror {
Mirror::new(self.mirrors.clone(), &self.cache_dir, &self.no_cache)
Mirror::new(self)
}
}

View file

@ -7,6 +7,12 @@ async fn index(req: HttpRequest) -> impl Responder {
let path = req.path();
let p: &actix_web::web::Data<Mirror> = req.app_data().unwrap();
if path == "/" {
if let Some(ret) = p.index_page() {
return ret;
}
}
let data = p.get(path, &req).await;
data.unwrap_or_else(|| HttpResponse::NotFound().finish())
}

View file

@ -5,18 +5,32 @@ use std::{
sync::Arc,
};
use crate::config::Config;
// todo : download from many mirrors and compare results if one acts badly
// todo : better logging
// todo : better cache management
pub struct Mirror {
mirrors: Vec<Arc<String>>,
data_dir: String,
no_cache: regex::Regex,
only_allow: Option<regex::Regex>,
config: Config,
}
impl Mirror {
pub fn new(mirrors: Vec<String>, data_dir: &str, no_cache: &str) -> Self {
pub fn new(config: &Config) -> Self {
let mirrors = config.mirrors.clone();
Self {
mirrors: mirrors.into_iter().map(Arc::new).collect(),
data_dir: data_dir.to_string(),
no_cache: regex::Regex::new(no_cache).unwrap(),
data_dir: config.cache_dir.clone(),
no_cache: regex::Regex::new(&config.no_cache).unwrap(),
only_allow: config
.only_allow
.clone()
.map(|x| regex::Regex::new(&x).unwrap()),
config: config.clone(),
}
}
@ -49,6 +63,42 @@ impl Mirror {
}
}
/// Reports whether the cached file at `p` is older than the configured TTL.
///
/// The TTL is read from `config.ttl` and is expressed in minutes. The age of
/// the file is derived from its last-modified timestamp.
///
/// # Arguments
///
/// * `p` - Path of the cached data file.
///
/// # Returns
///
/// * `true` if the file's age exceeds the TTL.
/// * `false` if no TTL is configured, if the file is still fresh, or if the
///   age cannot be determined (metadata or modification time unavailable, or
///   a modification time that lies in the future).
pub fn is_cache_invalid(&self, p: &Path) -> bool {
    // Without a TTL cached files never expire.
    let Some(ttl_minutes) = self.config.ttl else {
        return false;
    };

    // Saturate rather than overflow or panic when the configured TTL is huge.
    let ttl = std::time::Duration::from_secs(
        u64::try_from(ttl_minutes)
            .unwrap_or(u64::MAX)
            .saturating_mul(60),
    );

    // Any failure while computing the age means "treat the cache as valid".
    let age = p
        .metadata()
        .and_then(|meta| meta.modified())
        .ok()
        .and_then(|modified| std::time::SystemTime::now().duration_since(modified).ok());

    match age {
        Some(age) if age > ttl => {
            log::info!(
                "Cached file is {} minutes old. Older than TTL {}.",
                age.as_secs() / 60,
                ttl_minutes
            );
            true
        }
        _ => false,
    }
}
/// Asynchronously retrieves content from the specified path, either from cache or mirrors.
///
/// This function attempts to retrieve content from the specified `path`. If caching is enabled
@ -68,12 +118,44 @@ impl Mirror {
let p = std::path::Path::new(&path[1..]);
let p = std::path::Path::new(&self.data_dir).join(p);
if !self.no_cache.is_match(path) {
Self::create_cache_dir(p.parent().unwrap());
// check if path is in scope
if let Some(only_allow) = &self.only_allow {
if !only_allow.is_match(path) {
return Some(HttpResponse::NotFound().finish());
}
}
if p.exists() {
// todo : refresh caches
// check if cache should be used
Self::create_cache_dir(p.parent().unwrap());
if !self.no_cache.is_match(path) && !self.is_cache_invalid(&p) {
// use cache if present
if let Some(resp) = self.fetch_cache(&p, req).await {
log::info!("Returning {path} from cache");
return Some(resp);
}
}
// fetch from network, if no response (offline) then use cache
if let Some(resp) = self.fetch_network(path, &p).await {
Some(resp)
} else {
log::info!("Returning {path} from cache");
self.fetch_cache(&p, req).await
}
}
/// Asynchronously fetches cached data from the specified path and returns an HTTP response if the data exists.
///
/// # Arguments
///
/// * `p` - A reference to the path of the cached data file.
/// * `req` - A reference to the HTTP request associated with the fetch operation.
///
/// # Returns
///
/// An optional HTTP response containing the fetched data if it exists, or `None` if the data does not exist.
pub async fn fetch_cache(&self, p: &PathBuf, req: &HttpRequest) -> Option<HttpResponse> {
if p.exists() {
if p.is_dir() {
return Some(
actix_files::NamedFile::open_async(p.join("index"))
@ -82,6 +164,7 @@ impl Mirror {
.into_response(req),
);
}
return Some(
actix_files::NamedFile::open_async(&p)
.await
@ -89,24 +172,52 @@ impl Mirror {
.into_response(req),
);
}
None
}
/// Asynchronously fetches data from network mirrors and returns an HTTP response if successful.
///
/// # Arguments
///
/// * `path` - The path of the resource to fetch from network mirrors.
/// * `local` - A reference to the local path where the fetched data will be stored.
///
/// # Returns
///
/// An optional HTTP response containing the fetched data if successful, or `None` if fetching fails.
pub async fn fetch_network(&self, path: &str, local: &PathBuf) -> Option<HttpResponse> {
let mut mirrors = self.mirrors.clone();
mirrors.shuffle(&mut rand::thread_rng());
log::info!("Fetching {path} from mirrors");
for mirror in mirrors {
let url = format!("{mirror}{path}");
let response = self.get_url(&url, &p).await;
if let Some(res) = response {
if res.status().is_success() {
return Some(res);
if let Some(response) = self.get_url(&url, local).await {
if response.status().is_success() {
return Some(response);
}
}
}
None
}
pub fn index_page(&self) -> Option<HttpResponse> {
if let Some(index) = &self.config.index {
return match index.as_str() {
"no" => None,
file => Some(
HttpResponse::Ok()
.content_type("text/html")
.body(std::fs::read_to_string(file).unwrap()),
),
};
}
return None;
}
/// Asynchronously fetches content from the specified URL and saves it to the provided file path.
///
/// This function sends an HTTP GET request to the URL specified by `path`, retrieves the response,
@ -124,7 +235,7 @@ impl Mirror {
/// or `None` if there was an error during the request or response retrieval.
pub async fn get_url(&self, path: &str, save: &PathBuf) -> Option<HttpResponse> {
log::info!("Fetching {path}");
let response = reqwest::get(path).await.unwrap();
let response = reqwest::get(path).await.ok()?;
let status_code = response.status();
let body_bytes = response.bytes().await.ok()?;
if status_code.is_success() {