parent
3e35dd669a
commit
d6d44b457c
7 changed files with 1079 additions and 148 deletions
Cargo.lock (generated, 957 lines changed)
File diff suppressed because it is too large
Cargo.toml

@@ -9,9 +9,9 @@ edition = "2021"
 chrono = "0.4.35"
 env_logger = "0.11.3"
 log = "0.4.21"
-rusqlite = "0.30.0"
 serde = { version = "1.0.196", features = ["derive"] }
 serde_json = "1.0.113"
 toml = "0.8.10"
-jobdispatcher = { git = "https://git.hydrar.de/jmarya/jobdispatcher" }
 reqwest = { version = "0.11.26", features = ["blocking", "json"] }
+sqlx = { version = "0.8", features = ["postgres", "sqlite", "runtime-tokio-native-tls", "derive", "uuid", "chrono", "json"] }
+tokio = { version = "1.42.0", features = ["full"] }
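The dependency swap above is the migration in miniature: rusqlite (synchronous SQLite) and the jobdispatcher queue give way to sqlx pools on tokio. A minimal smoke test of this feature set, not part of the commit, would be:

// Sketch (not from this commit): confirms the sqlx features above
// ("sqlite" + "runtime-tokio-native-tls") can open a pool and run a query.
use sqlx::sqlite::SqlitePool;

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    // "sqlite::memory:" opens a throwaway in-memory database.
    let pool = SqlitePool::connect("sqlite::memory:").await?;
    let row: (i64,) = sqlx::query_as("SELECT 1").fetch_one(&pool).await?;
    assert_eq!(row.0, 1);
    Ok(())
}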
config.toml

@@ -2,6 +2,12 @@
 # Data Download Directory
 data_dir = "./download"
 
+# Sqlite
+database = "data/download.db"
+
+# Postgres
+#database = "postgres://user:password@localhost/dbname"
+
 [youtube]
 # Interval in minutes for checking
 interval = 2
migrations/0001_init.sql (new file, 14 lines)

@@ -0,0 +1,14 @@
+CREATE TABLE IF NOT EXISTS urls (
+    id INTEGER PRIMARY KEY,
+    url TEXT NOT NULL,
+    timestamp TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS item_log (
+    id INTEGER PRIMARY KEY,
+    module TEXT NOT NULL,
+    name TEXT NOT NULL,
+    url TEXT NOT NULL,
+    timestamp TEXT NOT NULL,
+    CONSTRAINT unique_module_name_url UNIQUE (module, name, url)
+);
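This schema is now applied through a compile-time embedded migration: sqlx::migrate!("./migrations") in src/db.rs below picks up 0001_init.sql and tracks applied versions in sqlx's own bookkeeping table. The UNIQUE constraint is what the ON CONFLICT upsert in src/db.rs relies on. A hedged sketch of running the migrator standalone:

// Sketch, assuming the crate layout of this commit (migrations/ next to
// Cargo.toml). sqlx::migrate! embeds the directory at compile time.
use sqlx::sqlite::SqlitePool;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let pool = SqlitePool::connect("sqlite::memory:").await?;
    sqlx::migrate!("./migrations").run(&pool).await?;

    // Both tables from 0001_init.sql should now exist.
    let (n,): (i64,) = sqlx::query_as(
        "SELECT COUNT(*) FROM sqlite_master WHERE name IN ('urls', 'item_log')",
    )
    .fetch_one(&pool)
    .await?;
    assert_eq!(n, 2);
    Ok(())
}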
src/config.rs

@@ -9,6 +9,9 @@ use crate::yt_dlp::config::YtDlpConfig;
 pub struct HoardConfig {
     /// Top level data download directory
     pub data_dir: PathBuf,
+
+    // Database (Sqlite or Postgres)
+    pub database: String,
 }
 
 /// Top level global config
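For reference, a sketch of how the new database key maps from config.toml into these structs. The [hoard] table name is an assumption: the config hunk above starts at line 2 of the file, and main.rs reads config.hoard.database.

// Hedged sketch; mirrors HoardConfig from this diff with a trimmed-down
// GlobalConfig (the real one also carries [youtube] and other modules).
use serde::Deserialize;
use std::path::PathBuf;

#[derive(Deserialize)]
struct HoardConfig {
    data_dir: PathBuf,
    database: String,
}

#[derive(Deserialize)]
struct GlobalConfig {
    hoard: HoardConfig, // assumed [hoard] table, see note above
}

fn main() {
    let cfg: GlobalConfig = toml::from_str(
        "[hoard]\ndata_dir = \"./download\"\ndatabase = \"data/download.db\"",
    )
    .unwrap();
    assert_eq!(cfg.hoard.database, "data/download.db");
}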
src/db.rs (231 lines changed)
@@ -1,101 +1,135 @@
-use jobdispatcher::{JobDispatcher, JobOrder};
-use rusqlite::{Connection, OptionalExtension};
-use std::sync::{mpsc::Receiver, Arc};
-
+#[derive(Clone)]
 pub struct DatabaseBackend {
-    pub file: String,
-    pub conn: Connection,
-    pub dispatcher: Arc<JobDispatcher<Query, Out>>,
-    pub recv: Receiver<JobOrder<Query, Out>>,
+    pub db_url: String,
+    pub sqlite: Option<sqlx::Pool<sqlx::Sqlite>>,
+    pub postgres: Option<sqlx::Pool<sqlx::Postgres>>,
 }
 
+pub fn ensure_file_exists(path: &str) {
+    // Check if the file exists
+    if !std::path::Path::new(path).exists() {
+        // If the file does not exist, create an empty one
+        match std::fs::File::create(path) {
+            Ok(_) => log::info!("Created {path}"),
+            Err(e) => log::error!("Failed to create file: {}", e),
+        }
+    }
+}
+
 impl DatabaseBackend {
-    pub fn new(file: &str) -> Self {
-        let (dispatcher, recv) = jobdispatcher::JobDispatcher::<Query, Out>::new();
-        let conn = Connection::open(file).unwrap();
-
-        conn.execute(
-            "CREATE TABLE IF NOT EXISTS urls (
-            id INTEGER PRIMARY KEY,
-            url TEXT NOT NULL,
-            timestamp TEXT NOT NULL
-        )",
-            [],
-        )
-        .unwrap();
-
-        conn.execute(
-            "CREATE TABLE IF NOT EXISTS item_log (
-            id INTEGER PRIMARY KEY,
-            module TEXT NOT NULL,
-            name TEXT NOT NULL,
-            url TEXT NOT NULL,
-            timestamp TEXT NOT NULL
-        )",
-            [],
-        )
-        .unwrap();
-
-        let dispatcher = Arc::new(dispatcher);
+    pub async fn new(db_url: &str) -> Self {
+        let mut sqlite = None;
+        let mut postgres = None;
+
+        if db_url.starts_with("postgres") {
+            postgres = Some(
+                sqlx::postgres::PgPoolOptions::new()
+                    .max_connections(5)
+                    .connect(&std::env::var("DATABASE_URL").unwrap())
+                    .await
+                    .unwrap(),
+            );
+            sqlx::migrate!("./migrations")
+                .run(postgres.as_ref().unwrap())
+                .await
+                .unwrap();
+        } else {
+            ensure_file_exists(db_url);
+            sqlite = Some(sqlx::sqlite::SqlitePool::connect(db_url).await.unwrap());
+            sqlx::migrate!("./migrations")
+                .run(sqlite.as_ref().unwrap())
+                .await
+                .unwrap();
+        }
+
         Self {
-            file: file.to_string(),
-            conn,
-            dispatcher,
-            recv,
+            db_url: db_url.to_string(),
+            sqlite,
+            postgres,
         }
     }
 
     pub fn take_db(&self) -> Database {
-        Database::new(self.dispatcher.clone())
+        Database::new(self.clone())
     }
-    pub fn run(&self) {
-        while let Ok(job) = self.recv.recv() {
-            match job.param {
-                Query::InsertUrl(ref url) => {
-                    let timestamp = chrono::Local::now().to_rfc3339();
-                    self.conn
-                        .execute(
-                            "INSERT INTO urls (url, timestamp) VALUES (?, ?)",
-                            [url, &timestamp],
-                        )
-                        .unwrap();
-                    job.done(Out::Ok);
-                }
-                Query::CheckForUrl(ref url) => {
-                    let mut stmt = self
-                        .conn
-                        .prepare("SELECT COUNT(*) FROM urls WHERE url = ?")
-                        .unwrap();
-                    let count: i64 = stmt.query_row([url], |row| row.get(0)).unwrap();
-                    job.done(Out::Bool(count > 0));
-                }
-                Query::UpdateNewDownloads(ref module, ref name, ref url) => {
-                    let timestamp = chrono::Local::now().to_rfc3339();
-
-                    // Check if the entry exists
-                    let existing_timestamp: Option<String> = self.conn.query_row(
-                        "SELECT timestamp FROM item_log WHERE module = ? AND name = ? AND url = ?",
-                        [module, name, url],
-                        |row| row.get(0)
-                    ).optional().unwrap();
-
-                    if existing_timestamp.is_some() {
-                        // Entry exists, update timestamp
-                        self.conn.execute(
-                            "UPDATE item_log SET timestamp = ? WHERE module = ? AND name = ? AND url = ?",
-                            [&timestamp, module, name, url]
-                        ).unwrap();
-                    } else {
-                        // Entry doesn't exist, insert new row
-                        self.conn.execute(
-                            "INSERT INTO item_log (module, name, url, timestamp) VALUES (?, ?, ?, ?)",
-                            [module, name, url, &timestamp]
-                        ).unwrap();
-                    }
-
-                    job.done(Out::Ok);
-                }
-            }
-        }
-    }
+    pub async fn query(&self, param: Query) -> Out {
+        match param {
+            Query::InsertUrl(ref url) => {
+                if let Some(postgres) = self.postgres.as_ref() {
+                    sqlx::query("INSERT INTO urls (url, timestamp) VALUES ($1, CURRENT_TIMESTAMP)")
+                        .bind(url)
+                        .execute(postgres)
+                        .await
+                        .unwrap();
+                } else {
+                    if let Some(sqlite) = self.sqlite.as_ref() {
+                        sqlx::query(
+                            "INSERT INTO urls (url, timestamp) VALUES ($1, CURRENT_TIMESTAMP)",
+                        )
+                        .bind(url)
+                        .execute(sqlite)
+                        .await
+                        .unwrap();
+                    }
+                }
+
+                return Out::Ok;
+            }
+            Query::CheckForUrl(ref url) => {
+                let res: (i64,) = if let Some(postgres) = self.postgres.as_ref() {
+                    sqlx::query_as("SELECT COUNT(*) FROM urls WHERE url = $1")
+                        .bind(url)
+                        .fetch_one(postgres)
+                        .await
+                        .unwrap()
+                } else {
+                    sqlx::query_as("SELECT COUNT(*) FROM urls WHERE url = $1")
+                        .bind(url)
+                        .fetch_one(self.sqlite.as_ref().unwrap())
+                        .await
+                        .unwrap()
+                };
+
+                let count: i64 = res.0;
+                return Out::Bool(count > 0);
+            }
+            Query::UpdateNewDownloads(ref module, ref name, ref url) => {
+                if let Some(postgres) = self.postgres.as_ref() {
+                    sqlx::query(
+                        r#"
+                        INSERT INTO item_log (module, name, url, timestamp)
+                        VALUES ($1, $2, $3, CURRENT_TIMESTAMP)
+                        ON CONFLICT (module, name, url)
+                        DO UPDATE SET timestamp = CURRENT_TIMESTAMP
+                        "#,
+                    )
+                    .bind(module)
+                    .bind(name)
+                    .bind(url)
+                    .execute(postgres)
+                    .await
+                    .unwrap();
+                } else {
+                    if let Some(sqlite) = self.sqlite.as_ref() {
+                        sqlx::query(
+                            r#"
+                            INSERT INTO item_log (module, name, url, timestamp)
+                            VALUES ($1, $2, $3, CURRENT_TIMESTAMP)
+                            ON CONFLICT (module, name, url)
+                            DO UPDATE SET timestamp = CURRENT_TIMESTAMP
+                            "#,
+                        )
+                        .bind(module)
+                        .bind(name)
+                        .bind(url)
+                        .execute(sqlite)
+                        .await
+                        .unwrap();
+                    }
+                }
+
+                return Out::Ok;
+            }
+        }
+    }
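The new constructor picks the backend by sniffing the URL scheme: anything starting with "postgres" gets a Postgres pool (note it connects via the DATABASE_URL environment variable rather than the passed db_url), and everything else is treated as a SQLite file path. Callers already inside a tokio runtime can call query() directly; a usage sketch, assuming Query and Out are public as the rest of the diff suggests:

// Usage sketch under this commit's API; unwrap-style error handling matches
// the surrounding code.
use hoard::db::{DatabaseBackend, Out, Query};

#[tokio::main]
async fn main() {
    // A non-"postgres" URL falls through to the SQLite branch.
    let backend = DatabaseBackend::new("data/download.db").await;

    backend
        .query(Query::InsertUrl("https://example.org/video".into()))
        .await;

    match backend
        .query(Query::CheckForUrl("https://example.org/video".into()))
        .await
    {
        Out::Bool(seen) => assert!(seen),
        Out::Ok => unreachable!("CheckForUrl always answers with Out::Bool"),
    }
}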
@@ -115,17 +149,20 @@ pub enum Out {
 
 #[derive(Clone)]
 pub struct Database {
-    conn: Arc<JobDispatcher<Query, Out>>,
+    conn: DatabaseBackend,
 }
 
 impl Database {
-    pub fn new(conn: Arc<JobDispatcher<Query, Out>>) -> Self {
+    pub fn new(conn: DatabaseBackend) -> Self {
         Self { conn }
     }
 
     /// Insert a URL into the database as already downloaded
     pub fn insert_url(&self, url: &str) {
-        self.conn.send(Query::InsertUrl(url.to_string()));
+        let rt = tokio::runtime::Runtime::new().unwrap();
+        rt.block_on(async {
+            self.conn.query(Query::InsertUrl(url.to_string())).await;
+        });
     }
 
     /// Check if a URL is already in the database
@@ -142,19 +179,27 @@ impl Database {
     /// }
     /// ```
     pub fn check_for_url(&self, url: &str) -> bool {
-        match self.conn.send(Query::CheckForUrl(url.to_string())) {
-            Out::Ok => false,
-            Out::Bool(b) => b,
-        }
+        let rt = tokio::runtime::Runtime::new().unwrap();
+        rt.block_on(async {
+            match self.conn.query(Query::CheckForUrl(url.to_string())).await {
+                Out::Ok => false,
+                Out::Bool(b) => b,
+            }
+        })
     }
 
     /// Keep a record of when downloads happen.
     /// This takes a `module`, `name` and `url` and saves a timestamp to the db.
     pub fn update_new_downloads(&self, module: &str, name: &str, url: &str) {
-        self.conn.send(Query::UpdateNewDownloads(
-            module.to_string(),
-            name.to_string(),
-            url.to_string(),
-        ));
+        let rt = tokio::runtime::Runtime::new().unwrap();
+        rt.block_on(async {
+            self.conn
+                .query(Query::UpdateNewDownloads(
+                    module.to_string(),
+                    name.to_string(),
+                    url.to_string(),
+                ))
+                .await;
+        });
     }
 }
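These wrappers keep the Database API synchronous by building a fresh tokio Runtime per call and blocking on the async query. That only works from plain OS threads; tokio's Runtime::new panics when invoked from inside an existing runtime. An alternative sketch (not this commit's code) that builds the runtime once and reuses it:

// Alternative sketch: one shared Runtime per wrapper instead of one per call.
// Query, Out, and DatabaseBackend are the types from the diff above.
use hoard::db::{DatabaseBackend, Out, Query};
use tokio::runtime::Runtime;

struct BlockingDb {
    backend: DatabaseBackend,
    rt: Runtime,
}

impl BlockingDb {
    fn new(backend: DatabaseBackend) -> Self {
        Self { backend, rt: Runtime::new().unwrap() }
    }

    fn check_for_url(&self, url: &str) -> bool {
        self.rt.block_on(async {
            match self.backend.query(Query::CheckForUrl(url.to_string())).await {
                Out::Bool(b) => b,
                Out::Ok => false,
            }
        })
    }
}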
src/main.rs (12 lines changed)
@@ -1,10 +1,13 @@
+#![feature(async_closure)]
+
 use hoard::config::GlobalConfig;
 use hoard::{ensure_dir_exists, Module};
 
 // todo : migrate to async code?
 // todo : better log options
 
-fn main() {
+#[tokio::main]
+async fn main() {
     #[cfg(debug_assertions)]
     {
         std::env::set_var("RUST_LOG", "trace");
@@ -21,11 +24,12 @@ fn main() {
 
     log::info!("Starting hoard");
 
-    let db = hoard::db::DatabaseBackend::new("data/download.db");
     let config: GlobalConfig =
         toml::from_str(&std::fs::read_to_string("config.toml").unwrap()).unwrap();
     ensure_dir_exists(&config.hoard.data_dir);
 
+    let db = hoard::db::DatabaseBackend::new(&config.hoard.database).await;
+
     let mut modules: Vec<Box<dyn Module>> = vec![];
 
     if let Some(yt_config) = config.youtube {
@@ -56,10 +60,6 @@ fn main() {
         )));
     }
 
-    let _db_thread = std::thread::spawn(move || {
-        db.run();
-    });
-
     let threads: Vec<_> = modules
         .into_iter()
        .map(|x| {
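Dropping the dedicated db thread works because each module still runs on a plain std::thread (the map over modules below spawns one per module), so the blocking Database wrappers never execute on the #[tokio::main] runtime itself. In sketch form, with names assumed from this diff:

// Threading-shape sketch (assumed from the surrounding diff): async main owns
// the pools; modules call the blocking wrappers from ordinary OS threads.
#[tokio::main]
async fn main() {
    let backend = hoard::db::DatabaseBackend::new("data/download.db").await;
    let db = backend.take_db();

    let handle = std::thread::spawn(move || {
        // Outside the tokio runtime, so the wrapper's internal
        // Runtime::new() + block_on does not nest runtimes.
        let _seen = db.check_for_url("https://example.org/video");
    });
    handle.join().unwrap();
}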