Compare commits

...

10 commits

Author SHA1 Message Date
4090dbaac7
fix 2024-07-28 21:43:10 +02:00
260ee0b44f
update action 2024-07-25 23:54:19 +02:00
75bd4f49c1
refactor 2024-06-02 23:04:09 +02:00
222bf160dc
fix user already exists 2024-05-11 20:13:13 +02:00
2cfd3b4f54
add category folders for yt-dlp 2024-05-05 16:29:20 +02:00
ebff54a8ba
add webhook support 2024-03-17 00:42:36 +01:00
fc0d7c0307
fix db 2024-03-14 12:41:57 +01:00
815345dc4f
fix 2024-03-14 12:31:49 +01:00
c2ddb4a738
non root user 2024-03-14 12:27:44 +01:00
3e4e3820ff
add tracking for new downloads 2024-03-11 11:15:14 +01:00
15 changed files with 1150 additions and 194 deletions

View file

@ -0,0 +1,36 @@
# Build the Docker image from the repository root and push it to the
# git.hydrar.de registry as `jmarya/hoard:latest` on every push to `main`.
name: deploy
on:
push:
branches:
- main
jobs:
deploy:
# NOTE(review): `host` is presumably the label of a self-hosted runner - confirm.
runs-on: host
steps:
- name: Checkout repository
uses: actions/checkout@v2
# NOTE(review): presumably kept for the multi-arch build that is currently
# commented out in the `platforms` setting below - confirm it is still needed.
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
# NOTE(review): the step name says "Docker Hub", but `registry` points at git.hydrar.de.
- name: Log in to Docker Hub
uses: docker/login-action@v2
with:
registry: git.hydrar.de
username: ${{ secrets.registry_user }}
password: ${{ secrets.registry_password }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
# Multi-arch variant, currently disabled in favour of amd64 only:
# platforms: linux/amd64,linux/arm64
platforms: linux/amd64
push: true
tags: git.hydrar.de/jmarya/hoard:latest

View file

@ -1,25 +0,0 @@
name: build
on:
push:
branches:
- main
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Install Docker
run: curl -fsSL https://get.docker.com | sh
- name: Log in to Docker registry
run: echo "${{ secrets.registry_password }}" | docker login -u "${{ secrets.registry_user }}" --password-stdin git.hydrar.de
- name: Build and push Docker image
run: |
docker build -t git.hydrar.de/jmarya/hoard:latest .
docker push git.hydrar.de/jmarya/hoard:latest

821
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -14,3 +14,4 @@ serde = { version = "1.0.196", features = ["derive"] }
serde_json = "1.0.113"
toml = "0.8.10"
jobdispatcher = { git = "https://git.hydrar.de/jmarya/jobdispatcher" }
reqwest = { version = "0.11.26", features = ["blocking", "json"] }

View file

@ -22,7 +22,8 @@ RUN pacman -Sy --noconfirm archlinux-keyring && \
python-mutagen
COPY --from=builder /app/target/release/hoard /hoard
COPY ./entrypoint.sh /entrypoint.sh
WORKDIR /
CMD ["/hoard"]
CMD ["/bin/bash", "/entrypoint.sh"]

View file

@ -7,5 +7,5 @@ services:
TZ: Europe/Berlin
volumes:
- ./download:/download
- ./download.db:/download.db
- ./data:/data
- ./config.toml:/config.toml

18
entrypoint.sh Normal file
View file

@ -0,0 +1,18 @@
#!/bin/bash
# Container entrypoint: make sure an unprivileged `hoard` user exists, give it
# ownership of the directories the application writes to, then run /hoard as
# that user.
#
# Environment:
#   UID  numeric user id used when creating the `hoard` user (default: 1000).
#        NOTE(review): bash initialises $UID itself when it is not passed in
#        through the environment, so when this script runs as root without UID
#        set the 1000 fallback is probably never reached (useradd would be
#        asked for uid 0 and fail) - confirm and consider a differently named
#        variable.

# Check if the user already exists
if id hoard &>/dev/null; then
    echo "User hoard already exists."
else
    # Create the user
    echo "Creating User ${UID:-1000}"
    useradd -m -u "${UID:-1000}" hoard || exit 1
fi

# Perform other setup tasks.
# Create every directory before chown so a missing volume mount does not make
# chown fail and leave the application without a writable location.
mkdir -p /download /.cache /data
chown -R hoard /download /.cache /data

# Start the application as the user.
# `exec` replaces this shell instead of leaving an idle bash process as PID 1
# between the container runtime and su/hoard.
exec su hoard -c /hoard

View file

@ -2,24 +2,24 @@ use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use crate::yt_dlp::YtDlpConfig;
use crate::yt_dlp::config::YtDlpConfig;
/// General settings for hoard
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HoardConfig {
// Top level data download directory
/// Top level data download directory
pub data_dir: PathBuf,
}
/// Top level global config
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalConfig {
// Hoard Configuration
/// Hoard Configuration
pub hoard: HoardConfig,
// Configuration for the YouTube Module
/// Configuration for the `YouTube` Module
pub youtube: Option<crate::youtube::YouTubeConfig>,
// Configuration for the SoundCloud Module
/// Configuration for the `SoundCloud` Module
pub soundcloud: Option<crate::soundcloud::SoundCloudConfig>,
// Custom instances of yt-dlp
/// Custom instances of `yt-dlp`
pub yt_dlp: Option<Vec<YtDlpConfig>>,
}

View file

@ -1,5 +1,5 @@
use jobdispatcher::{JobDispatcher, JobOrder};
use rusqlite::Connection;
use rusqlite::{Connection, OptionalExtension};
use std::sync::{mpsc::Receiver, Arc};
pub struct DatabaseBackend {
@ -24,6 +24,18 @@ impl DatabaseBackend {
)
.unwrap();
conn.execute(
"CREATE TABLE IF NOT EXISTS item_log (
id INTEGER PRIMARY KEY,
module TEXT NOT NULL,
name TEXT NOT NULL,
url TEXT NOT NULL,
timestamp TEXT NOT NULL
)",
[],
)
.unwrap();
let dispatcher = Arc::new(dispatcher);
Self {
file: file.to_string(),
@ -51,13 +63,39 @@ impl DatabaseBackend {
job.done(Out::Ok);
}
Query::CheckForUrl(ref url) => {
let conn = Connection::open(&self.file).unwrap();
let mut stmt = conn
let mut stmt = self
.conn
.prepare("SELECT COUNT(*) FROM urls WHERE url = ?")
.unwrap();
let count: i64 = stmt.query_row([url], |row| row.get(0)).unwrap();
job.done(Out::Bool(count > 0));
}
Query::UpdateNewDownloads(ref module, ref name, ref url) => {
let timestamp = chrono::Local::now().to_rfc3339();
// Check if the entry exists
let existing_timestamp: Option<String> = self.conn.query_row(
"SELECT timestamp FROM item_log WHERE module = ? AND name = ? AND url = ?",
[module, name, url],
|row| row.get(0)
).optional().unwrap();
if existing_timestamp.is_some() {
// Entry exists, update timestamp
self.conn.execute(
"UPDATE item_log SET timestamp = ? WHERE module = ? AND name = ? AND url = ?",
[&timestamp, module, name, url]
).unwrap();
} else {
// Entry doesn't exist, insert new row
self.conn.execute(
"INSERT INTO item_log (module, name, url, timestamp) VALUES (?, ?, ?, ?)",
[module, name, url, &timestamp]
).unwrap();
}
job.done(Out::Ok);
}
}
}
}
@ -66,6 +104,7 @@ impl DatabaseBackend {
/// Requests that can be sent to the database backend.
pub enum Query {
/// Record the given URL as already downloaded.
InsertUrl(String),
/// Ask whether the given URL is present in the `urls` table.
CheckForUrl(String),
/// Record a download for `(module, name, url)` - in that order - together
/// with the current timestamp in the `item_log` table.
UpdateNewDownloads(String, String, String),
}
pub enum Out {
@ -84,14 +123,38 @@ impl Database {
Self { conn }
}
/// Insert a URL into the database as already downloaded.
///
/// Sends a [`Query::InsertUrl`] for `url` over `self.conn`; the reply is discarded.
pub fn insert_url(&self, url: &str) {
self.conn.send(Query::InsertUrl(url.to_string()));
}
/// Check if a URL is already in the database.
///
/// # Returns
/// Returns `true` if already present, `false` otherwise.
/// A plain `Out::Ok` reply is treated as "not present".
///
/// # Example
/// You could use this function like this:
///
/// ```rust,ignore
/// if !db.check_for_url(some_url) {
///     // do download
/// }
/// ```
pub fn check_for_url(&self, url: &str) -> bool {
match self.conn.send(Query::CheckForUrl(url.to_string())) {
Out::Ok => false,
Out::Bool(b) => b,
}
}
/// Keep a record of when downloads happen.
///
/// Takes a `module`, `name` and `url` and sends them as a
/// [`Query::UpdateNewDownloads`] over `self.conn`; the reply is discarded.
pub fn update_new_downloads(&self, module: &str, name: &str, url: &str) {
self.conn.send(Query::UpdateNewDownloads(
module.to_string(),
name.to_string(),
url.to_string(),
));
}
}

24
src/lib.rs Normal file
View file

@ -0,0 +1,24 @@
use std::path::PathBuf;
pub mod config;
pub mod db;
pub mod soundcloud;
pub mod youtube;
pub mod yt_dlp;
/// Create `dir_path`, including any missing parent directories, unless
/// something already exists at that path.
///
/// # Panics
/// Panics if the directory does not exist yet and cannot be created.
pub fn ensure_dir_exists(dir_path: &PathBuf) {
    // Anything already present at the path (directory or not) is left untouched.
    if dir_path.exists() {
        return;
    }

    std::fs::create_dir_all(dir_path).unwrap();
}
/// Generic module implementation
///
/// Each module gets its own thread to work for itself.
pub trait Module: Send {
/// Friendly, human-readable name of the module.
fn name(&self) -> String;
/// Main loop of the module.
fn run(&self);
}

View file

@ -1,30 +1,9 @@
use std::path::PathBuf;
mod config;
mod db;
mod soundcloud;
mod youtube;
mod yt_dlp;
use config::GlobalConfig;
use crate::yt_dlp::YtDlpModule;
use hoard::config::GlobalConfig;
use hoard::{ensure_dir_exists, Module};
// todo : migrate to async code?
// todo : better log options
pub fn ensure_dir_exists(dir_path: &PathBuf) {
let path = std::path::Path::new(dir_path);
if !path.exists() {
std::fs::create_dir_all(path).unwrap();
}
}
trait Module: Send {
fn name(&self) -> String;
fn run(&self);
}
fn main() {
#[cfg(debug_assertions)]
{
@ -42,19 +21,23 @@ fn main() {
log::info!("Starting hoard");
let db = db::DatabaseBackend::new("download.db");
let db = hoard::db::DatabaseBackend::new("data/download.db");
let config: GlobalConfig =
toml::from_str(&std::fs::read_to_string("config.toml").unwrap()).unwrap();
ensure_dir_exists(&config.hoard.data_dir);
let mut modules: Vec<Box<dyn Module>> = vec![Box::new(youtube::YouTubeModule::new(
config.youtube.unwrap(),
let mut modules: Vec<Box<dyn Module>> = vec![];
if let Some(yt_config) = config.youtube {
modules.push(Box::new(hoard::youtube::YouTubeModule::new(
yt_config,
db.take_db(),
config.hoard.data_dir.join("youtube"),
))];
)));
}
if let Some(sc_config) = config.soundcloud {
modules.push(Box::new(soundcloud::SoundCloudModule::new(
modules.push(Box::new(hoard::soundcloud::SoundCloudModule::new(
sc_config,
db.take_db(),
config.hoard.data_dir.join("soundcloud"),
@ -66,7 +49,7 @@ fn main() {
.name
.clone()
.unwrap_or_else(|| "yt_dlp".to_string());
modules.push(Box::new(YtDlpModule::new(
modules.push(Box::new(hoard::yt_dlp::YtDlpModule::new(
yt_dlp_mod,
db.take_db(),
config.hoard.data_dir.join(mod_name),

View file

@ -3,45 +3,47 @@ use std::{collections::HashMap, path::PathBuf};
use serde::{Deserialize, Serialize};
use crate::{
yt_dlp::{YtDlpConfig, YtDlpModule},
yt_dlp::{config::YtDlpConfig, YtDlpModule},
Module,
};
/// Configuration for the `SoundCloud` Module
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SoundCloudConfig {
// Interval in minutes between checks
/// Interval in minutes between checks
pub interval: u64,
/// Amount of items to query
pub limit: Option<u64>,
// Items to check
pub artists: HashMap<String, String>,
// Output Template for yt-dlp
/// Items to check
pub artists: HashMap<String, toml::Value>,
/// Output Template for yt-dlp
pub output_format: Option<String>,
// Download comments
/// Download comments
pub write_comments: Option<bool>,
// Download description
/// Download description
pub write_description: Option<bool>,
// Download cover
/// Download cover
pub write_cover: Option<bool>,
// Download subtitles
/// Download subtitles
pub write_subs: Option<bool>,
// Audio Format
/// Audio Format
pub audio_format: Option<String>,
// Embed thumbnail
/// Embed thumbnail
pub embed_thumbnail: Option<bool>,
// Embed metadata
/// Embed metadata
pub embed_metadata: Option<bool>,
// Embed chapters
/// Embed chapters
pub embed_chapters: Option<bool>,
// Embed info.json
/// Embed info.json
pub embed_info_json: Option<bool>,
// Split by chapter
/// Split by chapter
pub split_chapters: Option<bool>,
// Format Selection
/// Format Selection
pub format: Option<String>,
// Cookie File
/// Cookie File
pub cookie: Option<String>,
/// Webhooks for notifications
pub webhooks: Option<Vec<String>>,
}
#[derive(Clone)]
@ -75,6 +77,7 @@ impl SoundCloudModule {
format: config.format,
cookie: config.cookie,
audio_only: Some(true),
webhooks: config.webhooks,
},
db,
root_dir,

View file

@ -2,50 +2,49 @@ use std::{collections::HashMap, path::PathBuf};
use serde::{Deserialize, Serialize};
use crate::{
yt_dlp::{YtDlpConfig, YtDlpModule},
Module,
};
use crate::{yt_dlp::config::YtDlpConfig, yt_dlp::YtDlpModule, Module};
/// Configuration for the `YouTube` Module
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct YouTubeConfig {
// Interval in minutes between checks
/// Interval in minutes between checks
interval: u64,
/// Amount of videos to query
limit: Option<u64>,
// Channels to check
channels: HashMap<String, String>,
// Format of the Thumbnail
/// Channels to check
channels: HashMap<String, toml::Value>,
/// Format of the Thumbnail
thumbnail_format: Option<String>,
// Output Template for yt-dlp
/// Output Template for yt-dlp
output_format: Option<String>,
// Download description
/// Download description
pub write_description: Option<bool>,
// Download info.json
/// Download info.json
pub write_info_json: Option<bool>,
// Download comments
/// Download comments
pub write_comments: Option<bool>,
// Download thumbnail
/// Download thumbnail
pub write_thumbnail: Option<bool>,
// Download subtitles
/// Download subtitles
pub write_subs: Option<bool>,
// Embed subtitles
/// Embed subtitles
pub embed_subs: Option<bool>,
// Embed thumbnail
/// Embed thumbnail
pub embed_thumbnail: Option<bool>,
// Embed metadata
/// Embed metadata
pub embed_metadata: Option<bool>,
// Embed chapters
/// Embed chapters
embed_chapters: Option<bool>,
// Embed info.json
/// Embed info.json
pub embed_info_json: Option<bool>,
// Split by chapter
/// Split by chapter
pub split_chapters: Option<bool>,
// Format Selection
/// Format Selection
pub format: Option<String>,
// Cookie File
/// Cookie File
pub cookie: Option<String>,
/// Webhooks for notifications
pub webhooks: Option<Vec<String>>,
}
#[derive(Clone)]
@ -79,6 +78,7 @@ impl YouTubeModule {
format: config.format,
cookie: config.cookie,
audio_only: Some(false),
webhooks: config.webhooks,
},
db,
root_dir,

52
src/yt_dlp/config.rs Normal file
View file

@ -0,0 +1,52 @@
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// Configuration for a `yt-dlp` module instance
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct YtDlpConfig {
/// Module Name
pub name: Option<String>,
/// Interval in minutes between checks
pub interval: u64,
/// Amount of items to query
pub limit: Option<u64>,
/// Items to check.
///
/// Each value is either a string (the URL of the item) or a table mapping
/// item names to URLs, in which case the key is used as a category folder.
pub items: HashMap<String, toml::Value>,
/// Format of the Thumbnail
pub thumbnail_format: Option<String>,
/// Output Template for yt-dlp
pub output_format: Option<String>,
/// Download description
pub write_description: Option<bool>,
/// Download info.json
pub write_info_json: Option<bool>,
/// Download comments
pub write_comments: Option<bool>,
/// Download thumbnail
pub write_thumbnail: Option<bool>,
/// Download subtitles
pub write_subs: Option<bool>,
/// Extract audio
pub audio_only: Option<bool>,
/// Audio Format
pub audio_format: Option<String>,
/// Embed subtitles
pub embed_subs: Option<bool>,
/// Embed thumbnail
pub embed_thumbnail: Option<bool>,
/// Embed metadata
pub embed_metadata: Option<bool>,
/// Embed chapters
pub embed_chapters: Option<bool>,
/// Embed info.json
pub embed_info_json: Option<bool>,
/// Split by chapter
pub split_chapters: Option<bool>,
/// Format Selection
pub format: Option<String>,
/// Cookie File
pub cookie: Option<String>,
/// Webhooks for notifications
pub webhooks: Option<Vec<String>>,
}

View file

@ -1,61 +1,14 @@
use std::{
collections::HashMap,
io::{BufRead, BufReader},
path::PathBuf,
process::Command,
};
use serde::{Deserialize, Serialize};
pub mod config;
use config::YtDlpConfig;
use crate::{ensure_dir_exists, Module};
/// Configuration for the `YouTube` Module
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct YtDlpConfig {
// Module Name
pub name: Option<String>,
// Interval in minutes between checks
pub interval: u64,
/// Amount of items to query
pub limit: Option<u64>,
// Items to check
pub items: HashMap<String, String>,
// Format of the Thumbnail
pub thumbnail_format: Option<String>,
// Output Template for yt-dlp
pub output_format: Option<String>,
// Download description
pub write_description: Option<bool>,
// Download info.json
pub write_info_json: Option<bool>,
// Download comments
pub write_comments: Option<bool>,
// Download thumbnail
pub write_thumbnail: Option<bool>,
// Download subtitles
pub write_subs: Option<bool>,
// Extract audio
pub audio_only: Option<bool>,
// Audio Format
pub audio_format: Option<String>,
// Embed subtitles
pub embed_subs: Option<bool>,
// Embed thumbnail
pub embed_thumbnail: Option<bool>,
// Embed metadata
pub embed_metadata: Option<bool>,
// Embed chapters
pub embed_chapters: Option<bool>,
// Embed info.json
pub embed_info_json: Option<bool>,
// Split by chapter
pub split_chapters: Option<bool>,
// Format Selection
pub format: Option<String>,
// Cookie File
pub cookie: Option<String>,
}
#[derive(Clone)]
pub struct YtDlpModule {
config: YtDlpConfig,
@ -71,6 +24,36 @@ impl YtDlpModule {
root_dir,
}
}
/// Fetch the latest entries of one item and download every entry that is not
/// yet recorded in the database.
///
/// * `item` - name of the item, used for logging, the download log and webhooks.
/// * `item_url` - URL the latest entries are queried from.
/// * `cwd` - directory passed to the download step.
///
/// Successful downloads are recorded in the database; both successes and
/// failures are reported via `webhook_notify`.
fn check_item(&self, item: &str, item_url: &str, cwd: &PathBuf) {
    log::info!("Fetching \"{item}\" videos");

    let limit = self.config.limit.unwrap_or(10);
    let entries = match Self::get_latest_entries(item_url, limit) {
        Ok(entries) => entries,
        Err(e) => {
            log::error!("Could not get videos from \"{item}\". Reason: {e}");
            return;
        }
    };

    for (video_title, video_url) in entries {
        if self.db.check_for_url(&video_url) {
            log::trace!("Skipping \"{video_title}\" because it was already downloaded");
            continue;
        }

        if let Err(e) = self.download(&video_url, cwd) {
            log::error!("Error downloading \"{video_title}\"; Reason: {e}");
            self.webhook_notify(&video_url, &video_title, item, false);
            continue;
        }

        // mark as downloaded
        self.db.insert_url(&video_url);
        self.db.update_new_downloads(&self.name(), item, item_url);
        log::info!("Downloaded \"{video_title}\"");
        self.webhook_notify(&video_url, &video_title, item, true);
    }
}
}
impl Module for YtDlpModule {
@ -86,33 +69,23 @@ impl Module for YtDlpModule {
log::info!("Running {} Module", self.name());
log::info!("Checking {} items", self.config.items.len());
for (item, item_url) in &self.config.items {
log::info!("Fetching \"{item}\" videos");
match Self::get_latest_entries(item_url, self.config.limit.unwrap_or(10)) {
Ok(latest_videos) => {
for (video_title, video_url) in latest_videos {
if self.db.check_for_url(&video_url) {
log::trace!(
"Skipping \"{video_title}\" because it was already downloaded"
);
} else {
match self.download(&video_url, &self.root_dir.join(item)) {
Ok(()) => {
// mark as downloaded
self.db.insert_url(&video_url);
log::info!("Downloaded \"{video_title}\"");
match item_url {
toml::Value::String(item_url) => {
self.check_item(item, item_url, &self.root_dir.join(item));
}
Err(e) => {
log::error!(
"Error downloading \"{video_title}\"; Reason: {e}"
toml::Value::Array(_) => todo!(),
toml::Value::Table(cat) => {
let category = item;
for (item, item_url) in cat {
let item_url = item_url.as_str().unwrap();
self.check_item(
item,
item_url,
&self.root_dir.join(category).join(item),
);
}
}
}
}
}
Err(e) => {
log::error!("Could not get videos from \"{item}\". Reason: {e}");
}
_ => {}
}
}
log::info!(
@ -126,6 +99,38 @@ impl Module for YtDlpModule {
}
impl YtDlpModule {
pub fn webhook_notify(&self, video_url: &str, video_title: &str, item: &str, success: bool) {
let request = serde_json::json!({
"module": self.name(),
"url": video_url,
"title": video_title,
"item": item,
"success": success
});
let client = reqwest::blocking::Client::new();
if let Some(webhooks) = &self.config.webhooks {
for url in webhooks {
client
.post(url)
.json(&request)
.send()
.expect("Failed to send webhook request");
}
}
}
/// A function to get the latest entries (title and URL) for a given channel with a specified limit.
///
/// # Arguments
///
/// * `channel` - The name of the `YouTube` channel.
/// * `limit` - The maximum number of entries to return.
///
/// # Returns
///
/// A `Result` containing a vector of tuples if successful, where each tuple contains the title and URL of an entry.
/// An error message if execution of `yt-dlp` fails.
fn get_latest_entries(channel: &str, limit: u64) -> Result<Vec<(String, String)>, String> {
let output = Command::new("yt-dlp")
.arg("--no-warnings")