From 1f32c21363aa0849f335bea633a6e4610218e2c2 Mon Sep 17 00:00:00 2001 From: JMARyA Date: Sun, 10 Mar 2024 04:52:50 +0100 Subject: [PATCH] add yt_dlp module + db --- Cargo.lock | 6 ++ Cargo.toml | 1 + config.toml | 48 +++++++++- src/config.rs | 4 + src/db.rs | 92 ++++++++++++++---- src/main.rs | 26 ++++- src/youtube/mod.rs | 196 ++++++++++++-------------------------- src/yt_dlp/mod.rs | 233 +++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 447 insertions(+), 159 deletions(-) create mode 100644 src/yt_dlp/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 7f8a619..a817457 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -214,6 +214,7 @@ version = "0.1.0" dependencies = [ "chrono", "env_logger", + "jobdispatcher", "log", "rusqlite", "serde", @@ -266,6 +267,11 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +[[package]] +name = "jobdispatcher" +version = "0.1.0" +source = "git+https://git.hydrar.de/jmarya/jobdispatcher#df3bbb09ab2b2cace22d052e4a22370c88be9f2c" + [[package]] name = "js-sys" version = "0.3.69" diff --git a/Cargo.toml b/Cargo.toml index 0e31fda..8bdce57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,4 @@ rusqlite = "0.30.0" serde = { version = "1.0.196", features = ["derive"] } serde_json = "1.0.113" toml = "0.8.10" +jobdispatcher = { git = "https://git.hydrar.de/jmarya/jobdispatcher" } \ No newline at end of file diff --git a/config.toml b/config.toml index bf0c0a0..0e90c13 100644 --- a/config.toml +++ b/config.toml @@ -12,4 +12,50 @@ output_format = "%(title)s [%(id)s].%(ext)s" [youtube.channels] # Channel Mappings -MentalOutlaw = "https://www.youtube.com/@MentalOutlaw" \ No newline at end of file +MentalOutlaw = "https://www.youtube.com/@MentalOutlaw" + +[[yt_dlp]] +# Module Name +name = "Custom-yt_dlp" +# Interval in minutes between checks +interval = 30 +# Amount of items to query +limit = 10 +# Format of the Thumbnail +thumbnail_format = "jpg" +# Output Template for yt-dlp +output_format = "%(title)s [%(id)s].%(ext)s" +# Download description +write_description = false +# Download info.json +write_info_json = false +# Download comments +write_comments = false +# Download thumbnail +write_thumbnail = true +# Download subtitles +write_subs = false +# Extract audio +audio_only = false +# Audio Format +audio_format = "m4a" +# Embed subtitles +embed_subs = false +# Embed thumbnail +embed_thumbnail = false +# Embed metadata +embed_metadata = true +# Embed chapters +embed_chapters = true +# Embed info.json +embed_info_json = true +# Split by chapter +split_chapters = false +# Format Selection +format = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio" +# Cookie File +cookie = "cookies.txt" + +# Items to check +[yt_dlp.items] +Item = "url" diff --git a/src/config.rs b/src/config.rs index 818df0e..6df00d2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,6 +2,8 @@ use std::path::PathBuf; use serde::{Deserialize, Serialize}; +use crate::yt_dlp::YtDlpConfig; + /// General settings for hoard #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HoardConfig { @@ -16,4 +18,6 @@ pub struct GlobalConfig { pub hoard: HoardConfig, // Configuration for the YouTube Module pub youtube: Option, + // Custom instances of yt-dlp + pub yt_dlp: Option>, } diff --git a/src/db.rs b/src/db.rs index 1cc5da1..4040d71 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,16 +1,19 @@ +use jobdispatcher::{JobDispatcher, JobOrder}; use rusqlite::Connection; -use std::error::Error; +use std::sync::{mpsc::Receiver, Arc}; -// todo : make db singleton - -#[derive(Debug, Clone)] -pub struct Database { - file: String, +pub struct DatabaseBackend { + pub file: String, + pub conn: Connection, + pub dispatcher: Arc>, + pub recv: Receiver>, } -impl Database { +impl DatabaseBackend { pub fn new(file: &str) -> Self { + let (dispatcher, recv) = jobdispatcher::JobDispatcher::::new(); let conn = Connection::open(file).unwrap(); + conn.execute( "CREATE TABLE IF NOT EXISTS urls ( id INTEGER PRIMARY KEY, @@ -21,25 +24,74 @@ impl Database { ) .unwrap(); + let dispatcher = Arc::new(dispatcher); Self { file: file.to_string(), + conn, + dispatcher, + recv, } } - pub fn insert_url(&self, url: &str) -> Result<(), Box> { - let conn = Connection::open(&self.file)?; - let timestamp = chrono::Local::now().to_rfc3339(); - conn.execute( - "INSERT INTO urls (url, timestamp) VALUES (?, ?)", - [url, ×tamp], - )?; - Ok(()) + pub fn take_db(&self) -> Database { + Database::new(self.dispatcher.clone()) } - pub fn check_for_url(&self, url: &str) -> Result> { - let conn = Connection::open(&self.file)?; - let mut stmt = conn.prepare("SELECT COUNT(*) FROM urls WHERE url = ?")?; - let count: i64 = stmt.query_row([url], |row| row.get(0))?; - Ok(count > 0) + pub fn run(&self) { + while let Ok(job) = self.recv.recv() { + match job.param { + Query::InsertUrl(ref url) => { + let timestamp = chrono::Local::now().to_rfc3339(); + self.conn + .execute( + "INSERT INTO urls (url, timestamp) VALUES (?, ?)", + [url, ×tamp], + ) + .unwrap(); + job.done(Out::Ok); + } + Query::CheckForUrl(ref url) => { + let conn = Connection::open(&self.file).unwrap(); + let mut stmt = conn + .prepare("SELECT COUNT(*) FROM urls WHERE url = ?") + .unwrap(); + let count: i64 = stmt.query_row([url], |row| row.get(0)).unwrap(); + job.done(Out::Bool(count > 0)); + } + } + } + } +} + +pub enum Query { + InsertUrl(String), + CheckForUrl(String), +} + +pub enum Out { + Ok, + Bool(bool), + // Rows(Vec), +} + +#[derive(Clone)] +pub struct Database { + conn: Arc>, +} + +impl Database { + pub fn new(conn: Arc>) -> Self { + Self { conn } + } + + pub fn insert_url(&self, url: &str) { + self.conn.send(Query::InsertUrl(url.to_string())); + } + + pub fn check_for_url(&self, url: &str) -> bool { + match self.conn.send(Query::CheckForUrl(url.to_string())) { + Out::Ok => false, + Out::Bool(b) => b, + } } } diff --git a/src/main.rs b/src/main.rs index 5228ce2..3b82bb1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,9 +3,12 @@ use std::path::PathBuf; mod config; mod db; mod youtube; +mod yt_dlp; use config::GlobalConfig; +use crate::yt_dlp::YtDlpModule; + // todo : migrate to async code? // todo : better log options @@ -38,18 +41,33 @@ fn main() { log::info!("Starting hoard"); - let db = db::Database::new("download.db"); + let db = db::DatabaseBackend::new("download.db"); let config: GlobalConfig = toml::from_str(&std::fs::read_to_string("config.toml").unwrap()).unwrap(); - ensure_dir_exists(&config.hoard.data_dir); - let modules: Vec> = vec![Box::new(youtube::YouTubeModule::new( + let mut modules: Vec> = vec![Box::new(youtube::YouTubeModule::new( config.youtube.unwrap(), - db, + db.take_db(), config.hoard.data_dir.join("youtube"), ))]; + for yt_dlp_mod in config.yt_dlp.unwrap_or_default() { + let mod_name = yt_dlp_mod + .name + .clone() + .unwrap_or_else(|| "yt_dlp".to_string()); + modules.push(Box::new(YtDlpModule::new( + yt_dlp_mod, + db.take_db(), + config.hoard.data_dir.join(mod_name), + ))); + } + + let _db_thread = std::thread::spawn(move || { + db.run(); + }); + let threads: Vec<_> = modules .into_iter() .map(|x| { diff --git a/src/youtube/mod.rs b/src/youtube/mod.rs index baa44c7..0754e4a 100644 --- a/src/youtube/mod.rs +++ b/src/youtube/mod.rs @@ -1,13 +1,11 @@ -use std::{ - collections::HashMap, - io::{BufRead, BufReader}, - path::PathBuf, - process::Command, -}; +use std::{collections::HashMap, path::PathBuf}; use serde::{Deserialize, Serialize}; -use crate::{ensure_dir_exists, Module}; +use crate::{ + yt_dlp::{YtDlpConfig, YtDlpModule}, + Module, +}; /// Configuration for the `YouTube` Module #[derive(Debug, Clone, Serialize, Deserialize)] @@ -22,30 +20,69 @@ pub struct YouTubeConfig { thumbnail_format: Option, // Output Template for yt-dlp output_format: Option, + // Download description + pub write_description: Option, + // Download info.json + pub write_info_json: Option, + // Download comments + pub write_comments: Option, + // Download thumbnail + pub write_thumbnail: Option, + // Download subtitles + pub write_subs: Option, + // Embed subtitles + pub embed_subs: Option, + // Embed thumbnail + pub embed_thumbnail: Option, + // Embed metadata + pub embed_metadata: Option, + // Embed chapters + embed_chapters: Option, + // Embed info.json + pub embed_info_json: Option, + // Split by chapter + pub split_chapters: Option, + // Format Selection + pub format: Option, + // Cookie File + pub cookie: Option, } -impl YouTubeConfig { - pub fn download_options(&self) -> DownloadOptions { - DownloadOptions { - thumbnail_format: self.thumbnail_format.clone(), - output_format: self.output_format.clone(), - } - } -} - -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct YouTubeModule { - config: YouTubeConfig, - db: crate::db::Database, - root_dir: PathBuf, + yt_dlp: YtDlpModule, } impl YouTubeModule { - pub const fn new(config: YouTubeConfig, db: crate::db::Database, root_dir: PathBuf) -> Self { + pub fn new(config: YouTubeConfig, db: crate::db::Database, root_dir: PathBuf) -> Self { Self { - config, - db, - root_dir, + yt_dlp: YtDlpModule::new( + YtDlpConfig { + name: Some("youtube".to_string()), + interval: config.interval, + limit: config.limit, + items: config.channels, + thumbnail_format: config.thumbnail_format, + output_format: config.output_format.clone(), + write_description: Some(config.write_description.unwrap_or(true)), + write_info_json: config.write_info_json, + write_comments: config.write_comments, + write_thumbnail: Some(config.write_thumbnail.unwrap_or(true)), + write_subs: config.write_subs, + audio_format: None, + embed_subs: config.embed_subs, + embed_thumbnail: config.embed_thumbnail, + embed_metadata: config.embed_metadata, + embed_chapters: config.embed_chapters, + embed_info_json: config.embed_info_json, + split_chapters: config.split_chapters, + format: config.format, + cookie: config.cookie, + audio_only: Some(false), + }, + db, + root_dir, + ), } } } @@ -56,115 +93,6 @@ impl Module for YouTubeModule { } fn run(&self) { - loop { - log::info!("Running YouTube Module"); - let download_options = self.config.download_options(); - log::info!("Checking {} channels", self.config.channels.len()); - for (channel, channel_url) in &self.config.channels { - log::info!("Fetching \"{channel}\" videos"); - match Self::get_latest_channel_videos(channel_url, self.config.limit.unwrap_or(10)) - { - Ok(latest_videos) => { - for (video_title, video_url) in latest_videos { - if self.db.check_for_url(&video_url).unwrap() { - log::trace!( - "Skipping \"{video_title}\" because it was already downloaded" - ); - } else { - match Self::download_video( - &video_url, - &self.root_dir.join(channel), - &download_options, - ) { - Ok(()) => { - // mark as downloaded - self.db.insert_url(&video_url).unwrap(); - log::info!("Downloaded \"{video_title}\""); - } - Err(e) => { - log::error!( - "Error downloading \"{video_title}\"; Reason: {e}" - ); - } - } - } - } - } - Err(e) => { - log::error!("Could not get videos from \"{channel}\". Reason: {e}"); - } - } - } - log::info!( - "Check complete. Sleeping for {} minutes...", - self.config.interval - ); - std::thread::sleep(std::time::Duration::from_secs(self.config.interval * 60)); - } + self.yt_dlp.run(); } } - -impl YouTubeModule { - fn get_latest_channel_videos( - channel: &str, - limit: u64, - ) -> Result, String> { - let output = Command::new("yt-dlp") - .arg("--no-warnings") - .arg("--flat-playlist") - .arg("--skip-download") - .arg("--print") - .arg("title,webpage_url") - .arg("--playlist-end") - .arg(limit.to_string()) - .arg(channel) - .output() - .expect("Failed to execute yt-dlp"); - - if !output.status.success() { - return Err(String::from_utf8(output.stderr).unwrap()); - } - - let reader = BufReader::new(&output.stdout[..]); - let mut videos = Vec::new(); - let mut lines = reader.lines(); - while let (Some(title), Some(url)) = (lines.next(), lines.next()) { - if let (Ok(title), Ok(url)) = (title, url) { - videos.push((title, url)); - } - } - - Ok(videos.into_iter().take(limit as usize).collect()) - } - - fn download_video(video_url: &str, cwd: &PathBuf, opt: &DownloadOptions) -> Result<(), String> { - ensure_dir_exists(cwd); - let output = Command::new("yt-dlp") - .current_dir(cwd) - .arg("--downloader") - .arg("aria2c") - .arg("--write-thumbnail") - .arg("-o") - .arg(opt.output_format.as_deref().unwrap_or("%(title)s.%(ext)s")) - .arg("--embed-thumbnail") - .arg("--embed-chapters") - .arg("--embed-info-json") - .arg("--convert-thumbnails") - .arg(opt.thumbnail_format.as_deref().unwrap_or("jpg")) - .arg(video_url) - .output() - .map_err(|_| "yt-dlp command failed".to_string())?; - - if !output.status.success() { - let error_message = String::from_utf8_lossy(&output.stderr).to_string(); - return Err(error_message); - } - - Ok(()) - } -} - -pub struct DownloadOptions { - thumbnail_format: Option, - output_format: Option, -} diff --git a/src/yt_dlp/mod.rs b/src/yt_dlp/mod.rs new file mode 100644 index 0000000..eaf68e4 --- /dev/null +++ b/src/yt_dlp/mod.rs @@ -0,0 +1,233 @@ +use std::{ + collections::HashMap, + io::{BufRead, BufReader}, + path::PathBuf, + process::Command, +}; + +use serde::{Deserialize, Serialize}; + +use crate::{ensure_dir_exists, Module}; + +/// Configuration for the `YouTube` Module +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct YtDlpConfig { + // Module Name + pub name: Option, + // Interval in minutes between checks + pub interval: u64, + /// Amount of items to query + pub limit: Option, + // Items to check + pub items: HashMap, + // Format of the Thumbnail + pub thumbnail_format: Option, + // Output Template for yt-dlp + pub output_format: Option, + // Download description + pub write_description: Option, + // Download info.json + pub write_info_json: Option, + // Download comments + pub write_comments: Option, + // Download thumbnail + pub write_thumbnail: Option, + // Download subtitles + pub write_subs: Option, + // Extract audio + pub audio_only: Option, + // Audio Format + pub audio_format: Option, + // Embed subtitles + pub embed_subs: Option, + // Embed thumbnail + pub embed_thumbnail: Option, + // Embed metadata + pub embed_metadata: Option, + // Embed chapters + pub embed_chapters: Option, + // Embed info.json + pub embed_info_json: Option, + // Split by chapter + pub split_chapters: Option, + // Format Selection + pub format: Option, + // Cookie File + pub cookie: Option, +} + +#[derive(Clone)] +pub struct YtDlpModule { + config: YtDlpConfig, + db: crate::db::Database, + root_dir: PathBuf, +} + +impl YtDlpModule { + pub const fn new(config: YtDlpConfig, db: crate::db::Database, root_dir: PathBuf) -> Self { + Self { + config, + db, + root_dir, + } + } +} + +impl Module for YtDlpModule { + fn name(&self) -> String { + self.config + .name + .clone() + .unwrap_or_else(|| "yt-dlp".to_string()) + } + + fn run(&self) { + loop { + log::info!("Running {} Module", self.name()); + log::info!("Checking {} items", self.config.items.len()); + for (item, item_url) in &self.config.items { + log::info!("Fetching \"{item}\" videos"); + match Self::get_latest_entries(item_url, self.config.limit.unwrap_or(10)) { + Ok(latest_videos) => { + for (video_title, video_url) in latest_videos { + if self.db.check_for_url(&video_url) { + log::trace!( + "Skipping \"{video_title}\" because it was already downloaded" + ); + } else { + match self.download(&video_url, &self.root_dir.join(item)) { + Ok(()) => { + // mark as downloaded + self.db.insert_url(&video_url); + log::info!("Downloaded \"{video_title}\""); + } + Err(e) => { + log::error!( + "Error downloading \"{video_title}\"; Reason: {e}" + ); + } + } + } + } + } + Err(e) => { + log::error!("Could not get videos from \"{item}\". Reason: {e}"); + } + } + } + log::info!( + "{} complete. Sleeping for {} minutes...", + self.name(), + self.config.interval + ); + std::thread::sleep(std::time::Duration::from_secs(self.config.interval * 60)); + } + } +} + +impl YtDlpModule { + fn get_latest_entries(channel: &str, limit: u64) -> Result, String> { + let output = Command::new("yt-dlp") + .arg("--no-warnings") + .arg("--flat-playlist") + .arg("--skip-download") + .arg("--print") + .arg("title,webpage_url") + .arg("--playlist-end") + .arg(limit.to_string()) + .arg(channel) + .output() + .expect("Failed to execute yt-dlp"); + + if !output.status.success() { + return Err(String::from_utf8(output.stderr).unwrap()); + } + + let reader = BufReader::new(&output.stdout[..]); + let mut videos = Vec::new(); + let mut lines = reader.lines(); + while let (Some(title), Some(url)) = (lines.next(), lines.next()) { + if let (Ok(title), Ok(url)) = (title, url) { + videos.push((title, url)); + } + } + + Ok(videos.into_iter().take(limit as usize).collect()) + } + + fn download(&self, video_url: &str, cwd: &PathBuf) -> Result<(), String> { + ensure_dir_exists(cwd); + let mut command = Command::new("yt-dlp"); + let mut command = command.current_dir(cwd).arg("--downloader").arg("aria2c"); + + if self.config.write_thumbnail.unwrap_or(true) { + command = command.arg("--write-thumbnail"); + } + if self.config.write_description.unwrap_or(false) { + command = command.arg("--write-description"); + } + if self.config.write_info_json.unwrap_or(false) { + command = command.arg("--write-info-json"); + } + if self.config.write_comments.unwrap_or(false) { + command = command.arg("--write-comments"); + } + if self.config.write_subs.unwrap_or(false) { + command = command.arg("--write-subs"); + } + if self.config.audio_only.unwrap_or(false) { + command = command.arg("--extract-audio"); + } + if let Some(audio_format) = &self.config.audio_format { + command = command.arg("--audio-format").arg(audio_format); + } + + if self.config.embed_chapters.unwrap_or(true) { + command = command.arg("--embed-chapters"); + } + if self.config.embed_info_json.unwrap_or(true) { + command = command.arg("--embed-info-json"); + } + if self.config.embed_metadata.unwrap_or(true) { + command = command.arg("--embed-metadata"); + } + if self.config.embed_subs.unwrap_or(false) { + command = command.arg("--embed-subs"); + } + if self.config.embed_thumbnail.unwrap_or(true) { + command = command.arg("--embed-thumbnail"); + } + + if self.config.split_chapters.unwrap_or(false) { + command = command.arg("--split-chapters"); + } + + if let Some(format) = &self.config.format { + command = command.arg("--format").arg(format); + } + if let Some(cookie) = &self.config.cookie { + command = command.arg("--cookies").arg(cookie); + } + + let output = command + .arg("--convert-thumbnails") + .arg(self.config.thumbnail_format.as_deref().unwrap_or("jpg")) + .arg("-o") + .arg( + self.config + .output_format + .as_deref() + .unwrap_or("%(title)s.%(ext)s"), + ) + .arg(video_url) + .output() + .map_err(|_| "yt-dlp command failed".to_string())?; + + if !output.status.success() { + let error_message = String::from_utf8_lossy(&output.stderr).to_string(); + return Err(error_message); + } + + Ok(()) + } +}