This commit is contained in:
JMARyA 2024-03-07 16:18:47 +01:00
commit 64215f5b70
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
9 changed files with 984 additions and 0 deletions

17
src/config.rs Normal file
View file

@ -0,0 +1,17 @@
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
/// General hoard settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HoardConfig {
    /// Top level data download directory.
    pub data_dir: PathBuf,
}
/// Root of the configuration: hoard's own settings plus one section per module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalConfig {
    /// Hoard configuration.
    pub hoard: HoardConfig,
    /// Configuration for the YouTube module; `None` when the section is absent.
    pub youtube: Option<crate::youtube::YouTubeConfig>,
}

45
src/db.rs Normal file
View file

@ -0,0 +1,45 @@
use rusqlite::Connection;
use std::error::Error;
// todo : make db singleton
/// Handle to the SQLite download database.
///
/// Only the file path is stored; every operation opens its own connection to it.
#[derive(Debug, Clone)]
pub struct Database {
    /// Path of the SQLite database file.
    file: String,
}
impl Database {
    /// Opens the SQLite database at `file` and makes sure the `urls` table exists.
    ///
    /// # Panics
    /// Panics if the database cannot be opened or the table cannot be created.
    pub fn new(file: &str) -> Self {
        let connection = Connection::open(file).unwrap();
        // The continuation lines of the statement are kept flush-left so the SQL text
        // handed to SQLite stays exactly as it was.
        connection
            .execute(
                "CREATE TABLE IF NOT EXISTS urls (
id INTEGER PRIMARY KEY,
url TEXT NOT NULL,
timestamp TEXT NOT NULL
)",
                [],
            )
            .unwrap();

        Self {
            file: file.to_string(),
        }
    }

    /// Records `url` in the `urls` table together with the current local time (RFC 3339).
    ///
    /// # Errors
    /// Returns an error if the database cannot be opened or the insert fails.
    pub fn insert_url(&self, url: &str) -> Result<(), Box<dyn Error>> {
        let connection = Connection::open(&self.file)?;
        let now = chrono::Local::now().to_rfc3339();
        connection.execute(
            "INSERT INTO urls (url, timestamp) VALUES (?, ?)",
            [url, now.as_str()],
        )?;
        Ok(())
    }

    /// Returns `true` if at least one row of the `urls` table has exactly this `url`.
    ///
    /// # Errors
    /// Returns an error if the database cannot be opened or the query fails.
    pub fn check_for_url(&self, url: &str) -> Result<bool, Box<dyn Error>> {
        let connection = Connection::open(&self.file)?;
        let matches: i64 = connection.query_row(
            "SELECT COUNT(*) FROM urls WHERE url = ?",
            [url],
            |row| row.get(0),
        )?;
        Ok(matches > 0)
    }
}

65
src/main.rs Normal file
View file

@ -0,0 +1,65 @@
use std::path::PathBuf;
mod config;
mod db;
mod youtube;
use config::GlobalConfig;
// todo : migrate to async code?
/// Creates `dir_path`, including any missing parent directories, unless something
/// already exists at that path.
///
/// Takes a `&Path`, so both `&Path` and `&PathBuf` arguments are accepted.
///
/// # Panics
/// Panics if the directory cannot be created.
pub fn ensure_dir_exists(dir_path: &std::path::Path) {
    if !dir_path.exists() {
        std::fs::create_dir_all(dir_path).unwrap();
    }
}
/// A unit of work that `main` runs on its own thread.
///
/// The `Send` bound is what allows a boxed module to be moved into the spawned thread.
trait Module: Send {
    /// Name of the module.
    fn name(&self) -> String;

    /// Executes the module's work.
    fn run(&self);
}
fn main() {
#[cfg(debug_assertions)]
{
std::env::set_var("RUST_LOG", "trace");
std::env::set_var("RUST_BACKTRACE", "1");
}
#[cfg(not(debug_assertions))]
{
if std::env::var("RUST_LOG").is_err() {
std::env::set_var("RUST_LOG", "warn");
}
}
env_logger::init();
log::info!("Starting hoard");
let db = db::Database::new("download.db");
let config: GlobalConfig =
toml::from_str(&std::fs::read_to_string("config.toml").unwrap()).unwrap();
ensure_dir_exists(&config.hoard.data_dir);
let modules: Vec<Box<dyn Module>> = vec![Box::new(youtube::YouTubeModule::new(
config.youtube.unwrap(),
db,
config.hoard.data_dir.join("youtube"),
))];
let threads: Vec<_> = modules
.into_iter()
.map(|x| {
std::thread::spawn(move || {
x.run();
})
})
.collect();
for t in threads {
// todo : fix dying threads
t.join().unwrap();
}
}

151
src/youtube/mod.rs Normal file
View file

@ -0,0 +1,151 @@
use std::{
collections::HashMap,
io::{BufRead, BufReader},
path::PathBuf,
process::Command,
};
use serde::{Deserialize, Serialize};
use crate::{ensure_dir_exists, Module};
/// Settings of the YouTube module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct YouTubeConfig {
    /// Interval in minutes between checks.
    interval: u64,
    /// Channels to check: channel name mapped to the channel URL handed to yt-dlp.
    channels: HashMap<String, String>,
    /// Format of the thumbnail (`jpg` is used when unset).
    thumbnail_format: Option<String>,
    /// Output template for yt-dlp (`%(title)s.%(ext)s` is used when unset).
    output_format: Option<String>,
}
impl YouTubeConfig {
    /// Returns the download options derived from this configuration: copies of the
    /// optional thumbnail format and output template.
    pub fn download_options(&self) -> DownloadOptions {
        let Self {
            thumbnail_format,
            output_format,
            ..
        } = self;

        DownloadOptions {
            thumbnail_format: thumbnail_format.clone(),
            output_format: output_format.clone(),
        }
    }
}
/// Module that downloads videos of the configured YouTube channels.
#[derive(Clone, Debug)]
pub struct YouTubeModule {
    /// Module settings (interval, channels, yt-dlp formats).
    config: YouTubeConfig,
    /// Database used to remember which video URLs were already downloaded.
    db: crate::db::Database,
    /// Directory under which one sub-directory per channel is created.
    root_dir: PathBuf,
}
impl YouTubeModule {
    /// Bundles the module configuration, the download database and the module's
    /// root directory into a new `YouTubeModule`.
    pub const fn new(config: YouTubeConfig, db: crate::db::Database, root_dir: PathBuf) -> Self {
        Self { config, db, root_dir }
    }
}
impl Module for YouTubeModule {
fn name(&self) -> String {
"YouTube".to_string()
}
fn run(&self) {
log::info!("Running YouTube Module");
let download_options = self.config.download_options();
for (channel, channel_url) in &self.config.channels {
log::info!("Fetching {channel} videos");
match Self::get_latest_channel_videos(channel_url) {
Ok(latest_videos) => {
for (video_title, video_url) in latest_videos {
if self.db.check_for_url(&video_url).unwrap() {
log::trace!("Skipping {video_title} because it was already downloaded");
} else {
match Self::download_video(
&video_url,
&self.root_dir.join(channel),
&download_options,
) {
Ok(()) => {
// mark as downloaded
self.db.insert_url(&video_url).unwrap();
log::info!("Downloaded {video_title}");
}
Err(e) => {
log::error!("Error downloading {video_title}; Reason: {e}");
// todo : error handling
}
}
}
}
}
Err(e) => {
log::error!("Could not get videos from {channel}. Reason: {e}");
}
}
}
std::thread::sleep(std::time::Duration::from_secs(self.config.interval * 60));
}
}
impl YouTubeModule {
    /// Lists videos of `channel` as `(title, url)` pairs.
    ///
    /// Runs `yt-dlp` in flat-playlist mode without downloading, limited to the first
    /// 10 playlist entries, and reads the printed output as alternating title and
    /// URL lines. A pair in which either line is not valid UTF-8 is skipped.
    ///
    /// # Errors
    /// Returns a message if `yt-dlp` cannot be executed, or the (lossily decoded)
    /// stderr output if `yt-dlp` exits unsuccessfully.
    fn get_latest_channel_videos(channel: &str) -> Result<Vec<(String, String)>, String> {
        let output = Command::new("yt-dlp")
            .args(["--no-warnings", "--flat-playlist", "--skip-download"])
            .args(["--print", "title,webpage_url"])
            .args(["--playlist-end", "10"])
            .arg(channel)
            .output()
            // Report a missing or unstartable yt-dlp to the caller instead of panicking.
            .map_err(|e| format!("Failed to execute yt-dlp: {e}"))?;

        if !output.status.success() {
            // Lossy decoding: stderr that is not valid UTF-8 must not cause a panic.
            return Err(String::from_utf8_lossy(&output.stderr).into_owned());
        }

        let mut lines = BufReader::new(output.stdout.as_slice()).lines();
        let mut videos = Vec::new();
        // Two lines are consumed per iteration: first the title, then the URL.
        while let (Some(title), Some(url)) = (lines.next(), lines.next()) {
            if let (Ok(title), Ok(url)) = (title, url) {
                videos.push((title, url));
            }
        }

        Ok(videos)
    }

    /// Downloads `video_url` with `yt-dlp` into `cwd`, creating the directory first.
    ///
    /// The thumbnail is written, converted to the configured format (default `jpg`)
    /// and embedded together with chapters and the info JSON; the file name follows
    /// the configured output template (default `%(title)s.%(ext)s`).
    ///
    /// # Errors
    /// Returns a message if `yt-dlp` cannot be executed, or the (lossily decoded)
    /// stderr output if `yt-dlp` exits unsuccessfully.
    fn download_video(video_url: &str, cwd: &PathBuf, opt: &DownloadOptions) -> Result<(), String> {
        ensure_dir_exists(cwd);

        let output_template = opt.output_format.as_deref().unwrap_or("%(title)s.%(ext)s");
        let thumbnail_format = opt.thumbnail_format.as_deref().unwrap_or("jpg");

        let output = Command::new("yt-dlp")
            .current_dir(cwd)
            .arg("--write-thumbnail")
            .args(["-o", output_template])
            .args(["--embed-thumbnail", "--embed-chapters", "--embed-info-json"])
            .args(["--convert-thumbnails", thumbnail_format])
            .arg(video_url)
            .output()
            .map_err(|_| "yt-dlp command failed".to_string())?;

        if !output.status.success() {
            return Err(String::from_utf8_lossy(&output.stderr).into_owned());
        }

        Ok(())
    }
}
/// Optional yt-dlp settings applied to every video download.
///
/// Derives `Debug` and `Clone` like the other types of this module.
#[derive(Debug, Clone)]
pub struct DownloadOptions {
    /// Format the thumbnail is converted to; `jpg` is used when `None`.
    thumbnail_format: Option<String>,
    /// Output template for yt-dlp; `%(title)s.%(ext)s` is used when `None`.
    output_format: Option<String>,
}