multithreaded downloading

This commit is contained in:
JMARyA 2025-03-08 21:51:42 +01:00
parent c4c54f78d6
commit d463b48ec2
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
4 changed files with 328 additions and 110 deletions

View file

@ -1,4 +1,5 @@
use hoard::config::GlobalConfig;
use hoard::yt_dlp::download_yt_dlp_init;
use hoard::{ensure_dir_exists, Module};
// todo : migrate to async code?
@ -26,6 +27,9 @@ async fn main() {
toml::from_str(&std::fs::read_to_string("config.toml").unwrap()).unwrap();
ensure_dir_exists(&config.hoard.data_dir);
let mut sm = comrade::service::ServiceManager::new();
sm = download_yt_dlp_init(sm);
let db = hoard::db::DatabaseBackend::new(&config.hoard.database).await;
let mut modules: Vec<Module> = vec![];
@ -60,10 +64,8 @@ async fn main() {
)));
}
let mut sm = comrade::service::ServiceManager::new();
for module in modules {
sm.register(&module.name(), move |_| {
sm = sm.register(&module.name(), move |_| {
module.run();
});
}

View file

@ -2,13 +2,106 @@ use std::{
io::{BufRead, BufReader},
path::PathBuf,
process::Command,
str::FromStr,
};
pub mod config;
use comrade::job::{JobDispatcher, JobOrder};
use comrade::service::ServiceManager;
use comrade::worker;
use config::YtDlpConfig;
use crossbeam::channel::Receiver;
use crate::ensure_dir_exists;
#[worker(3)]
fn download_yt_dlp(
config: YtDlpConfig,
video_url: String,
video_title: String,
cwd: String,
) -> bool {
log::info!("Started downloading {video_title}");
let cwd = PathBuf::from_str(&cwd).unwrap();
ensure_dir_exists(&cwd);
let mut command = Command::new("yt-dlp");
let mut command = command.current_dir(cwd).arg("--downloader").arg("aria2c");
if config.write_thumbnail.unwrap_or(true) {
command = command.arg("--write-thumbnail");
}
if config.write_description.unwrap_or(false) {
command = command.arg("--write-description");
}
if config.write_info_json.unwrap_or(false) {
command = command.arg("--write-info-json");
}
if config.write_comments.unwrap_or(false) {
command = command.arg("--write-comments");
}
if config.write_subs.unwrap_or(false) {
command = command.arg("--write-subs");
}
if config.audio_only.unwrap_or(false) {
command = command.arg("--extract-audio");
}
if let Some(audio_format) = &config.audio_format {
command = command.arg("--audio-format").arg(audio_format);
}
if config.embed_chapters.unwrap_or(true) {
command = command.arg("--embed-chapters");
}
if config.embed_info_json.unwrap_or(true) {
command = command.arg("--embed-info-json");
}
if config.embed_metadata.unwrap_or(true) {
command = command.arg("--embed-metadata");
}
if config.embed_subs.unwrap_or(false) {
command = command.arg("--embed-subs");
}
if config.embed_thumbnail.unwrap_or(true) {
command = command.arg("--embed-thumbnail");
}
if config.split_chapters.unwrap_or(false) {
command = command.arg("--split-chapters");
}
if let Some(format) = &config.format {
command = command.arg("--format").arg(format);
}
if let Some(cookie) = &config.cookie {
command = command.arg("--cookies").arg(cookie);
}
let output = command
.arg("--convert-thumbnails")
.arg(config.thumbnail_format.as_deref().unwrap_or("jpg"))
.arg("-o")
.arg(
config
.output_format
.as_deref()
.unwrap_or("%(title)s.%(ext)s"),
)
.arg(&video_url)
.output()
.map_err(|_| "yt-dlp command failed".to_string())
.unwrap();
if !output.status.success() {
let error_message = String::from_utf8_lossy(&output.stderr).to_string();
log::error!("Download for {video_url} failed: {error_message}");
false
} else {
true
}
}
#[derive(Clone)]
pub struct YtDlpModule {
config: YtDlpConfig,
@ -29,22 +122,36 @@ impl YtDlpModule {
log::info!("Fetching \"{item}\" videos");
match Self::get_latest_entries(item_url, self.config.limit.unwrap_or(10)) {
Ok(latest_videos) => {
for (video_title, video_url) in latest_videos {
let mut downloaded_videos = Vec::new();
for (video_title, video_url) in &latest_videos {
if self.db.check_for_url(&video_url) {
log::trace!("Skipping \"{video_title}\" because it was already downloaded");
} else {
match self.download(&video_url, cwd) {
Ok(()) => {
// mark as downloaded
self.db.insert_url(&self.name(), item, &video_url);
self.db.update_new_downloads(&self.name(), item, item_url);
log::info!("Downloaded \"{video_title}\"");
self.webhook_notify(&video_url, &video_title, item, true);
}
Err(e) => {
log::error!("Error downloading \"{video_title}\"; Reason: {e}");
self.webhook_notify(&video_url, &video_title, item, false);
}
downloaded_videos.push((
download_yt_dlp_async(
self.config.clone(),
video_url.clone(),
video_title.clone(),
cwd.to_str().unwrap().to_string(),
),
video_title.clone(),
video_url.clone(),
));
}
}
for (video, video_title, video_url) in downloaded_videos {
match video.wait().as_bool().unwrap() {
true => {
// mark as downloaded
self.db.insert_url(&self.name(), item, &video_url);
self.db.update_new_downloads(&self.name(), item, item_url);
log::info!("Downloaded \"{video_title}\"");
self.webhook_notify(&video_url, &video_title, item, true);
}
false => {
self.webhook_notify(&video_url, &video_title, item, false);
}
}
}
@ -157,80 +264,4 @@ impl YtDlpModule {
Ok(videos.into_iter().take(limit as usize).collect())
}
/// Runs `yt-dlp` for `video_url` inside `cwd`, using the options stored in
/// `self.config`.
///
/// `cwd` is passed to `ensure_dir_exists` before the command is started.
///
/// # Errors
/// Returns the fixed message `"yt-dlp command failed"` when the process cannot
/// be run, or the captured stderr text when it exits with a failure status.
fn download(&self, video_url: &str, cwd: &PathBuf) -> Result<(), String> {
    ensure_dir_exists(cwd);

    let cfg = &self.config;
    let mut yt_dlp = Command::new("yt-dlp");
    yt_dlp.current_dir(cwd).arg("--downloader").arg("aria2c");

    // (configured value, default when unset, command-line switch)
    // Two tables keep `--audio-format` at the same position in the argument list.
    let leading_switches = [
        (cfg.write_thumbnail, true, "--write-thumbnail"),
        (cfg.write_description, false, "--write-description"),
        (cfg.write_info_json, false, "--write-info-json"),
        (cfg.write_comments, false, "--write-comments"),
        (cfg.write_subs, false, "--write-subs"),
        (cfg.audio_only, false, "--extract-audio"),
    ];
    for (setting, default, switch) in leading_switches {
        if setting.unwrap_or(default) {
            yt_dlp.arg(switch);
        }
    }

    if let Some(audio_format) = &cfg.audio_format {
        yt_dlp.arg("--audio-format").arg(audio_format);
    }

    let trailing_switches = [
        (cfg.embed_chapters, true, "--embed-chapters"),
        (cfg.embed_info_json, true, "--embed-info-json"),
        (cfg.embed_metadata, true, "--embed-metadata"),
        (cfg.embed_subs, false, "--embed-subs"),
        (cfg.embed_thumbnail, true, "--embed-thumbnail"),
        (cfg.split_chapters, false, "--split-chapters"),
    ];
    for (setting, default, switch) in trailing_switches {
        if setting.unwrap_or(default) {
            yt_dlp.arg(switch);
        }
    }

    if let Some(format) = &cfg.format {
        yt_dlp.arg("--format").arg(format);
    }

    if let Some(cookie) = &cfg.cookie {
        yt_dlp.arg("--cookies").arg(cookie);
    }

    let thumbnail_format = cfg.thumbnail_format.as_deref().unwrap_or("jpg");
    let output_template = cfg.output_format.as_deref().unwrap_or("%(title)s.%(ext)s");

    let finished = yt_dlp
        .arg("--convert-thumbnails")
        .arg(thumbnail_format)
        .arg("-o")
        .arg(output_template)
        .arg(video_url)
        .output()
        .map_err(|_| "yt-dlp command failed".to_string())?;

    if finished.status.success() {
        Ok(())
    } else {
        // Surface whatever yt-dlp printed on stderr as the error text.
        Err(String::from_utf8_lossy(&finished.stderr).to_string())
    }
}
}