This commit is contained in:
JMARyA 2023-10-06 18:29:55 +02:00
commit e431d3b745
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
17 changed files with 3489 additions and 0 deletions

32
src/library/func.rs Normal file
View file

@ -0,0 +1,32 @@
use ring::digest::{Context, SHA256};
use std::io::Read;
/// Returns `true` when `filename` ends in one of the recognised video extensions
/// (`.mp4`, `.mkv`, `.webm`, `.avi`).
///
/// The comparison is case-sensitive and only inspects the end of the string, so
/// `"clip.mp4"` matches while `"clip.MP4"` and `"clip.mp4.part"` do not.
pub fn is_video_file(filename: &str) -> bool {
    // A plain suffix test is all that is needed here; no pattern has to be compiled per call.
    const VIDEO_EXTENSIONS: [&str; 4] = [".mp4", ".mkv", ".webm", ".avi"];
    VIDEO_EXTENSIONS
        .iter()
        .any(|ext| filename.ends_with(*ext))
}
/// Computes the SHA-256 digest of the file at `file_path` and returns it as a
/// lowercase hexadecimal string.
///
/// The file is streamed through a fixed-size buffer, so it is never loaded into
/// memory as a whole.
///
/// # Errors
/// Returns the underlying I/O error if the file cannot be opened or read.
pub fn calculate_sha256_hash(file_path: &str) -> std::io::Result<String> {
    log::info!("Hashing {file_path}");
    let mut context = Context::new(&SHA256);
    let mut file = std::fs::File::open(file_path)?;
    // Video files are large; a bigger buffer keeps the number of read calls down.
    let mut buffer = [0u8; 64 * 1024];
    loop {
        match file.read(&mut buffer) {
            Ok(0) => break,
            Ok(n) => context.update(&buffer[..n]),
            // An interrupted read is not a failure: retry instead of aborting the hash.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        }
    }
    let digest = context.finish();
    Ok(hex::encode(digest.as_ref()))
}

308
src/library/mod.rs Normal file
View file

@ -0,0 +1,308 @@
use rayon::prelude::IntoParallelIterator;
use rayon::prelude::ParallelIterator;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
use walkdir::WalkDir;
use func::is_video_file;
pub use video::Video;
mod func;
mod video;
/// Prepares `$query` on `$db`, binds `$param`, maps every returned row with `$map_fn`
/// and evaluates to a `Vec` holding the successfully mapped rows.
///
/// Rows for which `$map_fn` (or the row fetch) yields an error are silently skipped.
///
/// # Panics
/// Panics if the statement cannot be prepared or if starting the query fails.
#[macro_export]
macro_rules! query_row_map {
    ($db:ident, $query:expr, $param:expr, $map_fn:expr) => {{
        let mut stmt = $db.prepare($query).unwrap();
        let rows: Vec<_> = stmt
            .query_map($param, $map_fn)
            .unwrap()
            .filter_map(|row| row.ok())
            .collect();
        rows
    }};
}
/// Handle to the video library stored in a SQLite database.
///
/// Clones share the same underlying connection because it lives behind an `Arc`.
#[derive(Debug, Clone)]
pub struct Library {
/// Shared database connection; the mutex serialises every access to it.
conn: Arc<Mutex<rusqlite::Connection>>,
}
impl Library {
    /// Opens the `videos.db` database and makes sure the schema has been applied.
    ///
    /// # Panics
    /// Panics if the database cannot be opened or if applying the schema fails.
    pub fn new() -> Self {
        log::info!("Creating database connection");
        let connection = rusqlite::Connection::open("videos.db").unwrap();
        let library = Self {
            conn: Arc::new(Mutex::new(connection)),
        };
        library.init_schema();
        library
    }
}
// DB
impl Library {
    /// Executes the statements from `../schema.sql` inside a single transaction.
    ///
    /// # Panics
    /// Panics if the connection mutex is poisoned, or if the transaction cannot be
    /// started, executed or committed.
    pub fn init_schema(&self) {
        let mut guard = self.conn.lock().unwrap();
        let schema_tx = guard.transaction().unwrap();
        schema_tx
            .execute_batch(include_str!("../schema.sql"))
            .unwrap();
        schema_tx.commit().unwrap();
    }
}
// Functions
impl Library {
// directories
/// Returns every distinct `directory` value found in the `videos` table.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn get_directories(&self) -> Vec<String> {
    let conn = self.conn.lock().unwrap();
    let directories: Vec<String> = query_row_map!(
        conn,
        "SELECT DISTINCT directory FROM videos;",
        [],
        |row| row.get::<usize, String>(0)
    );
    directories
}
/// Returns the videos stored under directory `dir`, newest upload first.
///
/// Only videos that have a matching `youtube_meta` row are returned, because the
/// query inner-joins that table to sort by `upload_date`.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn get_directory_videos(&self, dir: &str) -> Vec<Video> {
    let conn = self.conn.lock().unwrap();
    query_row_map!(
        conn,
        "SELECT sha256 FROM videos INNER JOIN youtube_meta ON youtube_meta.id = videos.youtube_id WHERE directory = ?1 ORDER BY youtube_meta.upload_date DESC;",
        &[dir],
        |row| {
            let sha256: String = row.get(0)?;
            Ok(Video::from_hash(&sha256, Arc::clone(&self.conn)))
        }
    )
}
// YT
/// Looks up the uploader name recorded for the YouTube uploader `id`.
///
/// # Panics
/// Panics if no `youtube_meta` row exists for `id`, if the connection mutex is
/// poisoned, or if the query cannot be prepared or started.
pub fn get_channel_name_yt(&self, id: &str) -> String {
    let conn = self.conn.lock().unwrap();
    let names: Vec<String> = query_row_map!(
        conn,
        "SELECT uploader_name FROM youtube_meta WHERE uploader_id = ?1",
        &[id],
        |row| row.get(0)
    );
    // Only the first match is of interest.
    names.into_iter().next().unwrap()
}
/// Returns every distinct tag stored in `youtube_meta_tags`.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn get_tags_yt(&self) -> Vec<String> {
    let conn = self.conn.lock().unwrap();
    query_row_map!(
        conn,
        "SELECT DISTINCT tag FROM youtube_meta_tags",
        [],
        |row| row.get::<usize, String>(0)
    )
}
/// Returns all videos whose YouTube metadata carries the tag `tag`.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn get_videos_by_tag_yt(&self, tag: &str) -> Vec<Video> {
    let conn = self.conn.lock().unwrap();
    query_row_map!(
        conn,
        "SELECT sha256 FROM youtube_meta_tags INNER JOIN youtube_meta ON youtube_meta_tags.youtube_id = youtube_meta.id INNER JOIN videos ON videos.youtube_id = youtube_meta.id WHERE tag = ?1;",
        &[tag],
        |row| {
            let sha256: String = row.get(0)?;
            Ok(Video::from_hash(&sha256, Arc::clone(&self.conn)))
        }
    )
}
/// Returns the videos uploaded by the YouTube uploader `id`, newest upload first.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn get_channel_videos_yt(&self, id: &str) -> Vec<Video> {
    let conn = self.conn.lock().unwrap();
    query_row_map!(
        conn,
        "SELECT sha256 FROM youtube_meta INNER JOIN videos ON youtube_meta.id = videos.youtube_id WHERE uploader_id = ?1 ORDER BY youtube_meta.upload_date DESC;",
        &[id],
        |row| {
            let sha256: String = row.get(0)?;
            Ok(Video::from_hash(&sha256, Arc::clone(&self.conn)))
        }
    )
}
// videos
/// Returns up to `n` videos picked in random order by the database.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn get_random_videos(&self, n: usize) -> Vec<Video> {
    let conn = self.conn.lock().unwrap();
    let picked: Vec<Video> = query_row_map!(
        conn,
        "SELECT sha256 FROM videos ORDER BY RANDOM() LIMIT ?1;",
        [n],
        |row| {
            let sha256: String = row.get(0)?;
            Ok(Video::from_hash(&sha256, Arc::clone(&self.conn)))
        }
    );
    picked
}
/// Looks up the video with SHA-256 `hash`.
///
/// Returns the stored directory name together with the [`Video`], or `None`
/// when no row matches.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn get_video_by_hash(&self, hash: &str) -> Option<(String, Video)> {
    let conn = self.conn.lock().unwrap();
    let matches: Vec<(String, Video)> = query_row_map!(
        conn,
        "SELECT sha256, directory FROM videos WHERE sha256 = ?1;",
        &[hash],
        |row| {
            let directory: String = row.get(1)?;
            let sha256: String = row.get(0)?;
            Ok((directory, Video::from_hash(&sha256, Arc::clone(&self.conn))))
        }
    );
    // Only the first match is handed back.
    matches.into_iter().next()
}
/// Looks up the video whose `youtube_id` column equals `id`.
///
/// Returns the stored directory name together with the [`Video`], or `None`
/// when no row matches.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn get_video_by_youtube_id(&self, id: &str) -> Option<(String, Video)> {
    let conn = self.conn.lock().unwrap();
    let matches: Vec<(String, Video)> = query_row_map!(
        conn,
        "SELECT sha256, directory FROM videos WHERE youtube_id = ?1",
        &[id],
        |row| {
            let directory: String = row.get(1)?;
            let sha256: String = row.get(0)?;
            Ok((directory, Video::from_hash(&sha256, Arc::clone(&self.conn))))
        }
    );
    // Only the first match is handed back.
    matches.into_iter().next()
}
/// Reports whether a row with exactly this `path` already exists in `videos`.
///
/// # Panics
/// Panics if `path` is not valid UTF-8, if the connection mutex is poisoned, or
/// if the query cannot be prepared or started.
pub fn has_path(&self, path: &Path) -> bool {
    let conn = self.conn.lock().unwrap();
    let mut stmt = conn
        .prepare("SELECT path FROM videos WHERE path = ?1;")
        .unwrap();
    let mut hits = stmt
        .query_map([path.to_str().unwrap()], |row| row.get::<usize, String>(0))
        .unwrap()
        .flatten();
    // One readable row is enough to know the path is present.
    let found = hits.next().is_some();
    found
}
// search
/// Searches videos whose title, description, uploader name, directory or tags
/// contain `query` (SQL `LIKE '%query%'`).
///
/// Results are ordered by the number of matching columns (tags excluded from the
/// score), then by upload date, and paged with `n` rows starting at offset `start`.
///
/// # Panics
/// Panics if the connection mutex is poisoned or the query cannot be prepared or started.
pub fn search_video(&self, query: &str, start: usize, n: usize) -> Vec<Video> {
    let conn = self.conn.lock().unwrap();
    let pattern = format!("%{query}%");
    // `n` and `start` are plain integers, so interpolating them into the SQL text is safe.
    let sql = format!(
        r#"SELECT DISTINCT
vm.sha256,
( -- Calculate a score for the video based on matches
(ym.title LIKE ?1) +
(ym.description LIKE ?1) +
(ym.uploader_name LIKE ?1) +
(vm.directory LIKE ?1)
) AS score
FROM
youtube_meta AS ym
LEFT JOIN
videos AS vm ON ym.id = vm.youtube_id
LEFT JOIN
youtube_meta_tags AS ymt ON ym.id = ymt.youtube_id
WHERE
(ym.title LIKE ?1) OR
(ym.description LIKE ?1) OR
(ym.uploader_name LIKE ?1) OR
(vm.directory LIKE ?1) OR
(ymt.tag LIKE ?1)
ORDER BY
score DESC,
ym.upload_date DESC LIMIT {n} OFFSET {start};"#
    );
    query_row_map!(conn, &sql, &[&pattern], |row| {
        let sha256: String = row.get(0)?;
        Ok(Video::from_hash(&sha256, Arc::clone(&self.conn)))
    })
}
}
// video library scan
impl Library {
pub fn scan_dir(&self, dir: &PathBuf) {
log::info!("Scanning {dir:?}");
let lib = self.get_video_paths(dir);
let _: Vec<Video> = lib
.into_par_iter()
.map(|x| Video::insert_path_to_db(&self.conn.clone(), &x))
.collect();
let db = self.conn.lock().unwrap();
db.flush_prepared_statement_cache();
log::info!("Finished scanning {dir:?}");
}
fn get_video_paths(&self, dir: &PathBuf) -> Vec<PathBuf> {
let mut videos: Vec<PathBuf> = vec![];
for entry in WalkDir::new(dir).follow_links(true) {
match entry {
Ok(entry) => {
if entry.file_type().is_file() {
log::info!("Scanning {entry:?}");
let file_name = entry.file_name().to_string_lossy();
if is_video_file(&file_name) {
let video_path = entry.path().to_path_buf();
if self.has_path(&video_path) {
continue;
}
videos.push(video_path);
}
}
}
Err(err) => eprintln!("Error reading entry: {err}"),
}
}
videos
}
}

264
src/library/video.rs Normal file
View file

@ -0,0 +1,264 @@
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
use crate::library::func::calculate_sha256_hash;
use crate::query_row_map;
use crate::yt_meta;
/// A single library entry, identified by the SHA-256 hash of its file.
///
/// A value created through `Video::from_hash` only carries `hash` and `db`; the
/// accessor methods query the database on demand and store what they find in the
/// remaining fields.
#[derive(Debug, Default, Clone)]
pub struct Video {
/// Name of the folder directly containing the file; populated only by `insert_path_to_db`.
directory: Option<String>,
/// Location of the video file on disk.
path: Option<PathBuf>,
/// Title stored in the `videos` table.
title: Option<String>,
/// SHA-256 hash of the file contents, hex encoded; the key used for every lookup.
hash: String,
/// Identifier of the associated `youtube_meta` row, if there is one.
youtube_id: Option<String>,
/// Description taken from the YouTube metadata.
description: Option<String>,
/// Uploader name taken from the YouTube metadata.
uploader_name: Option<String>,
/// Uploader identifier taken from the YouTube metadata.
uploader_id: Option<String>,
/// Duration taken from the YouTube metadata; populated only by `insert_path_to_db`.
duration: Option<usize>,
/// View count taken from the YouTube metadata.
views: Option<usize>,
/// Categories stored in `youtube_meta_categories`.
categories: Option<Vec<String>>,
/// Tags stored in `youtube_meta_tags`.
tags: Option<Vec<String>>,
/// Upload date taken from the YouTube metadata, kept as the stored string.
upload_date: Option<String>,
/// Connection used for the on-demand lookups; `None` for a default-constructed value.
db: Option<Arc<Mutex<rusqlite::Connection>>>,
}
// Video properties
impl Video {
/// Loads `title` and `path` for this video's hash from the `videos` table.
///
/// When no readable row exists for the hash, both fields are left untouched and a
/// warning is logged, so the `Option`-returning accessors can report `None`
/// instead of panicking.
///
/// # Panics
/// Panics if the video has no database handle, if the connection mutex is
/// poisoned, or if the query cannot be prepared or started.
fn get_video_info(&mut self) {
    log::info!("Fetching Video Metadata for {}", self.hash);
    let db = self.db.as_mut().unwrap().lock().unwrap();
    let rows: Vec<(String, String)> = query_row_map!(
        db,
        "SELECT title, path FROM videos WHERE sha256 = ?1",
        &[&self.hash],
        |row| Ok((row.get(0)?, row.get(1)?))
    );
    match rows.into_iter().next() {
        Some((title, path)) => {
            self.title = Some(title);
            self.path = Some(PathBuf::from(path));
        }
        None => log::warn!("No video entry found for hash {}", self.hash),
    }
}
/// Loads the YouTube metadata (id, description, uploader, upload date, views,
/// categories and tags) that belongs to this video's hash.
///
/// Nothing is changed when the video has no readable `youtube_meta` row.
///
/// # Panics
/// Panics if the video has no database handle, if the connection mutex is
/// poisoned, or if a query cannot be prepared or started.
fn get_youtube_meta_info(&mut self) {
    log::info!("Fetching YouTube Metadata for {}", self.hash);
    let db = self.db.as_mut().unwrap().lock().unwrap();
    let rows: Vec<(String, String, String, String, String, usize)> = query_row_map!(
        db,
        "SELECT id, description, uploader_name, uploader_id, upload_date, views FROM youtube_meta WHERE id = (SELECT youtube_id FROM videos WHERE sha256 = ?1 LIMIT 1)",
        &[&self.hash],
        |row| {
            Ok((
                row.get(0)?,
                row.get(1)?,
                row.get(2)?,
                row.get(3)?,
                row.get(4)?,
                row.get(5)?,
            ))
        }
    );
    let (id, description, uploader_name, uploader_id, upload_date, views) =
        match rows.into_iter().next() {
            Some(row) => row,
            None => return,
        };

    // Categories and tags live in their own tables, keyed by the YouTube id.
    let categories: Vec<String> = query_row_map!(
        db,
        "SELECT category FROM youtube_meta_categories WHERE youtube_id = ?1",
        &[&id],
        |row| row.get(0)
    );
    let tags: Vec<String> = query_row_map!(
        db,
        "SELECT tag FROM youtube_meta_tags WHERE youtube_id = ?1",
        &[&id],
        |row| row.get(0)
    );

    self.youtube_id = Some(id);
    self.description = Some(description);
    self.uploader_name = Some(uploader_name);
    self.uploader_id = Some(uploader_id);
    self.upload_date = Some(upload_date);
    self.views = Some(views);
    self.categories = Some(categories);
    self.tags = Some(tags);
}
/// Returns the video title, querying the database first if it is not cached yet.
pub fn title(&mut self) -> Option<&str> {
    match self.title {
        Some(_) => {}
        None => self.get_video_info(),
    }
    self.title.as_deref()
}
/// Returns an owned copy of the file path, querying the database first if it is
/// not cached yet.
pub fn path(&mut self) -> Option<PathBuf> {
    let cached = self.path.is_some();
    if !cached {
        self.get_video_info();
    }
    self.path.clone()
}
/// Returns the YouTube description, loading the YouTube metadata first if it is
/// not cached yet.
pub fn description(&mut self) -> Option<&str> {
    match self.description {
        Some(_) => {}
        None => self.get_youtube_meta_info(),
    }
    self.description.as_deref()
}
/// Returns the YouTube view count, loading the YouTube metadata first if it is
/// not cached yet.
pub fn views(&mut self) -> Option<usize> {
    if let Some(count) = self.views {
        return Some(count);
    }
    self.get_youtube_meta_info();
    self.views
}
/// Returns the uploader's name, loading the YouTube metadata first if it is not
/// cached yet.
pub fn uploader_name(&mut self) -> Option<&str> {
    let cached = self.uploader_name.is_some();
    if !cached {
        self.get_youtube_meta_info();
    }
    self.uploader_name.as_deref()
}
/// Returns the uploader's id, loading the YouTube metadata first if it is not
/// cached yet.
pub fn uploader_id(&mut self) -> Option<&str> {
    match self.uploader_id {
        Some(_) => {}
        None => self.get_youtube_meta_info(),
    }
    self.uploader_id.as_deref()
}
/// Returns the upload date string, loading the YouTube metadata first if it is
/// not cached yet.
pub fn upload_date(&mut self) -> Option<&str> {
    let cached = self.upload_date.is_some();
    if !cached {
        self.get_youtube_meta_info();
    }
    self.upload_date.as_deref()
}
/// Returns the YouTube categories, loading the YouTube metadata first if they are
/// not cached yet.
pub fn categories(&mut self) -> Option<&Vec<String>> {
    match self.categories {
        Some(_) => {}
        None => self.get_youtube_meta_info(),
    }
    self.categories.as_ref()
}
/// Returns the YouTube tags, loading the YouTube metadata first if they are not
/// cached yet.
pub fn tags(&mut self) -> Option<&Vec<String>> {
    let cached = self.tags.is_some();
    if !cached {
        self.get_youtube_meta_info();
    }
    self.tags.as_ref()
}
/// Returns the YouTube video id, loading the YouTube metadata first if it is not
/// cached yet.
pub fn youtube_id(&mut self) -> Option<&str> {
    match self.youtube_id {
        Some(_) => {}
        None => self.get_youtube_meta_info(),
    }
    self.youtube_id.as_deref()
}
/// Returns the SHA-256 hash that identifies this video.
pub fn hash(&self) -> &str {
    self.hash.as_str()
}
}
// Video Init
impl Video {
pub fn from_hash(hash: &str, db: Arc<Mutex<rusqlite::Connection>>) -> Self {
Self {
hash: hash.to_owned(),
db: Some(db),
..Default::default()
}
}
/// Hashes the file at `v`, records it in the database and returns the resulting
/// [`Video`].
///
/// When `yt_meta::get_youtube_metadata` yields metadata for the file, the video row
/// is linked to it and the metadata, its categories and its tags are stored as
/// well. Otherwise the file stem is used as the title.
///
/// # Panics
/// Panics if
/// - `v` is not valid UTF-8, has no file stem, or has no named parent directory,
/// - the file cannot be hashed,
/// - the connection mutex is poisoned or the transaction cannot be started or committed,
/// - inserting the `videos` row fails,
/// - metadata is present but lacks the YouTube id, description, uploader name,
///   uploader id, duration, views or upload date.
pub fn insert_path_to_db(db: &Arc<Mutex<rusqlite::Connection>>, v: &PathBuf) -> Self {
    log::info!("Add {v:?} to library");
    let path_str = v.to_str().unwrap();
    let id = calculate_sha256_hash(path_str).unwrap();
    let file_name = v.file_stem().unwrap().to_str().unwrap().to_owned();
    let dir = v
        .parent()
        .unwrap()
        .file_name()
        .unwrap()
        .to_str()
        .unwrap()
        .to_owned();

    // Look the metadata up before taking the connection lock, so other threads are
    // not kept waiting on the database while this lookup runs.
    let meta = yt_meta::get_youtube_metadata(v);

    let mut sdb = db.lock().unwrap();
    let tx = sdb.transaction().unwrap();

    if let Some(meta) = meta {
        // Read every getter once and reuse the values for the inserts and the result.
        let title = meta.title();
        let youtube_id = meta.youtube_id().unwrap();
        let description = meta.description();
        let uploader_name = meta.uploader_name();
        let uploader_id = meta.uploader_id();
        let duration = meta.duration();
        let views = meta.views();
        let upload_date = meta.upload_date();
        let categories = meta.categories();
        let tags = meta.tags();

        tx.execute(
            "INSERT INTO videos (sha256, directory, path, title, youtube_id) VALUES (?1, ?2, ?3, ?4, ?5)",
            [
                id.as_str(),
                dir.as_str(),
                path_str,
                title.as_str(),
                youtube_id.as_str(),
            ],
        )
        .unwrap();

        let duration_text = duration.unwrap().to_string();
        let views_text = views.unwrap().to_string();
        // NOTE(review): failures of the metadata inserts are ignored on purpose here,
        // presumably because several files may share one YouTube id — confirm.
        let _ = tx.execute(
            "INSERT INTO youtube_meta (id, title, description, uploader_name, uploader_id, duration, views, upload_date) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
            [
                youtube_id.as_str(),
                title.as_str(),
                description.as_deref().unwrap(),
                uploader_name.as_deref().unwrap(),
                uploader_id.as_deref().unwrap(),
                duration_text.as_str(),
                views_text.as_str(),
                upload_date.as_deref().unwrap(),
            ],
        );

        // Categories are optional, exactly like tags.
        if let Some(categories) = categories.as_ref() {
            for cat in categories {
                let _ = tx.execute(
                    "INSERT INTO youtube_meta_categories (youtube_id, category) VALUES (?1, ?2)",
                    [youtube_id.as_str(), cat.as_str()],
                );
            }
        }
        if let Some(tags) = tags.as_ref() {
            for tag in tags {
                let _ = tx.execute(
                    "INSERT INTO youtube_meta_tags (youtube_id, tag) VALUES (?1, ?2)",
                    [youtube_id.as_str(), tag.as_str()],
                );
            }
        }
        tx.commit().unwrap();

        return Self {
            directory: Some(dir),
            path: Some(v.to_owned()),
            title: Some(title),
            hash: id,
            youtube_id: Some(youtube_id),
            description,
            uploader_name,
            uploader_id,
            duration,
            views,
            categories,
            tags,
            upload_date,
            db: Some(db.clone()),
        };
    }

    tx.execute(
        "INSERT INTO videos (sha256, directory, path, title) VALUES (?1, ?2, ?3, ?4)",
        [id.as_str(), dir.as_str(), path_str, file_name.as_str()],
    )
    .unwrap();
    tx.commit().unwrap();

    Self {
        directory: Some(dir),
        path: Some(v.to_owned()),
        title: Some(file_name),
        hash: id,
        db: Some(db.clone()),
        ..Self::default()
    }
}
}