webarc/src/main.rs
2025-02-09 23:38:17 +01:00

196 lines
6.5 KiB
Rust

use based::asset::AssetRoutes;
use based::get_pg;
use based::ui::components::{NavBar, Shell};
use based::ui::prelude::*;
use rocket::routes;
use webarc::ai::EmbedStore;
use webarc::archive::{index_archive_db, WebsiteArchive};
use webarc::conf::{get_config, load_config, load_default_config};
mod args;
mod pages;
/// Entry point of the `webarc` binary.
///
/// Parses the command line and dispatches on the subcommand:
///
/// - `serve`: loads the given config, runs the SQL migrations when
///   `DATABASE_URL` is set, spawns the background tasks (embedding generation
///   when an AI config is present, favicon download, archive indexing) and
///   launches the Rocket server.
/// - `archive list | download | versions | get`: command-line access to the
///   archive located in the directory given by the `dir` argument.
///
/// Any other subcommand, or no subcommand at all, does nothing.
///
/// # Panics
///
/// Panics if an argument that is unwrapped below is absent, if the database
/// migrations fail, if Rocket fails to launch, or if JSON serialization fails.
#[tokio::main]
async fn main() {
    env_logger::init();

    let args = args::get_args();
    let archive_dir: &String = args.get_one("dir").unwrap();
    let shell = get_shell();

    match args.subcommand() {
        Some(("serve", serve_args)) => {
            let config: &String = serve_args.get_one("config").unwrap();
            load_config(config);

            let arc = WebsiteArchive::new(archive_dir);

            // Migrations are only attempted when a database URL is present in the environment.
            if std::env::var("DATABASE_URL").is_ok() {
                let pg = get_pg!();
                sqlx::migrate!("./migrations").run(pg).await.unwrap();
            }

            // The join handles of the tasks below are dropped, so the tasks run detached;
            // each one gets its own clone of the archive handle.
            if get_config().ai.is_some() {
                let archive = arc.clone();
                tokio::spawn(async move {
                    EmbedStore::generate_embeddings_for(&archive, &get_shell()).await;
                });
            }

            let archive = arc.clone();
            tokio::spawn(async move {
                webarc::favicon::download_favicons_for_sites(&archive.domains()).await;
            });

            let archive = arc.clone();
            tokio::spawn(async move { index_archive_db(&archive).await });

            rocket::build()
                .mount_assets()
                .mount(
                    "/",
                    routes![
                        pages::index,
                        pages::render_website,
                        pages::domain_info_route,
                        pages::favicon_route,
                        pages::vector_search,
                        pages::render_txt_website,
                        pages::timeline_route
                    ],
                )
                .manage(arc)
                .manage(shell)
                .launch()
                .await
                .unwrap();
        }
        Some(("archive", archive_args)) => {
            let arc = WebsiteArchive::new(archive_dir);

            match archive_args.subcommand() {
                Some(("list", list_args)) => {
                    let json = list_args.get_flag("json");
                    load_default_config();

                    // With a DOMAIN argument the paths of that domain are listed,
                    // otherwise the domains of the archive.
                    let domain = list_args.get_one::<String>("DOMAIN");
                    let elements = if let Some(domain) = domain {
                        arc.get_domain(domain)
                            .all_paths()
                            .into_iter()
                            .map(|x| x.path().clone())
                            .collect()
                    } else {
                        arc.domains()
                    };

                    if json {
                        println!("{}", serde_json::to_string(&elements).unwrap());
                    } else {
                        match domain {
                            Some(domain) => println!("Paths in {domain}:"),
                            None => println!("Domains in {}:", archive_dir),
                        }

                        if elements.is_empty() {
                            // Name the kind of entry that is actually missing.
                            if domain.is_some() {
                                println!("No paths");
                            } else {
                                println!("No domains");
                            }
                        }

                        for d in elements {
                            println!("- {d}");
                        }
                    }
                }
                Some(("download", dl_args)) => {
                    let url: &String = dl_args.get_one("URL").unwrap();
                    let config: &String = dl_args.get_one("config").unwrap();
                    load_config(config);

                    arc.archive_url(url).await;
                    println!("Saved {url} to archive");
                }
                Some(("versions", ver_args)) => {
                    load_default_config();

                    let domain: &String = ver_args.get_one("DOMAIN").unwrap();
                    // PATH is optional and defaults to the root path.
                    let path: String = ver_args
                        .get_one::<String>("PATH")
                        .cloned()
                        .unwrap_or_else(|| "/".to_string());

                    let versions = arc.get_domain(domain).path(&path).versions();

                    if ver_args.get_flag("json") {
                        println!("{}", serde_json::to_string(&versions).unwrap());
                    } else {
                        println!("Versions for {domain} / {path}:");
                        for v in versions {
                            println!("- {v}");
                        }
                    }
                }
                Some(("get", get_args)) => {
                    load_default_config();

                    let domain: &String = get_args.get_one("DOMAIN").unwrap();
                    // PATH is optional and defaults to the root path.
                    let path = get_args
                        .get_one::<String>("PATH")
                        .cloned()
                        .unwrap_or_else(|| "/".to_string());

                    let doc = arc.get_domain(domain).path(&path);

                    // Without an explicit VERSION the first entry of `versions()` is used.
                    // A document without any stored version is reported as missing
                    // instead of panicking on an empty list.
                    let ver = match get_args.get_one::<String>("VERSION") {
                        Some(ver) => ver.clone(),
                        None => match doc.versions().first() {
                            Some(first) => first.clone(),
                            None => {
                                println!("No document found");
                                std::process::exit(1);
                            }
                        },
                    };

                    let md = get_args.get_flag("md");

                    let content = match doc.render_local(Some(ver), &shell).await {
                        Some(content) => content,
                        None => {
                            println!("No document found");
                            std::process::exit(1);
                        }
                    };

                    if md {
                        let markdown = html2md::parse_html(&content);
                        println!("{markdown}");
                    } else {
                        println!("{}", content);
                    }
                }
                // Unknown or missing `archive` subcommand: nothing to do.
                _ => {}
            };
        }
        // Unknown or missing top-level subcommand: nothing to do.
        _ => {}
    }
}
/// Builds the [`Shell`] that `main` hands to Rocket as managed state and
/// passes to the archive rendering calls.
///
/// The first two constructor arguments are `Nothing()`. The third is a
/// `Background` in `Zinc::_950` wrapping a `MinHeight(ScreenValue::screen, …)`
/// element whose content is an empty white text with `ScreenValue::_8` top
/// padding. The shell has `use_ui()` applied and carries a "Web Archive"
/// navbar.
pub fn get_shell() -> Shell {
    // Empty text node that only contributes the top padding.
    let spacer = Padding(Text("").white()).top(ScreenValue::_8);

    // Body element: minimum height `ScreenValue::screen`, colored Zinc 950.
    let body = Background(MinHeight(ScreenValue::screen, spacer)).color(Zinc::_950);

    let base = Shell::new(Nothing(), Nothing(), body);
    base.use_ui().with_navbar(NavBar("Web Archive"))
}
// TODO : archive cleanup code