JMARyA 2023-08-26 13:45:43 +02:00
commit 72a8357548
Signed by: jmarya
GPG key ID: 901B2ADDF27C2263
18 changed files with 5327 additions and 0 deletions

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
/target
.vscode

1835
Cargo.lock generated Normal file

File diff suppressed because it is too large

23
Cargo.toml Normal file

@@ -0,0 +1,23 @@
[package]
name = "scrape"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
thirtyfour = "0.31.0"
tokio = { version = "1.32.0", features = ["full"] }
clap = "2.33"
strum = { version = "0.21", features = ["derive"] }
regex = "1.9.3"
chrono = "0.4.26"
url = "2.4.0"
serde = "1.0.183"
serde_json = "1.0.105"
async-trait = "0.1.73"
log = "0.4"
env_logger = "0.10"
base64 = "0.21.2"
reqwest = { version = "0.11.18" }
urlencoding = "2.1.3"

3
README.md Normal file

@@ -0,0 +1,3 @@
# scrape
Scrape is a tool that scrapes websites and turns their data into JSON, similar to yt-dlp but for web scraping. The scraped data is printed to stdout as a single JSON object.

59
docs/add-new-extractor.md Normal file

@@ -0,0 +1,59 @@
# Adding a New Extractor
## Create a New Extractor Source File
Create a new Rust source file inside `src/extractors`.
`src/extractors/myext.rs`:
```rust
use super::prelude::*;

pub struct MySiteExtractor;

impl MySiteExtractor {
    pub fn new() -> Self { Self {} }
}
```
## Implement the Extractor Trait
Implement the `Extractor` trait by providing the required methods: `supported_hosts`, `name`, and `run_scrape`. The trait is async, so the `impl` block must be annotated with `#[async_trait]`:
```rust
#[async_trait]
impl Extractor for MySiteExtractor {
    fn supported_hosts(&self) -> Vec<&str> {
        vec!["my-site.com"]
    }

    fn name(&self) -> String {
        "My Site Extractor".to_string()
    }

    async fn run_scrape(
        &self,
        url: Url,
        browser: &mut crate::Browser,
        conf: &Config,
    ) -> Result<HashMap<String, Value>, String> {
        // scraping logic
        unimplemented!()
    }
}
```
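As a rough guide, a typical `run_scrape` body first navigates to the page and then fills the result map using the `Browser` helpers the existing extractors rely on (`goto`, `get_element_text_by_xpath`, ...). The sketch below shows one way to replace the `unimplemented!()` placeholder; the XPath selector and the `title` key are illustrative only, not part of any real site:
```rust
async fn run_scrape(
    &self,
    url: Url,
    browser: &mut crate::Browser,
    _conf: &Config, // unused in this sketch
) -> Result<HashMap<String, Value>, String> {
    // Navigate to the page to scrape.
    browser.goto(url.to_string()).await.unwrap();
    let mut info: HashMap<String, Value> = HashMap::new();
    // Extract a single field via XPath; the selector is a placeholder.
    let title = browser
        .get_element_text_by_xpath(r#"//h1[@class="title"]"#)
        .await
        .ok_or("could not get title")?;
    info.insert("title".into(), title.into());
    Ok(info)
}
```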
## Register the Extractor
In `src/extractors/mod.rs`, declare the new module and add your extractor to the list returned by `get_extractors`:
```rust
pub mod myext;

[...]

#[must_use]
pub fn get_extractors() -> Vec<Box<dyn Extractor>> {
    vec![
        ...
        // Add your new extractor
        Box::new(myext::MySiteExtractor::new()),
        ...
    ]
}
```
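Once registered, no further wiring is needed: `scrape_url` in `src/extractors/mod.rs` iterates over `get_extractors()` and picks the first extractor whose default `match_url` finds the URL's host in `supported_hosts`. A simplified sketch of that dispatch, assuming it sits inside the `extractors` module so `Extractor` and `get_extractors` are in scope (the helper name `pick_extractor` is hypothetical):
```rust
use url::Url;

// Hypothetical helper mirroring the selection loop in scrape_url():
// the first extractor whose supported_hosts() contains the URL's host wins.
fn pick_extractor(url: &Url) -> Option<Box<dyn Extractor>> {
    get_extractors().into_iter().find(|e| e.match_url(url))
}
```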

123
src/extractors/amazon.rs Normal file

@@ -0,0 +1,123 @@
use crate::{
util::{currency, escape_key},
Language,
};
use super::prelude::*;
pub struct AmazonExtractor {}
impl AmazonExtractor {
pub fn new() -> Self {
Self {}
}
pub async fn amazon_product(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
let mut url = url;
match conf.language {
Language::en_US => {
url.query_pairs_mut().append_pair("language", "en_GB");
b.goto(url.to_string()).await.unwrap();
}
_ => {
url.query_pairs_mut()
.append_pair("language", &conf.language.to_string());
b.goto(url.to_string()).await.unwrap();
}
}
log::info!(
"Changing to '{url}' because of {:?} language",
conf.language
);
let mut info: HashMap<String, Value> = HashMap::new();
b.click_on_xpath(r#"//*[@id="sp-cc-accept"]"#).await;
info.insert(
"product_title".into(),
b.get_element_text_by_xpath(r#"//*[@id="productTitle"]"#)
.await
.unwrap()
.into(),
);
info.insert(
"star_rating".into(),
b.get_element_text_by_xpath(
r#"//*[@class="reviewCountTextLinkedHistogram noUnderline"]/span[1]/a/span"#,
)
.await
.unwrap()
.replace(',', ".")
.parse::<f64>()
.unwrap()
.into(),
);
let price_symbol = b
.get_element_text_by_xpath(
r#"//*[@id="corePrice_feature_div"]//span[@class="a-price-symbol"]"#,
)
.await
.unwrap();
let whole = b
.get_element_text_by_xpath(
r#"//*[@id="corePrice_feature_div"]//span[@class="a-price-whole"]"#,
)
.await
.unwrap()
.replace(['.', ','], "");
let fraction = b
.get_element_text_by_xpath(
r#"//*[@id="corePrice_feature_div"]//span[@class="a-price-fraction"]"#,
)
.await
.unwrap();
info.insert(
"price".into(),
currency(&format!("{whole}.{fraction}{price_symbol}")).into(),
);
let mut tech_details: Map<String, Value> = Map::new();
let tech_details_html = b
.get_element_by_xpath(r#"//*[@id="productDetails_techSpec_section_1"]/tbody"#)
.await
.unwrap();
for detail in tech_details_html.find_all(By::Tag("tr")).await.unwrap() {
let key = text_from!(detail.find(By::Tag("th")).await.unwrap());
let value = text_from!(detail.find(By::Tag("td")).await.unwrap());
tech_details.insert(escape_key(&key), value.into());
}
info.insert("technical_details".into(), tech_details.into());
Ok(info)
}
}
#[async_trait]
impl Extractor for AmazonExtractor {
fn name(&self) -> String {
"AMAZON".to_owned()
}
fn supported_hosts(&self) -> Vec<&str> {
vec!["www.amazon.de"]
}
async fn run_scrape(
&self,
url: Url,
browser: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
self.amazon_product(url, browser, conf).await
}
}

343
src/extractors/anilist.rs Normal file

@@ -0,0 +1,343 @@
use regex::Regex;
use crate::util::{escape_key, extract_texts_from_elements, handle_media_url};
use super::prelude::*;
pub struct AnilistExtractor {}
impl AnilistExtractor {
pub fn new() -> Self {
Self {}
}
async fn anime(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
b.goto(url.to_string()).await.unwrap();
let mut info: HashMap<String, Value> = HashMap::new();
if b.wait_for(r#"//span[text()="AGREE"]"#, Duration::from_secs(5))
.await
{
b.click_on_xpath(r#"//span[text()="AGREE"]"#).await;
}
if b.wait_for(
r#"//div[@class="header"]//div[@class="content"]/h1"#,
Duration::from_secs(5),
)
.await
{
info.insert(
"title".into(),
b.get_element_text_by_xpath(r#"//div[@class="header"]//div[@class="content"]/h1"#)
.await
.unwrap()
.into(),
);
info.insert(
"cover".into(),
handle_media_url(
&b.get_element_attr_by_xpath(
r#"//div[@class="header"]//div[@class="cover-wrap-inner"]/img"#,
"src",
)
.await
.unwrap(),
"cover",
false,
conf,
)
.await
.into(),
);
if let Some(desc_show_more) = b
.get_element_by_xpath(r#"//span[@class="description-length-toggle"]"#)
.await
{
b.scroll_to_element(&desc_show_more).await.unwrap();
desc_show_more.click().await.unwrap();
}
info.insert(
"description".into(),
b.get_element_text_by_xpath(r#"//div[@class="header"]//p[@class="description"]"#)
.await
.unwrap()
.into(),
);
let data_html = b
.get_element_by_xpath(r#"//div[@class="data"]"#)
.await
.unwrap();
for data_field in data_html.find_all(By::XPath("./div")).await.unwrap() {
let key = escape_key(&text_from!(data_field
.find(By::XPath(r#"./div[@class="type"]"#))
.await
.unwrap()));
let val = data_field
.find(By::XPath(r#"./*[@class="value"]"#))
.await
.unwrap();
let val_list = val.find_all(By::XPath("./span")).await.unwrap();
let val_list = extract_texts_from_elements(val_list).await;
// TODO : parse data fields
if val_list.is_empty() {
info.insert(key, text_from!(val).into());
} else {
info.insert(key, val_list.into());
}
}
let mut tags: Vec<String> = vec![];
let tags_html = b
.get_element_by_xpath(r#"//div[@class="tags"]"#)
.await
.unwrap();
for tag_html in tags_html
.find_all(By::XPath(r#"./div[@class="tag"]"#))
.await
.unwrap()
{
tags.push(text_from!(tag_html
.find(By::XPath(r#"./a[1]"#))
.await
.unwrap()));
}
info.insert("tags".into(), tags.into());
let mut websites: Map<String, Value> = Map::new();
let websites_html = b
.get_element_by_xpath(r#"//div[@class="external-links"]/div"#)
.await
.unwrap();
for website in websites_html.find_all(By::XPath("./a")).await.unwrap() {
let web_link = attr_from!(website, "href");
let mut web_name = text_from!(website
.find(By::XPath(r#"./span[@class="name"]"#))
.await
.unwrap());
let language = match website
.find(By::XPath(r#".//span[@class="language"]"#))
.await
{
Ok(el) => Some(text_from!(el)),
Err(_) => None,
};
if language.is_some() {
web_name = web_name.replace(&format!(" {}", language.clone().unwrap()), "");
}
websites.insert(
web_name,
json!({
"url": web_link,
"language": language
}),
);
}
info.insert("websites".into(), websites.into());
let mut relations: Vec<Value> = vec![];
let relations_html = b
.get_element_by_xpath(r#"//div[@class="relations"]"#)
.await
.unwrap();
for relation_html in relations_html
.find_all(By::XPath(r#"./div/div"#))
.await
.unwrap()
{
let relation_info = text_from!(relation_html
.find(By::XPath(r#"./div[@class="content"]/div[@class="info"]"#))
.await
.unwrap());
let (rel_type, rel_status) = relation_info.split_once(" · ").unwrap();
let relation = json!({
"url": b.get_url_from_link(relation_html.find(By::XPath("./a")).await.unwrap()).await,
"kind": text_from!(relation_html.find(By::XPath(r#"./div[@class="content"]/div[@class="info-header"]/div"#)).await.unwrap()),
"title": text_from!(relation_html.find(By::XPath(r#"./div[@class="content"]/a[@class="title"]"#)).await.unwrap()),
"type": rel_type,
"status": rel_status
});
relations.push(relation);
}
info.insert("relations".into(), relations.into());
let recommendations_html = b
.get_element_by_xpath(r#"//div[@class="recommendations"]"#)
.await
.unwrap();
let show_all_button = recommendations_html
.find(By::XPath(r#".//div[@class="view-all"]/div"#))
.await
.unwrap();
b.scroll_to_element(&show_all_button).await.unwrap();
show_all_button.click().await.unwrap();
let mut recommendations: Map<String, Value> = Map::new();
for rec_html in recommendations_html
.find_all(By::XPath(r#"./div/div[@class="recommendation-card"]/a"#))
.await
.unwrap()
{
let rec_url = b.get_url_from_link(rec_html.clone()).await;
let rec_title = text_from!(rec_html);
recommendations.insert(rec_title, rec_url.into());
}
info.insert("recommendations".into(), recommendations.into());
let mut nav_urls: Vec<String> = vec![];
for nav in b
.get_elements_by_xpath(r#"//div[@class="nav"]/a"#)
.await
.unwrap()
{
nav_urls.push(attr_from!(nav, "href"));
}
for nav_url in nav_urls {
if nav_url.ends_with("characters") {
info.insert(
"characters".into(),
self.characters_page(&nav_url, b, conf).await?.into(),
);
}
if nav_url.ends_with("staff") {
info.insert(
"staff".into(),
self.staff_page(&nav_url, b, conf).await?.into(),
);
}
if nav_url.ends_with("stats") {
info.insert(
"stats".into(),
self.stats_page(&nav_url, b, conf).await?.into(),
);
}
// todo : reviews?
}
} else {
Err("Scrape failed")?;
}
Ok(info)
}
async fn characters_page(
&self,
_url: &str,
_b: &mut crate::Browser,
_conf: &Config,
) -> Result<Map<String, Value>, String> {
// todo : character page
Ok(Map::new())
}
async fn staff_page(
&self,
_url: &str,
_b: &mut crate::Browser,
_conf: &Config,
) -> Result<Map<String, Value>, String> {
// todo : staff page
Ok(Map::new())
}
async fn stats_page(
&self,
url: &str,
b: &mut crate::Browser,
_conf: &Config,
) -> Result<Map<String, Value>, String> {
let mut data = Map::new();
b.goto(url).await.unwrap();
if b.wait_for(r#"//div[@class="rankings graph"]"#, Duration::from_secs(5))
.await
{
let mut ranking = Map::new();
for rank in b.get_elements_by_xpath(r#"//div[@class="rankings graph"]/a[@class="ranking popular"]/span[@class="rank-text"]"#).await.unwrap() {
let re = Regex::new(r"#(\d+)").unwrap();
if let Some(matched) = re.find(&rank.text().await.unwrap()) {
if let Ok(rank_num) = matched.as_str()[1..].parse::<i32>() {
let rank_key = text_from!(rank).replacen(matched.as_str(), "", 1).trim().to_string();
ranking.insert(escape_key(&rank_key), rank_num.into());
}
}
}
for rank in b.get_elements_by_xpath(r#"//div[@class="rankings graph"]/a[@class="ranking rated"]/span[@class="rank-text"]"#).await.unwrap() {
let re = Regex::new(r"#(\d+)").unwrap();
if let Some(matched) = re.find(&rank.text().await.unwrap()) {
if let Ok(rank_num) = matched.as_str()[1..].parse::<i32>() {
let rank_key = text_from!(rank).replacen(matched.as_str(), "", 1).trim().to_string();
ranking.insert(escape_key(&rank_key), rank_num.into());
}
}
}
data.insert("ranking".into(), ranking.into());
let mut viewer_status: Map<String, Value> = Map::new();
if b.wait_for(r#"//div[@class="status-distribution content-wrap"]/div[@class="statuses"]/div[@class="status"]"#, Duration::from_secs(5)).await {
b.scroll_to_element(&b.get_element_by_xpath(r#"//div[@class="status-distribution content-wrap"]/div[@class="statuses"]/div[@class="status"]"#).await.unwrap()).await.unwrap();
}
let viewer_status_dist = b.get_elements_by_xpath(r#"//div[@class="status-distribution content-wrap"]/div[@class="statuses"]/div[@class="status"]"#).await.unwrap();
for status in viewer_status_dist {
b.scroll_to_element(&status).await.unwrap();
let key = text_from!(status
.find(By::XPath(r#"./div[@class="name"]"#))
.await
.unwrap());
if !key.is_empty() {
let val: isize = text_from!(status
.find(By::XPath(r#"./div[@class="amount"]"#))
.await
.unwrap())
.replace(" Users", "")
.parse()
.unwrap();
viewer_status.insert(escape_key(&key), val.into());
}
}
data.insert("viewer_status".into(), viewer_status.into());
// TODO : not complete
} else {
Err("Scrape failed")?;
}
Ok(data)
}
}
#[async_trait]
impl Extractor for AnilistExtractor {
fn name(&self) -> String {
"ANILIST".to_owned()
}
fn supported_hosts(&self) -> Vec<&str> {
vec!["anilist.co"]
}
async fn run_scrape(
&self,
url: Url,
browser: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
self.anime(url, browser, conf).await
}
}

289
src/extractors/anisearch.rs Normal file

@@ -0,0 +1,289 @@
use thirtyfour::WebElement;
use crate::util::{escape_key, handle_media_url, remove_last_n_chars};
use super::prelude::*;
async fn split_header(el: &WebElement) -> (String, String) {
let header = text_from!(el
.find(By::XPath(r#".//span[@class="header"]"#))
.await
.unwrap());
let value = text_from!(el);
let value = value.replacen(&header, "", 1).trim().to_owned();
(header.replace(':', ""), value)
}
pub struct AnisearchExtractor {}
impl AnisearchExtractor {
pub fn new() -> Self {
Self {}
}
async fn anime_search(
&self,
url: Url,
b: &mut crate::Browser,
_conf: &Config,
) -> Result<HashMap<String, Value>, String> {
b.goto(url.to_string()).await.unwrap();
let mut info: HashMap<String, Value> = HashMap::new();
info.insert(
"query".into(),
remove_last_n_chars(
&b.get_element_text_by_xpath(r#"//*[@id="item-key-a-text"]"#)
.await
.unwrap()
.replace("Title starts with \"", ""),
1,
)
.into(),
);
let mut results: Vec<String> = vec![];
for result in b
.get_elements_by_xpath(r#"//ul[@class="covers"]/li"#)
.await
.unwrap()
{
let link = b
.get_url_from_link(result.find(By::XPath(".//a")).await.unwrap())
.await;
results.push(link);
}
info.insert("results".into(), results.into());
Ok(info)
}
async fn anime(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
let mut info: HashMap<String, Value> = HashMap::new();
b.goto(url.to_string()).await.unwrap();
if b.wait_for(
r#"//div[@class="needsclick cmp-root-container"]"#,
Duration::from_secs(3),
)
.await
{
let script = r#"
return document.querySelector("\#top > div.needsclick.cmp-root-container").shadowRoot.querySelector("\#consentDialog > div.cmp_ui.cmp_ext_text.cmp_state-stacks > div.cmp_navi > div > div.cmp_mainButtons > div > div.cmp_primaryButtonLine > div > div")
"#;
let accept_cookies = b.execute(script, ([]).to_vec()).await.unwrap();
accept_cookies.element().unwrap().click().await.unwrap();
}
let anime_info_section = b
.get_element_by_xpath(r#"//section[@id="information"]"#)
.await
.unwrap();
let title_element = anime_info_section
.find(By::XPath(r#".//div[@class="title"]//strong[@class="f16"]"#))
.await
.unwrap();
info.insert(
"original_title".into(),
text_from!(anime_info_section
.find(By::XPath(r#".//div[@class="title"]//div"#))
.await
.unwrap())
.into(),
);
info.insert("title".into(), text_from!(title_element).into());
let cover_image_url = attr_from!(
anime_info_section
.find(By::XPath(r#".//figure[@id="cover-container"]/img"#))
.await
.unwrap(),
"src"
);
info.insert(
"cover".into(),
handle_media_url(&cover_image_url, "cover", false, conf)
.await
.into(),
);
let mut details: Map<String, Value> = Map::new();
let details_elements = anime_info_section
.find_all(By::XPath(r#"./div/ul/li[2]/ul/li[1]/div"#))
.await
.unwrap();
for element in details_elements {
if attr_from!(element, "class") == "title" {
continue;
}
let (key, val) = split_header(&element).await;
if attr_from!(element, "class") == "creators" {
details.insert(
escape_key(&key),
val.split(", ").collect::<Vec<&str>>().into(),
);
continue;
}
if attr_from!(element, "class") == "websites" {
let mut links: Vec<String> = vec![];
let links_html = element.find_all(By::XPath("./a")).await.unwrap();
for l in links_html {
links.push(attr_from!(l, "href"));
}
details.insert(escape_key(&key), links.into());
continue;
}
details.insert(escape_key(&key), val.into());
}
info.insert("details".into(), details.into());
for desc in b
.get_elements_by_xpath(r#"//section[@id="description"]//button"#)
.await
.unwrap()
{
let desc_lang = attr_from!(desc, "lang");
if try_attr_from!(desc, "class").unwrap_or(String::new()) != "active" {
let show_more_button = b
.get_element_by_xpath(&format!(
r#"//section[@id="description"]//button[@lang="{desc_lang}"]"#
))
.await
.unwrap();
b.scroll_to_element(&show_more_button).await.unwrap();
show_more_button.click().await.unwrap();
}
}
let mut descriptions: Map<String, Value> = Map::new();
for desc in b
.get_elements_by_xpath(
r#"//section[@id="description"]//div[@class="textblock details-text"]"#,
)
.await
.unwrap()
{
let desc_lang = attr_from!(desc, "lang");
let desc_text = text_from!(desc);
descriptions.insert(desc_lang, desc_text.into());
}
info.insert("description".into(), descriptions.into());
let tag_cloud = b
.get_element_by_xpath(r#"//*[@id="description"]//ul[@class="cloud"]"#)
.await
.unwrap();
let mut genres = json!({"main": [], "sub": []}).as_object().unwrap().clone();
let mut tags: Vec<String> = vec![];
for tag in tag_cloud.find_all(By::XPath("./li/a")).await.unwrap() {
if attr_from!(tag, "class") == "gg showpop" {
genres
.get_mut("main")
.unwrap()
.as_array_mut()
.unwrap()
.push(text_from!(tag).into());
}
if attr_from!(tag, "class") == "gc showpop" && !text_from!(tag).is_empty() {
genres
.get_mut("sub")
.unwrap()
.as_array_mut()
.unwrap()
.push(text_from!(tag).into());
}
if attr_from!(tag, "class") == "gt showpop" {
tags.push(text_from!(tag));
}
}
info.insert("genres".into(), genres.into());
info.insert("tags".into(), tags.into());
if let Some(show_more_button) = b
.get_element_by_xpath(r#"//*[@id="information"]/div/ul/li[2]/div/button"#)
.await
{
b.scroll_to_element(&show_more_button).await.unwrap();
show_more_button.click().await.unwrap();
}
let lang_html = b
.get_elements_by_xpath(r#"//*[@id="information"]/div/ul/li[2]/ul/li"#)
.await
.unwrap();
let mut dubs: Map<String, Value> = Map::new();
if let Some(ol) = b.get_element_by_xpath(r#"//div[@class="title"]"#).await {
dubs.insert(attr_from!(ol, "lang"), Value::Object(Map::new()));
}
let mut subs: Map<String, Value> = Map::new();
for dub in lang_html {
let lang_info = dub.find_all(By::XPath("./div")).await.unwrap();
if lang_info.len() != 4 {
continue;
}
let lang_lang = attr_from!(lang_info[0], "lang");
let mut is_dub = false;
if lang_info[0]
.find(By::XPath(r#".//span[@class="speaker"]"#))
.await
.is_ok()
{
is_dub = true;
}
let lang_status = split_header(&lang_info[1]).await;
let lang_release = split_header(&lang_info[2]).await;
let lang_publisher = split_header(&lang_info[3]).await;
let lang_map = json!({
escape_key(&lang_status.0): lang_status.1,
escape_key(&lang_release.0): lang_release.1,
escape_key(&lang_publisher.0): lang_publisher.1
});
if is_dub {
dubs.insert(lang_lang, lang_map);
} else {
subs.insert(lang_lang, lang_map);
}
}
info.insert("dubs".into(), dubs.into());
info.insert("subs".into(), subs.into());
Ok(info)
}
}
#[async_trait]
impl Extractor for AnisearchExtractor {
fn name(&self) -> String {
"ANISEARCH".to_owned()
}
fn supported_hosts(&self) -> Vec<&str> {
vec!["www.anisearch.com"]
}
async fn run_scrape(
&self,
url: Url,
browser: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
if url.path().starts_with("/anime/index") {
self.anime_search(url, browser, conf).await
} else {
self.anime(url, browser, conf).await
}
}
}

228
src/extractors/aur.rs Normal file

@@ -0,0 +1,228 @@
use super::prelude::*;
pub struct AURExtractor {}
impl AURExtractor {
pub fn new() -> Self {
Self {}
}
pub async fn aur_package(
&self,
url: Url,
b: &mut crate::Browser,
_conf: &Config,
) -> Result<HashMap<String, Value>, String> {
b.goto(url.to_string()).await.unwrap();
let head = text_from!(b
.get_element_by_xpath(r#"//*[@id="pkgdetails"]/h2"#)
.await
.unwrap())[17..]
.to_owned();
let (name, version) = head.split_once(' ').unwrap();
let pkg_info = b
.get_element_by_xpath(r#"//*[@id="pkginfo"]"#)
.await
.unwrap();
let mut info: HashMap<String, Value> = {
let mut hm = HashMap::new();
hm.insert("name".into(), name.into());
hm.insert("version".into(), version.into());
hm
};
for row in pkg_info.find_all(By::Tag("tr")).await.unwrap() {
match text_from!(row.find(By::Tag("th")).await.unwrap()).as_str() {
"Git Clone URL:" => {
info.insert(
"clone".into(),
attr_from!(row.find(By::Tag("a")).await.unwrap(), "href").into(),
);
}
"Description:" => {
info.insert(
"description".into(),
text_from!(row.find(By::Tag("td")).await.unwrap()).into(),
);
}
"Upstream URL:" => {
info.insert(
"upstream".into(),
attr_from!(row.find(By::Tag("a")).await.unwrap(), "href").into(),
);
}
"Keywords:" => {
let keyword_items = row.find_all(By::Tag("a")).await.unwrap();
let mut keywords: Vec<String> = vec![];
for kw in keyword_items {
keywords.push(text_from!(kw));
}
info.insert("keywords".into(), keywords.into());
}
"Licenses:" => {
info.insert(
"license".into(),
text_from!(row.find(By::Tag("td")).await.unwrap()).into(),
);
}
"Submitter:" => {
info.insert(
"submitter".into(),
text_from!(row.find(By::Tag("td")).await.unwrap()).into(),
);
}
"Maintainer:" => {
info.insert(
"maintainer".into(),
text_from!(row.find(By::Tag("td")).await.unwrap()).into(),
);
}
"Last Packager:" => {
info.insert(
"last_packager".into(),
text_from!(row.find(By::Tag("td")).await.unwrap()).into(),
);
}
"Votes:" => {
info.insert(
"votes".into(),
text_from!(row.find(By::Tag("td")).await.unwrap())
.parse::<usize>()
.unwrap()
.into(),
);
}
"Popularity:" => {
info.insert(
"popularity".into(),
text_from!(row.find(By::Tag("td")).await.unwrap())
.parse::<f64>()
.unwrap()
.into(),
);
}
"First Submitted:" => {
info.insert(
"first_submitted".into(),
text_from!(row.find(By::Tag("td")).await.unwrap()).into(),
);
}
"Last Updated:" => {
info.insert(
"last_updated".into(),
text_from!(row.find(By::Tag("td")).await.unwrap()).into(),
);
}
_ => {
log::debug!("unknown column");
}
}
}
let mut dependencies: Vec<Value> = vec![];
let dependency_items = b
.get_element_by_xpath(r#"//*[@id="pkgdepslist"]"#)
.await
.unwrap();
let mut deps = dependency_items.find_all(By::Tag("li")).await.unwrap();
if !deps.is_empty() {
if text_from!(deps.last().unwrap()).contains("Show ") {
b.goto(attr_from!(
deps.last().unwrap().find(By::XPath("./a")).await.unwrap(),
"href"
))
.await
.unwrap();
let dependency_items = b
.get_element_by_xpath(r#"//*[@id="pkgdepslist"]"#)
.await
.unwrap();
deps = dependency_items.find_all(By::Tag("li")).await.unwrap();
}
for dep in deps {
let dep_name = text_from!(dep.find(By::Tag("a")).await.unwrap());
let dep_info =
text_from!(dep.find_all(By::Tag("em")).await.unwrap().last().unwrap());
dependencies.push(json!({
"name": dep_name,
"info": dep_info
}));
}
}
info.insert("dependencies".into(), dependencies.into());
let mut required_by: Vec<Value> = vec![];
let required_by_items = b
.get_element_by_xpath(r#"//*[@id="pkgreqslist"]"#)
.await
.unwrap();
let mut reqs = required_by_items.find_all(By::Tag("li")).await.unwrap();
if !reqs.is_empty() {
if text_from!(reqs.last().unwrap()).contains("Show ") {
b.goto(attr_from!(
reqs.last().unwrap().find(By::XPath("./a")).await.unwrap(),
"href"
))
.await
.unwrap();
let required_by_items = b
.get_element_by_xpath(r#"//*[@id="pkgreqslist"]"#)
.await
.unwrap();
reqs = required_by_items.find_all(By::Tag("li")).await.unwrap();
}
for req in reqs {
let req_name = text_from!(req.find(By::Tag("a")).await.unwrap());
let req_info =
text_from!(req.find_all(By::Tag("em")).await.unwrap().last().unwrap());
required_by.push(json!({
"name": req_name,
"optional": (req_info == "(optional)")
}));
}
}
info.insert("required_by".into(), required_by.into());
let mut sources: Vec<String> = vec![];
for source in b
.get_element_by_xpath(r#"//*[@id="pkgsrcslist"]"#)
.await
.unwrap()
.find_all(By::Tag("li"))
.await
.unwrap()
{
sources.push(attr_from!(source.find(By::Tag("a")).await.unwrap(), "href"));
}
info.insert("sources".into(), sources.into());
Ok(info)
}
}
#[async_trait]
impl Extractor for AURExtractor {
fn name(&self) -> String {
"AUR".to_owned()
}
fn supported_hosts(&self) -> Vec<&str> {
vec!["aur.archlinux.org"]
}
async fn run_scrape(
&self,
url: Url,
browser: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
self.aur_package(url, browser, conf).await
}
}

421
src/extractors/igdb.rs Normal file

@@ -0,0 +1,421 @@
use super::prelude::*;
use crate::util::{escape_key, handle_media_url, parse_date, remove_last_n_chars};
pub struct IGDBExtractor {}
impl IGDBExtractor {
pub const fn new() -> Self {
Self {}
}
}
impl IGDBExtractor {
async fn igdb_game(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
b.goto(url.to_string()).await.unwrap();
let mut info: HashMap<String, Value> = HashMap::new();
info.insert(
"name".to_owned(),
remove_last_n_chars(
&b.get_element_text_by_xpath(r#"//*[@class="gamepage-title-wrapper"]/h1"#)
.await
.ok_or("could not get game name")?,
4,
)
.into(),
);
info.insert(
"id".to_owned(),
b.get_element_text_by_xpath(r#"//*[@class="optimisly-game-maininfo"]/div[1]/span"#)
.await
.ok_or("could not get id")?
.into(),
);
let cover_url = b
.get_element_attr_by_xpath(r#"//*[@class="gamepage-cover"]/img[1]"#, "src")
.await
.ok_or("could not get cover url")?;
info.insert(
"cover".to_owned(),
handle_media_url(
&cover_url,
&format!("igdb-{}-cover", info.get("id").unwrap().as_str().unwrap()),
false,
conf,
)
.await
.into(),
);
let genre_and_platform_htmls = b
.get_elements_by_xpath(
r#"//*[@class="gamepage-tabs"]/div[2]/p/span[@class="text-semibold"]/.."#,
)
.await
.unwrap();
let genres_html = genre_and_platform_htmls.first().unwrap();
let mut genres_html = genres_html
.find_all(thirtyfour::By::Tag("a"))
.await
.ok()
.ok_or("could not get genres")?;
let mut genres: Vec<String> = vec![];
for genre in &mut genres_html {
genres.push(text_from!(genre));
}
info.insert("genre".to_owned(), genres.into());
let platforms_html = genre_and_platform_htmls.get(1).unwrap();
let platforms_txt = text_from!(platforms_html)[11..].to_owned();
let mut platforms: Vec<String> = vec![];
for platform in platforms_txt.split(", ") {
platforms.push(platform.to_owned());
}
info.insert("platforms".to_owned(), platforms.into());
info.insert(
"url".to_owned(),
b.get_element_attr_by_xpath(
r#"//*[@class="gamepage-tabs"]/div[4]/div[@class="input-group"]/input"#,
"value",
)
.await
.ok_or("could not get url")?
.into(),
);
let desc = b
.get_element_by_xpath(r#"//*[@class="gamepage-tabs"]/div[2]/div[1]"#)
.await
.ok_or("could not get description")?;
if let Ok(show_more) = desc.find(thirtyfour::By::Tag("span")).await {
show_more.click().await.unwrap();
}
info.insert("description".to_owned(), text_from!(desc).into());
let date_str = b
.get_element_text_by_xpath(r#"//*[@class="banner-subheading"]/span[1]/span[1]"#)
.await
.ok_or("could not get release date")?;
info.insert(
"release".to_owned(),
if date_str == "TBD" {
Option::<String>::None.into()
} else {
Some(parse_date(&date_str, "%b %d, %Y").ok_or("could not parse release date")?)
.into()
},
);
let mut releases: Vec<Map<String, Value>> = vec![];
let releases_html = b
.find(thirtyfour::By::XPath(
r#"//*[@class="optimisly-game-maininfo"]/div[2]"#,
))
.await
.ok()
.ok_or("could not get releases")?;
for release in releases_html
.find_all(thirtyfour::By::XPath("./*"))
.await
.unwrap()
{
let release_platform = text_from!(release
.find(thirtyfour::By::XPath("./div[1]/span"))
.await
.unwrap());
let release_info_html = release
.find(By::XPath("./div[2]/div[1]/div[1]/span"))
.await
.unwrap();
let release_date = text_from!(release_info_html.find(By::Tag("time")).await.unwrap());
let release_info = text_from!(release_info_html.find(By::Tag("strong")).await.unwrap());
releases.push(
json!({
"platform": release_platform,
"date": release_date,
"info": release_info
})
.as_object()
.unwrap()
.clone(),
);
}
info.insert("releases".into(), releases.into());
let mut developers: Vec<String> = vec![];
let developers_html = b
.get_element_by_xpath(
r#"//*[@class="optimisly-game-maininfo"]/div[@itemprop="author"]/span"#,
)
.await
.ok_or("could not get developers")?;
for dev in developers_html.find_all(By::Tag("a")).await.unwrap() {
developers.push(text_from!(dev));
}
info.insert("developers".into(), developers.into());
let mut publishers: Vec<String> = vec![];
if let Some(publishers_html) = b
.get_element_by_xpath(
r#"//*[@class="optimisly-game-maininfo"]/span[@itemprop="publisher"]/span"#,
)
.await
{
for publ in publishers_html.find_all(By::Tag("a")).await.unwrap() {
publishers.push(text_from!(publ));
}
info.insert("publishers".into(), publishers.into());
} else {
log::warn!("could not get publishers");
}
let mut ratings: Map<String, Value> = Map::new();
let ratings_html = b
.find(By::XPath(r#"//*[@class="gamepage-gauge"]"#))
.await
.ok()
.ok_or("could not get ratings")?;
let ratings_html = {
let mut el: Vec<String> = vec![];
for r in &ratings_html.find_all(By::Tag("text")).await.unwrap() {
el.push(text_from!(r));
}
el
};
let ratings_txt: Vec<String> = ratings_html
.into_iter()
.filter(|x| x.chars().all(char::is_numeric) || x == "N/A")
.collect();
ratings.insert(
"member".into(),
(if ratings_txt[0] == "N/A" {
None
} else {
Some(ratings_txt[0].parse::<usize>().unwrap())
})
.into(),
);
ratings.insert(
"critic".into(),
(if ratings_txt[1] == "N/A" {
None
} else {
Some(ratings_txt[1].parse::<usize>().unwrap())
})
.into(),
);
info.insert("ratings".into(), ratings.into());
let mut ttb: Map<String, Value> = Map::new();
if let Some(ttb_data) = b
.get_element_by_xpath(r#"//*[@id="content-page"]/div[2]/aside/table/tbody"#)
.await
{
for row in ttb_data.find_all(By::Tag("tr")).await.unwrap() {
ttb.insert(
remove_last_n_chars(&text_from!(row.find(By::Tag("th")).await.unwrap()), 1),
text_from!(row.find(By::Tag("td")).await.unwrap()).into(),
);
}
info.insert("time_to_beat".into(), ttb.into());
} else {
log::warn!("could not get time to beat");
}
b.scroll_to_end().await.unwrap();
if let Some(show_more) = b.get_element_by_xpath(r#"//*[@id="game-storyline"]/span[@class="text-purple cursor-pointer charLimitMore"]"#).await {
show_more.click().await.unwrap();
}
if let Some(storyline_html) = b
.get_element_text_by_xpath(r#"//*[@id="game-storyline"]/p"#)
.await
{
info.insert("storyline".into(), storyline_html.into());
} else {
log::warn!("could not get storyline");
}
let recommend_div = b
.get_element_by_xpath(r#"//*[@id="content-page"]/div[2]/div[2]/ul/div[2]/div"#)
.await
.unwrap();
let mut recommended: Vec<String> = vec![];
for game in recommend_div.find_all(By::Tag("li")).await.unwrap() {
let game_link = game.find(By::Tag("a")).await.unwrap();
recommended.push(b.get_url_from_link(game_link).await);
}
info.insert("recommendations".into(), recommended.into());
b.scroll_to_end().await.unwrap();
if let Some(show_all_langs) = b
.get_element_by_xpath(r#"//*[@class="language-supports-display"]/button"#)
.await
{
show_all_langs.click().await.unwrap();
}
for el in b.get_elements_by_xpath(r#"//*[@class="optimisly-game-extrainfo2"]/div/div/span[@class="text-purple cursor-pointer"]"#).await.unwrap() {
el.click().await.unwrap();
}
let mut extra_info = String::new();
let extra_info_html = b
.get_element_by_xpath(r#"//*[@class="optimisly-game-extrainfo2"]"#)
.await
.unwrap();
for el in extra_info_html.find_all(By::XPath("./*")).await.unwrap() {
extra_info.push_str(&format!("{}\n", text_from!(el)));
}
let mut extra_map: HashMap<String, Vec<Value>> = HashMap::new();
let mut extra_map_new: HashMap<String, Value> = HashMap::new();
let mut last = String::new();
for line in extra_info.lines() {
let line = line.trim();
if line.is_empty() {
continue;
}
if line.ends_with(':') {
last = remove_last_n_chars(line, 1);
extra_map.insert(last.clone(), vec![]);
} else {
extra_map.get_mut(&last).unwrap().push(line.into());
}
}
for key in extra_map.keys() {
if key == "Localized titles" {
let titles = extra_map.get(key).unwrap();
let mut title_map: Map<String, Value> = Map::new();
for title in titles {
let (lang, val) = title.as_str().unwrap().split_once(": ").unwrap();
title_map.insert(lang.into(), val.into());
}
extra_map_new.insert(key.into(), title_map.into());
}
if key == "Alternative titles" {
let titles = extra_map.get(key).unwrap();
let mut title_map: Map<String, Value> = Map::new();
for title in titles {
let (lang, val) = title.as_str().unwrap().split_once(": ").unwrap();
title_map.insert(lang.into(), val.into());
}
extra_map_new.insert(key.into(), title_map.into());
}
if key == "Keywords" {
let keywords = extra_map.get(key).unwrap()[0].to_string();
extra_map_new.insert(
key.into(),
remove_last_n_chars(&keywords, 1)[1..]
.split(", ")
.map(std::string::ToString::to_string)
.collect(),
);
}
if key == "Supported Languages" {
let mut supported_langs: Map<String, Value> = json!({
"audio": Vec::<String>::new(),
"subtitles": Vec::<String>::new(),
"interface": Vec::<String>::new()
})
.as_object()
.unwrap()
.clone();
let lang_html = b
.get_element_by_xpath(r#"//*[@class="language-supports-display"]/table/tbody"#)
.await
.unwrap();
for lang in lang_html.find_all(By::Tag("tr")).await.unwrap() {
let support = lang.find_all(By::Tag("td")).await.unwrap();
let lang_name = remove_last_n_chars(&text_from!(support[0]), 1);
if text_from!(support[1]) == "" {
supported_langs
.get_mut("audio")
.unwrap()
.as_array_mut()
.unwrap()
.push(lang_name.clone().into());
}
if text_from!(support[2]) == "" {
supported_langs
.get_mut("subtitles")
.unwrap()
.as_array_mut()
.unwrap()
.push(lang_name.clone().into());
}
if text_from!(support[3]) == "" {
supported_langs
.get_mut("interface")
.unwrap()
.as_array_mut()
.unwrap()
.push(lang_name.into());
}
}
extra_map_new.insert(key.into(), supported_langs.into());
}
}
extra_map.remove_entry("Localized titles");
extra_map.remove_entry("Alternative titles");
extra_map.remove_entry("Keywords");
extra_map.remove_entry("Supported Languages");
let extra_map: HashMap<String, Value> = extra_map
.into_iter()
.map(|(old_key, value)| {
let new_key = escape_key(&old_key);
(new_key, value.into())
})
.collect();
let extra_map_new: HashMap<String, Value> = extra_map_new
.into_iter()
.map(|(old_key, value)| {
let new_key = escape_key(&old_key);
(new_key, value)
})
.collect();
info.extend(extra_map);
info.extend(extra_map_new);
Ok(info)
}
}
#[async_trait]
impl Extractor for IGDBExtractor {
fn name(&self) -> String {
"IGDB".to_owned()
}
fn supported_hosts(&self) -> Vec<&str> {
vec!["www.igdb.com"]
}
async fn run_scrape(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
self.igdb_game(url, b, conf).await
}
}

159
src/extractors/mediamarkt.rs Normal file

@@ -0,0 +1,159 @@
use crate::util::{currency, escape_key, remove_last_n_chars};
use super::prelude::*;
pub struct MediamarktExtractor {}
impl MediamarktExtractor {
pub fn new() -> Self {
Self {}
}
async fn product(
&self,
url: Url,
b: &mut crate::Browser,
_conf: &Config,
) -> Result<HashMap<String, Value>, String> {
b.goto(url.to_string()).await.unwrap();
let mut info: HashMap<String, Value> = HashMap::new();
info.insert(
"title".into(),
b.get_element_text_by_xpath(r#"//div[@data-test="mms-select-details-header"]/h1"#)
.await
.unwrap()
.into(),
);
let product_info_elements = b
.get_elements_by_xpath(
r#"//div[@data-test="mms-select-details-header"]//p[@font-family="default"]"#,
)
.await
.unwrap();
let re = regex::Regex::new(r"[-+]?\d*\.\d+|\d+").unwrap();
let ratings: Vec<f64> = re
.find_iter(&text_from!(product_info_elements.first().unwrap()))
.map(|m| m.as_str().parse::<f64>().unwrap())
.collect();
info.insert("rating".into(), ratings[0].into());
info.insert("amount_of_ratings".into(), ratings[1].into());
info.insert(
"product_number".into(),
text_from!(product_info_elements[1])
.replace("Art.-Nr. ", "")
.into(),
);
if let Some(discount) = b
.get_element_text_by_xpath(
r#"//div[@data-test="mms-product-price"]//div[@data-test="mms-badge"]/span"#,
)
.await
{
info.insert("discount".into(), discount.into());
}
if let Some(orig_price) = b.get_element_text_by_xpath(r#"//div[@data-test="mms-product-price"]//div[@data-test="mms-badge"]/../p[1]/span[3]"#).await {
let orig_price = format!("{}{}", &orig_price.chars().skip(1).collect::<String>(), &orig_price.chars().take(1).collect::<String>());
info.insert("original_price".into(), currency(&orig_price).into());
}
let price = remove_last_n_chars(
&b.get_element_text_by_xpath(r#"//span[@data-test="branded-price-whole-value"]"#)
.await
.unwrap(),
1,
);
let price = format!(
"{}{}",
&price.chars().skip(2).collect::<String>(),
&price.chars().take(2).collect::<String>()
);
info.insert("price".into(), currency(price.trim()).into());
if let Some(price_decimal) = b
.get_element_text_by_xpath(r#"//span[@data-test="branded-price-decimal-value"]"#)
.await
{
let decimal = if price_decimal == "" {
0.0
} else {
format!("0.{price_decimal}").parse::<f64>().unwrap()
};
let old_v = info
.get_mut("price")
.unwrap()
.as_object_mut()
.unwrap()
.get_mut("value")
.unwrap()
.as_f64()
.unwrap();
info.get_mut("price")
.unwrap()
.as_object_mut()
.unwrap()
.insert("value".into(), (old_v + decimal).into());
}
let mut data_information: Map<String, Value> = Map::new();
let features_html = b
.get_elements_by_xpath(r#"//div[@data-test="pdp-features-content"]/div/div/table"#)
.await
.unwrap();
b.scroll_to_element(features_html.first().unwrap())
.await
.unwrap();
for feature in features_html {
let title = escape_key(&text_from!(feature
.find(By::XPath("./thead//p"))
.await
.unwrap()));
data_information.insert(title.clone(), Value::Object(Map::new()));
b.scroll_to_element(&feature).await.unwrap();
for info in feature.find_all(By::XPath("./tbody/tr")).await.unwrap() {
b.scroll_to_element(&info).await.unwrap();
std::thread::sleep(std::time::Duration::from_millis(50));
let info_html = info.find_all(By::XPath("./td/p")).await.unwrap();
let key = escape_key(&text_from!(info_html[0]));
if key.is_empty() {
continue;
}
let val = text_from!(info_html[1]);
data_information
.get_mut(&title)
.unwrap()
.as_object_mut()
.unwrap()
.insert(key, val.into());
}
}
info.insert("information".into(), data_information.into());
Ok(info)
}
}
#[async_trait]
impl Extractor for MediamarktExtractor {
fn name(&self) -> String {
"MEDIAMARKT".to_owned()
}
fn supported_hosts(&self) -> Vec<&str> {
vec!["www.mediamarkt.de"]
}
async fn run_scrape(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
self.product(url, b, conf).await
}
}

158
src/extractors/mod.rs Normal file

@@ -0,0 +1,158 @@
use std::collections::HashMap;
use async_trait::async_trait;
use chrono::Utc;
use serde_json::Value;
use url::Url;
mod amazon;
mod anilist;
mod anisearch;
mod aur;
mod igdb;
mod mediamarkt;
mod postman;
mod steam;
mod tmdb;
use crate::Config;
mod prelude {
pub use super::Extractor;
pub use crate::Config;
pub use crate::{attr_from, text_from, try_attr_from};
pub use async_trait::async_trait;
pub use serde_json::{json, Map, Value};
pub use std::collections::HashMap;
pub use std::time::Duration;
pub use thirtyfour::By;
pub use url::Url;
}
#[async_trait]
/// A trait for defining custom extractors to scrape data from web pages.
pub trait Extractor {
/// Checks if the provided URL can be handled by this extractor.
///
/// # Arguments
///
/// * `url` - The URL to be matched against the extractor's capabilities.
///
/// # Returns
///
/// Returns `true` if the extractor can handle the given URL, otherwise `false`.
fn match_url(&self, url: &Url) -> bool {
if let Some(host_str) = url.host_str() {
if self.supported_hosts().contains(&host_str) {
return true;
}
}
false
}
/// Retrieves supported hosts for the extractor
///
/// # Returns
///
/// Returns a `Vec` of hosts supported by this extractor.
fn supported_hosts(&self) -> Vec<&str>;
/// Retrieves the name of the extractor.
///
/// # Returns
///
/// Returns a `String` containing the name of the extractor.
fn name(&self) -> String;
/// Performs the web scraping operation on the provided URL using the given browser
/// instance and configuration.
///
/// # Arguments
///
/// * `url` - The URL to perform scraping on.
/// * `browser` - A mutable reference to the browser instance used for scraping.
/// * `conf` - A reference to the configuration settings for scraping.
///
/// # Returns
///
/// Returns a `Result` indicating either a successful scraping operation with a
/// `HashMap` containing extracted data, or an error message as a `String` if the
/// operation fails.
async fn run_scrape(
&self,
url: Url,
browser: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String>;
}
/// Get a list of all extractors registered.
#[must_use]
pub fn get_extractors() -> Vec<Box<dyn Extractor>> {
vec![
Box::new(igdb::IGDBExtractor::new()),
Box::new(aur::AURExtractor::new()),
Box::new(amazon::AmazonExtractor::new()),
Box::new(anilist::AnilistExtractor::new()),
Box::new(anisearch::AnisearchExtractor::new()),
Box::new(mediamarkt::MediamarktExtractor::new()),
Box::new(postman::PostmanExtractor::new()),
Box::new(steam::SteamExtractor::new()),
Box::new(tmdb::TmdbExtractor::new()),
]
}
pub async fn scrape_url(url: &str, conf: &Config) {
let p_url = Url::parse(url).expect("Invalid URL");
let ts = Utc::now();
let mut data: Option<Result<HashMap<String, Value>, String>> = None;
let mut browser = crate::Browser::new(conf).await.unwrap();
if let Some(force_ext) = &conf.force_extractor {
let extractors = get_extractors();
let ex = extractors
.iter()
.find(|x| x.name() == *force_ext)
.unwrap()
.to_owned();
log::info!("Scraping '{}'", p_url.to_string());
log::info!("Using extractor {}", ex.name());
data = Some(ex.run_scrape(p_url, &mut browser, conf).await);
} else {
for x in get_extractors() {
if x.match_url(&p_url) {
log::info!("Scraping '{}'", p_url.to_string());
log::info!("Using extractor {}", x.name());
data = Some(x.run_scrape(p_url, &mut browser, conf).await);
break;
}
}
}
browser.quit().await;
if data.is_none() {
log::error!("Site not supported");
std::process::exit(1);
}
let data = data.unwrap();
let mut data = match data {
Ok(data) => data,
Err(e) => {
let mut h = HashMap::new();
log::error!("Scrape failed: {e}");
h.insert("error".into(), e.into());
h
}
};
if conf.save_ts {
data.insert("scraped_at".to_string(), ts.timestamp_nanos().into());
}
println!("{}", serde_json::to_string(&data).unwrap());
}

322
src/extractors/postman.rs Normal file

@@ -0,0 +1,322 @@
use crate::util::{escape_unsafe_characters, handle_media_url, remove_last_n_chars, window};
use super::prelude::*;
pub struct PostmanExtractor {}
impl PostmanExtractor {
pub fn new() -> Self {
Self {}
}
}
impl PostmanExtractor {
async fn torrent(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
b.goto(url.to_string()).await.unwrap();
let mut info: HashMap<String, Value> = HashMap::new();
let info_table_html = b
.get_element_by_xpath(r#"//*[@id="td_props"]/tbody"#)
.await
.ok_or("could not get info table")?;
let entries = info_table_html.find_all(By::Tag("tr")).await.unwrap();
for entry in entries.iter().take(entries.len() - 1) {
if let Ok(key_name_el) = entry.find(By::XPath(r#"./td[@class="label"]/b"#)).await {
let key_name = text_from!(key_name_el);
let content = text_from!(entry.find(By::XPath("./td[2]")).await.unwrap());
match key_name.as_str() {
"Name:" => {
info.insert("name".into(), content.into());
}
"Torrent file:" => {
let torrent_file_url = b
.get_url_from_link(entry.find(By::XPath("./td[2]/a[1]")).await.unwrap())
.await;
info.insert(
"torrent_file".into(),
handle_media_url(&torrent_file_url, &content, true, conf)
.await
.into(),
);
}
"Magnet:" => {
info.insert(
"magnet_url".into(),
attr_from!(
entry.find(By::XPath("./td[2]/a[1]")).await.unwrap(),
"href"
)
.into(),
);
}
"Infohash:" => {
info.insert("infohash".into(), content.into());
}
"Size:" => {
info.insert("size".into(), content.into());
}
"Owner:" => {
if content != "hidden" && content != "none (abandoned torrent)" {
info.insert("owner".into(), content.into());
let level = attr_from!(
entry
.find(By::XPath("./td[2]/span[1]/img[1]"))
.await
.unwrap(),
"src"
);
info.insert(
"owner_level".into(),
remove_last_n_chars(level.split('/').last().unwrap(), 4)
.parse::<isize>()
.unwrap()
.into(),
);
}
}
"Main Languages:" => {
let languages_html =
entry.find_all(By::XPath("./td[2]/span")).await.unwrap();
let mut languages: Vec<String> = vec![];
for lang in languages_html {
languages.push(attr_from!(lang, "title"));
}
info.insert("main_languages".into(), languages.into());
}
"Subtitle Languages:" => {
let languages_html =
entry.find_all(By::XPath("./td[2]/span")).await.unwrap();
let mut languages: Vec<String> = vec![];
for lang in languages_html {
languages.push(attr_from!(lang, "title"));
}
info.insert("subtitle_languages".into(), languages.into());
}
"Hits / Downloads:" => {
let (hits_amount, downloads_amount) = content.split_once(" / ").unwrap();
info.insert(
"hits_amount".into(),
hits_amount.parse::<isize>().unwrap().into(),
);
info.insert(
"downloads_amount".into(),
downloads_amount.parse::<isize>().unwrap().into(),
);
}
"Seeders / Leechers:" => {
let (seeders_amount, leechers_amount) = content.split_once(" / ").unwrap();
info.insert(
"seeders_amount".into(),
seeders_amount.parse::<isize>().unwrap().into(),
);
info.insert(
"leechers_amount".into(),
leechers_amount.parse::<isize>().unwrap().into(),
);
}
"Added / Last Active:" => {
let (added_timestamp, last_active_timestamp) =
content.split_once(" / ").unwrap();
info.insert("added_timestamp".into(), added_timestamp.into());
info.insert(
"last_active_timestamp".into(),
(if last_active_timestamp == "No active seeders in DB" {
None
} else {
Some(last_active_timestamp)
})
.into(),
);
}
"Rating:" => {
info.insert(
"rating".into(),
attr_from!(
entry
.find(By::XPath(r#"./td[2]/span[@id="ratingbars"]"#))
.await
.unwrap(),
"title"
)
.split_whitespace()
.next()
.unwrap()
.parse::<f64>()
.unwrap()
.into(),
);
}
"Description:" => {
info.insert("description".into(), content.into());
}
"Category:" => {
info.insert("category".into(), content.into());
}
"Subtitles:" => {
if !content.is_empty() {
info.insert("subtitles".into(), content.into());
}
}
"Length:" => {
if !content.is_empty() {
info.insert("length".into(), content.into());
}
}
"Genre:" => {
if !content.is_empty() {
info.insert("genre".into(), content.into());
}
}
"Codec:" => {
if !content.is_empty() {
info.insert("codec".into(), content.into());
}
}
"Ripper Info:" => {
if !content.is_empty() {
info.insert("ripper_info".into(), content.into());
}
}
"Format:" => {
if !content.is_empty() {
info.insert("format".into(), content.into());
}
}
"Bitrate:" => {
if !content.is_empty() {
info.insert("bitrate".into(), content.into());
}
}
"Banned:" => {
info.insert("banned".into(), (content == "yes").into());
}
"Immutable:" => {
info.insert("immutable".into(), (content == "yes").into());
}
"Visible:" => {
info.insert("visible".into(), (content == "yes").into());
}
"Comment Handling:" => {}
_ => {
log::debug!("unknown key {key_name}");
}
}
}
}
let mut files: Vec<Map<String, Value>> = vec![];
let files_info_html: Vec<_> = b
.get_elements_by_xpath(r#"//*[@id="td_files"]/tbody/*"#)
.await
.ok_or("could not get files info")?
.into_iter()
.skip(1)
.collect();
for entry in files_info_html {
let file_name = text_from!(entry.find(By::XPath("./td[1]")).await.unwrap());
let file_size = text_from!(entry.find(By::XPath("./td[2]")).await.unwrap());
files.push(
json!({
"file_name": file_name,
"file_size": file_size
})
.as_object()
.unwrap()
.clone(),
);
}
info.insert("files".into(), files.into());
let mut attachments: Map<String, Value> = Map::new();
let attachment_html = b
.get_element_by_xpath(r#"//table[@id="td_attachments"]/tbody/tr[1]/td[1]"#)
.await
.ok_or("could not get attachments")?;
for el in attachment_html
.find_all(By::XPath("./a/img"))
.await
.unwrap()
{
let attachment_title = attr_from!(el, "title");
let attachment_url = b.get_absolute_url(&attr_from!(el, "src")).await;
attachments.insert(
attachment_title.clone(),
handle_media_url(
&attachment_url,
&format!("{}.png", escape_unsafe_characters(&attachment_title)),
true,
conf,
)
.await
.into(),
);
}
info.insert("attachments".into(), attachments.into());
if let Some(comments_html) = b.get_element_by_xpath(r#"//*[@id="comments"]/tbody"#).await {
let mut comments: Vec<Map<String, Value>> = vec![];
for comment in window(&comments_html.find_all(By::Tag("tr")).await.unwrap(), 2) {
let comment_user =
text_from!(comment[0].find(By::XPath("./th/b/i")).await.unwrap());
let comment_ts = text_from!(comment[0]
.find(By::XPath(r#"./th/span[@class="commentdate"]"#))
.await
.unwrap())[7..]
.to_owned();
let mut comment_content = text_from!(comment[1]
.find(By::XPath(r#"./td/span[@class="commenttext"]"#))
.await
.unwrap());
let comment_content_html = comment[1]
.find_all(By::XPath(r#"./td/span[@class="commenttext"]/a"#))
.await
.unwrap();
for el in comment_content_html {
if el.tag_name().await.unwrap().as_str() == "a" {
let link = format!("[{}]({})", text_from!(el), attr_from!(el, "href"));
comment_content = comment_content.replace(&text_from!(el), &link);
}
}
comments.push(
json!({
"user": comment_user,
"timestamp": comment_ts,
"content": comment_content
})
.as_object()
.unwrap()
.clone(),
);
}
info.insert("comments".into(), comments.into());
}
Ok(info)
}
}
#[async_trait]
impl Extractor for PostmanExtractor {
fn name(&self) -> String {
"POSTMAN".to_owned()
}
fn supported_hosts(&self) -> Vec<&str> {
vec!["tracker2.postman.i2p"]
}
async fn run_scrape(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
self.torrent(url, b, conf).await
}
}

145
src/extractors/steam.rs Normal file

@@ -0,0 +1,145 @@
use crate::util::{currency, parse_date};
use super::prelude::*;
pub struct SteamExtractor;
impl SteamExtractor {
pub fn new() -> Self {
Self {}
}
async fn steam_game(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
let mut url = url;
let lang = conf.language.clone();
match lang {
crate::Language::de_DE => {
url.query_pairs_mut().append_pair("l", "german");
}
crate::Language::en_US => {
url.query_pairs_mut().append_pair("l", "english");
}
}
log::info!(
"Changing to '{url}' because of {:?} language",
conf.language
);
b.goto(url.to_string()).await.unwrap();
if b.current_url().await.unwrap().path().contains("agecheck") {
log::info!("Game is behind age restriction");
let year = b
.get_element_by_xpath(r#"//*[@id="ageYear"]"#)
.await
.unwrap();
thirtyfour::components::SelectElement::new(&year)
.await
.unwrap()
.select_by_value("1900")
.await
.unwrap();
b.click_on_xpath(r#"//*[@id="view_product_page_btn"]"#)
.await;
b.wait_for(r#"//*[@id="appHubAppName"]"#, Duration::from_secs(5))
.await;
b.goto(url.to_string()).await.unwrap();
}
let game_name = b
.get_element_text_by_xpath(r#"//*[@id="appHubAppName"]"#)
.await
.unwrap();
let game_description = b
.get_element_text_by_xpath(r#"//*[@class="game_description_snippet"]"#)
.await
.unwrap();
let game_release = b
.get_element_text_by_xpath(r#"//*[@class="release_date"]/div[2]"#)
.await
.unwrap();
let game_release = match lang {
crate::Language::de_DE => parse_date(&game_release, "%d. %b. %Y").unwrap(),
crate::Language::en_US => parse_date(&game_release, "%d %b, %Y").unwrap(),
};
let game_developer = b
.get_element_text_by_xpath(r#"//*[@id="developers_list"]/a"#)
.await
.unwrap();
let game_publisher = text_from!(b
.get_elements_by_xpath(r#"//*[@class="dev_row"]/div[2]"#)
.await
.unwrap()
.get(1)
.unwrap());
let mut game_price: Option<Map<String, Value>> = None;
if let Some(game_orig_price_html) = b.get_elements_by_xpath(r#"//*[@class="game_area_purchase_game_wrapper"]/div/div[2]/div/div[1]/div[2]/*[@class="discount_original_price"]"#).await.unwrap().first() {
let game_orig_price = text_from!(game_orig_price_html);
let game_discount_price = text_from!(b.get_elements_by_xpath(r#"//*[@class="game_area_purchase_game_wrapper"]/div/div[2]/div/div[1]/div[2]/*[@class="discount_final_price"]"#).await.unwrap().first().unwrap());
game_price = Some(json!({
"original_price": currency(&game_orig_price),
"discount_price": currency(&game_discount_price),
}).as_object().unwrap().clone());
} else if let Some(game_price_html) = b.get_element_text_by_xpath(r#"//*[@class="game_area_purchase_game_wrapper"]/div/div[2]/div/*[@class="game_purchase_price price"]"#).await {
game_price = Some(currency(&game_price_html));
} else {
let check_free_price = b.get_element_text_by_xpath(r#"//*[@class="game_purchase_action"]/div[1]/div[@class="game_purchase_price price"]"#).await.unwrap();
match lang {
crate::Language::de_DE => {
if check_free_price == "Kostenlos" {
game_price = Some(currency("0.0€"));
}
},
crate::Language::en_US => {
if check_free_price == "Free" {
game_price = Some(currency("0.0$"));
}
}
}
}
let mut info = HashMap::new();
info.insert("name".into(), game_name.into());
info.insert("description".into(), game_description.into());
info.insert("release".into(), game_release.into());
info.insert("developer".into(), game_developer.into());
info.insert("publisher".into(), game_publisher.into());
info.insert("price".into(), game_price.into());
Ok(info)
}
}
#[async_trait]
impl Extractor for SteamExtractor {
fn supported_hosts(&self) -> Vec<&str> {
vec!["store.steampowered.com"]
}
fn name(&self) -> String {
"STEAM".to_string()
}
async fn run_scrape(
&self,
url: Url,
browser: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
self.steam_game(url, browser, conf).await
}
}

338
src/extractors/tmdb.rs Normal file

@@ -0,0 +1,338 @@
use crate::util::{
escape_key, extract_attrs_from_elements, extract_texts_from_elements, handle_media_url,
parse_date, remove_last_n_chars,
};
use super::prelude::*;
pub struct TmdbExtractor;
impl TmdbExtractor {
pub fn new() -> Self {
Self {}
}
async fn series(
&self,
url: Url,
b: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
b.goto(url.to_string()).await.unwrap();
let prefs = json!({
"i18n_fallback_language": "en-US",
"locale": "en-US",
"country_code": "US",
});
let prefs_str = serde_json::to_string(&prefs).unwrap();
let lang_cookie =
thirtyfour::Cookie::build("tmdb.prefs", urlencoding::encode(&prefs_str).into_owned())
.domain("www.themoviedb.org")
.path("/")
.expires(None)
.secure(true)
.http_only(true)
.same_site(thirtyfour::cookie::SameSite::Lax)
.finish();
b.delete_cookie("tmdb.prefs").await.unwrap();
b.add_cookie(lang_cookie).await.unwrap();
b.refresh().await.unwrap();
let mut info: HashMap<String, Value> = HashMap::new();
if b.get_element_by_xpath(r#"//*[@id="main"]//div[@class="error_wrapper"]"#)
.await
.is_some()
{
Err("page unavailable")?;
}
info.insert(
"title".into(),
b.get_element_text_by_xpath(
r#"//*[@id="original_header"]//section[@class="header poster"]/div/h2/a"#,
)
.await
.unwrap()
.into(),
);
info.insert(
"release_year".into(),
remove_last_n_chars(
&b.get_element_text_by_xpath(
r#"//*[@id="original_header"]//section[@class="header poster"]/div/h2/span"#,
)
.await
.unwrap()[1..],
1,
)
.into(),
);
let rating_html = attr_from!(
b.get_element_by_xpath(r#"//div[@class="user_score_chart"]/div[1]/span"#)
.await
.unwrap(),
"class"
);
let rating = rating_html.split("icon-r").nth(1).unwrap();
info.insert(
"user_rating".into(),
rating.parse::<isize>().unwrap().into(),
);
if let Some(age_certification) = b
.get_element_text_by_xpath(
r#"//*[@id="original_header"]//span[@class="certification"]"#,
)
.await
{
info.insert("age_certification".into(), age_certification.into());
}
let genres_html = b
.get_elements_by_xpath(r#"//*[@id="original_header"]//span[@class="genres"]/a"#)
.await
.unwrap();
info.insert(
"genres".into(),
extract_texts_from_elements(genres_html).await.into(),
);
info.insert(
"overview".into(),
b.get_element_text_by_xpath(r#"//*[@id="original_header"]//div[@class="overview"]"#)
.await
.unwrap()
.into(),
);
let cover_url = b
.get_absolute_url(
&attr_from!(
b.get_element_by_xpath(
r#"//*[@id="original_header"]//div[@class="poster"]//img"#
)
.await
.unwrap(),
"src"
)
.replace("_filter(blur)", ""),
)
.await;
info.insert(
"cover".into(),
handle_media_url(&cover_url, "cover", false, conf)
.await
.into(),
);
for fact in b
.get_elements_by_xpath(r#"//*[@id="media_v4"]//section[@class="facts left_column"]/p"#)
.await
.unwrap()
{
if let Ok(key) = fact.find(By::XPath("./strong")).await {
let key = text_from!(key);
if key == "Networks" {
continue;
}
info.insert(
escape_key(&key),
text_from!(fact).replace(&format!("{key}\n"), "").into(),
);
}
}
let mut tags: Vec<String> = vec![];
for tag in b
.get_elements_by_xpath(
r#"//*[@id="media_v4"]//section[@class="keywords right_column"]/ul[1]/li"#,
)
.await
.unwrap()
{
tags.push(text_from!(tag.find(By::XPath("./a")).await.unwrap()));
}
info.insert("tags".into(), tags.into());
let all_seasons_url = b
.get_url_from_link(
b.get_element_by_xpath(
r#"//*[@id="media_v4"]//section[@class="panel season"]/p[1]/a"#,
)
.await
.unwrap(),
)
.await;
b.goto(all_seasons_url).await.unwrap();
let mut seasons: Vec<Map<String, Value>> = vec![];
let mut seasons_urls: Vec<String> = vec![];
for s in extract_attrs_from_elements(
b.get_elements_by_xpath(
r#"//*[@id="media_v4"]//div[@class="season_wrapper"]/section/div/a"#,
)
.await
.unwrap(),
"href",
)
.await
{
seasons_urls.push(b.get_absolute_url(&s).await);
}
for season in seasons_urls {
let season_data = self.season_page(season, b, conf).await?;
seasons.push(season_data);
}
info.insert("seasons".into(), seasons.into());
Ok(info)
}
async fn season_page(
&self,
url: String,
b: &mut crate::Browser,
_conf: &Config,
) -> Result<Map<String, Value>, String> {
b.goto(url.clone()).await.unwrap();
let mut season: Map<String, Value> = Map::new();
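// The season number is taken from the last path segment of the season URL (e.g. a URL ending in `/season/2` yields 2).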
season.insert(
"season_number".into(),
url.split('/')
.last()
.unwrap()
.parse::<isize>()
.unwrap()
.into(),
);
season.insert(
"title".into(),
b.get_element_text_by_xpath(
r#"//*[@id="main"]//span[@class="flex poster"]/span/div/h2/a"#,
)
.await
.unwrap()
.into(),
);
if let Some(release_year) = b
.get_element_text_by_xpath(
r#"//*[@id="main"]//span[@class="flex poster"]/span/div/h2/span"#,
)
.await
{
season.insert(
"release_year".into(),
remove_last_n_chars(&release_year[1..], 1).into(),
);
}
season.insert(
"amount_of_episodes".into(),
b.get_element_text_by_xpath(
r#"//*[@id="main_column"]//h3[@class="episode_sort space"]/span"#,
)
.await
.unwrap()
.parse::<isize>()
.unwrap()
.into(),
);
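// Every episode is rendered as a `div.card` inside the episode list; pull number, title, rating, air date (when present), runtime and overview from each card.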
let mut episodes: Vec<Map<String, Value>> = vec![];
let episodes_html = b
.get_element_by_xpath(r#"//*[@id="main_column"]//div[@class="episode_list"]"#)
.await
.unwrap();
for e in episodes_html
.find_all(By::XPath(r#"./div[@class="card"]"#))
.await
.unwrap()
{
let mut episode: Map<String, Value> = Map::new();
episode.insert(
"episode_number".into(),
text_from!(e
.find(By::XPath(r#".//span[@class="episode_number"]"#))
.await
.unwrap())
.parse::<isize>()
.unwrap()
.into(),
);
episode.insert(
"title".into(),
text_from!(e
.find(By::XPath(r#".//div[@class="episode_title"]//a"#))
.await
.unwrap())
.into(),
);
episode.insert(
"rating".into(),
text_from!(e
.find(By::XPath(
r#".//div[@class="episode_title"]/div[1]/div[1]/div[1]"#
))
.await
.unwrap())
.parse::<f64>()
.unwrap()
.into(),
);
if let Ok(release_date) = e
.find(By::XPath(
r#".//div[@class="episode_title"]//div[@class="date"]/span[@class="date"]"#,
))
.await
{
episode.insert(
"release_date".into(),
parse_date(&text_from!(release_date), "%B %d, %Y").into(),
);
}
episode.insert("runtime".into(), text_from!(e.find(By::XPath(r#".//div[@class="episode_title"]//div[@class="date"]/span[@class="runtime"]"#)).await.unwrap()).into());
episode.insert(
"overview".into(),
text_from!(e
.find(By::XPath(
r#".//div[@class="info"]//div[@class="overview"]/p"#
))
.await
.unwrap())
.into(),
);
episodes.push(episode);
}
season.insert("episodes".into(), episodes.into());
Ok(season)
}
}
#[async_trait]
impl Extractor for TmdbExtractor {
fn supported_hosts(&self) -> Vec<&str> {
vec!["www.themoviedb.org"]
}
fn name(&self) -> String {
"TMDB".to_string()
}
async fn run_scrape(
&self,
url: Url,
browser: &mut crate::Browser,
conf: &Config,
) -> Result<HashMap<String, Value>, String> {
self.series(url, browser, conf).await
}
}

356
src/lib.rs Normal file
View file

@ -0,0 +1,356 @@
use std::{
ops::Deref,
process::{Child, Command, Stdio},
str::FromStr,
};
use strum::EnumVariantNames;
pub mod extractors;
pub mod util;
use thirtyfour::prelude::*;
/// A convenience macro for extracting text content from a web element expression.
///
/// This macro takes a single expression `$expr` that represents a web element. It uses
/// the `.text()` method to asynchronously extract text content from the web element and
/// immediately unwraps the result. This macro is useful for simplifying the process of
/// extracting text content from web elements.
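///
/// Illustrative usage (assuming an async context and an in-scope `WebElement` named `el`):
/// `let title = text_from!(el);`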
#[macro_export]
macro_rules! text_from {
($expr:expr) => {
$expr.text().await.unwrap()
};
}
/// A convenience macro for extracting an attribute's value from a web element expression.
///
/// This macro takes two expressions as arguments: `$expr`, which represents a web element,
/// and `$attr`, which is the name of the attribute to extract. It uses the `.attr()` method
/// to asynchronously extract the value of the specified attribute from the web element and
/// immediately unwraps the result. This macro simplifies the process of attribute extraction.
///
/// # Note
///
/// If the attribute is not present, the macro will panic when trying to unwrap it.
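///
/// Illustrative usage (assuming an async context and a `WebElement` named `el`):
/// `let href = attr_from!(el, "href");`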
#[macro_export]
macro_rules! attr_from {
($expr:expr, $attr:expr) => {
$expr.attr($attr).await.unwrap().unwrap()
};
}
/// A convenience macro for attempting to extract an attribute's value from a web element expression.
///
/// This macro takes two expressions as arguments: `$expr`, which represents a web element,
/// and `$attr`, which is the name of the attribute to extract. It uses the `.attr()` method
/// to asynchronously extract the value of the specified attribute from the web element. If the
/// attribute is not present, it returns an `Option` with `None`.
///
/// # Note
///
/// This macro returns an `Option` containing the attribute value, which can be either `Some(value)`
/// or `None` if the attribute is absent.
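///
/// Illustrative usage (assuming an async context and a `WebElement` named `el`):
/// `let alt: Option<String> = try_attr_from!(el, "alt");`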
#[macro_export]
macro_rules! try_attr_from {
($expr:expr, $attr:expr) => {
$expr.attr($attr).await.unwrap()
};
}
/// A struct representing a web browser instance.
pub struct Browser {
driver: Option<WebDriver>,
cmd: Child,
}
impl Browser {
pub async fn new(conf: &Config) -> Option<Self> {
// TODO : setup http proxy
let mut caps = DesiredCapabilities::chrome();
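// Assumes a `chromedriver` binary is available on `PATH`; its output is discarded and the WebDriver endpoint is expected at `http://localhost:9515`.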
let child = Command::new("chromedriver")
.args(vec!["-p", "9515"])
.stdout(Stdio::null())
.stderr(Stdio::null())
.spawn()
.ok()?;
if let Some(http_proxy) = conf.http_proxy.clone() {
let (_host, _port) = http_proxy.split_once(':').unwrap();
caps.add_chrome_arg(&format!("--proxy-server={http_proxy}"))
.unwrap();
}
let driver = WebDriver::new("http://localhost:9515", caps).await.ok()?;
Some(Self {
driver: Some(driver),
cmd: child,
})
}
/// Scrolls to the end of the web page using the browser's `WebDriver`.
///
/// This asynchronous method is used to scroll to the bottom of a web page by executing a JavaScript
/// script using the browser's `WebDriver`. It takes no arguments and returns a `Result` containing
/// either a `ScriptRet` indicating the script execution result or a `WebDriverError` if an error occurs.
///
/// # Returns
///
/// A `Result` containing either a `ScriptRet` indicating the script execution result or a `WebDriverError`.
pub async fn scroll_to_end(&self) -> Result<ScriptRet, WebDriverError> {
self.driver
.as_ref()
.unwrap()
.execute(
"window.scrollTo(0, document.body.scrollHeight);",
Vec::new(),
)
.await
}
/// Clicks on a web element using its `XPath` expression.
///
/// This asynchronous method is used to locate a web element using its `XPath` expression,
/// and then perform a click action on it.
///
/// The method does not return any value but panics if the operation fails.
///
/// # Arguments
///
/// * `xpath` - The `XPath` expression used to locate the web element.
pub async fn click_on_xpath(&self, xpath: &str) {
self.get_element_by_xpath(xpath)
.await
.unwrap()
.click()
.await
.unwrap();
}
/// Scrolls to a specific web element using the browser's `WebDriver`.
///
/// This asynchronous method is used to scroll to a specific web element by executing a JavaScript
/// script using the browser's `WebDriver`.
///
/// The method returns a `Result` containing either a `ScriptRet` indicating the script execution
/// result or a `WebDriverError` if an error occurs.
///
/// # Arguments
///
/// * `e` - A reference to the web element to scroll into view.
///
/// # Returns
///
/// A `Result` containing either a `ScriptRet` indicating the script execution result or a `WebDriverError`.
pub async fn scroll_to_element(&self, e: &WebElement) -> Result<ScriptRet, WebDriverError> {
self.driver
.as_ref()
.unwrap()
.execute("arguments[0].scrollIntoView();", vec![e.to_json()?])
.await
}
/// Retrieves a complete URL from a link element using the browser's `WebDriver`.
///
/// This asynchronous method is used to retrieve a complete URL from a web element
/// by extracting its "href" attribute and resolving it against the current page's URL.
///
/// # Arguments
///
/// * `el` - The web element from which to extract the URL.
///
/// # Returns
///
/// A string representing the complete URL derived from the link element.
pub async fn get_url_from_link(&self, el: WebElement) -> String {
let url = attr_from!(el, "href");
self.get_absolute_url(&url).await
}
/// Converts a URL to an absolute URL based on the current page's URL.
///
/// This function takes a relative or absolute `url` as input and returns the
/// corresponding absolute URL. If the input `url` is a relative URL, it is converted
/// to an absolute URL using the current page's URL as the base. If the input `url` is
/// already an absolute URL, it is returned as is.
///
/// # Parameters
///
/// - `url`: A string slice representing the relative or absolute URL to be converted.
///
/// # Returns
///
/// A `String` containing the absolute URL.
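///
/// For example, if the current page is `https://example.com/a/b?x=1`, a relative path of
/// `/img/cover.jpg` resolves to `https://example.com/img/cover.jpg`, while an already
/// absolute URL is returned unchanged.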
pub async fn get_absolute_url(&self, url: &str) -> String {
if let Err(url::ParseError::RelativeUrlWithoutBase) = url::Url::parse(url) {
let mut current_url = self.current_url().await.unwrap();
current_url.set_query(None);
current_url.set_path(url);
return current_url.to_string();
}
url.to_string()
}
/// Waits for a web element to be present using the browser's `WebDriver`.
///
/// This asynchronous method is used to wait for a web element to be present in the
/// DOM using its `XPath` expression. It takes an `XPath` string and a timeout duration
/// as arguments and performs the following actions:
///
/// 1. Queries for the web element using the provided `XPath` expression.
/// 2. Waits for the web element to exist within the specified timeout duration.
/// 3. Returns `true` if the web element is found within the timeout, otherwise `false`.
///
/// # Arguments
///
/// * `xpath` - The `XPath` expression used to locate the web element.
/// * `timeout` - The maximum duration to wait for the web element to appear.
///
/// # Returns
///
/// A boolean value indicating whether the web element was found within the timeout.
pub async fn wait_for(&self, xpath: &str, timeout: std::time::Duration) -> bool {
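// The second argument to `wait` is the poll interval; `Duration::new(0, 500)` is 500 nanoseconds, so the query is re-checked near-continuously until the element exists or the timeout expires.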
self.driver
.as_ref()
.unwrap()
.query(By::XPath(xpath))
.wait(timeout, std::time::Duration::new(0, 500))
.exists()
.await
.unwrap_or(false)
}
/// Retrieves the text content of a web element using its `XPath` expression.
///
/// This asynchronous method is used to locate a web element using its `XPath` expression
/// and retrieve its text content.
/// # Arguments
///
/// * `xpath` - The `XPath` expression used to locate the web element.
///
/// # Returns
///
/// An `Option` containing the text content of the web element, or `None` if not found.
pub async fn get_element_text_by_xpath(&self, xpath: &str) -> Option<String> {
self.find(By::XPath(xpath)).await.ok()?.text().await.ok()
}
/// Retrieves the value of a specific attribute from a web element using its `XPath` expression.
///
/// This asynchronous method is used to locate a web element using its `XPath` expression
/// and retrieve the value of a specific attribute.
/// # Arguments
///
/// * `xpath` - The `XPath` expression used to locate the web element.
/// * `attr` - The name of the attribute to retrieve.
///
/// # Returns
///
/// An `Option` containing the value of the specified attribute, or `None` if not found.
pub async fn get_element_attr_by_xpath(&self, xpath: &str, attr: &str) -> Option<String> {
self.find(By::XPath(xpath))
.await
.ok()?
.attr(attr)
.await
.ok()?
}
/// Retrieves a list of web elements using their `XPath` expression.
///
/// This asynchronous method is used to locate multiple web elements using a common `XPath` expression.
/// # Arguments
///
/// * `xpath` - The `XPath` expression used to locate the web elements.
///
/// # Returns
///
/// An `Option` containing a vector of located web elements, or `None` if none are found.
pub async fn get_elements_by_xpath(&self, xpath: &str) -> Option<Vec<WebElement>> {
self.find_all(By::XPath(xpath)).await.ok()
}
/// Retrieves a single web element using its `XPath` expression.
///
/// This asynchronous method is used to locate a single web element using its `XPath` expression.
/// # Arguments
///
/// * `xpath` - The `XPath` expression used to locate the web element.
///
/// # Returns
///
/// An `Option` containing the located web element, or `None` if not found.
pub async fn get_element_by_xpath(&self, xpath: &str) -> Option<WebElement> {
self.find(By::XPath(xpath)).await.ok()
}
/// Quits the browser instance and `WebDriver` process.
///
/// This asynchronous method is used to gracefully quit the browser instance and the associated `WebDriver` process.
pub async fn quit(mut self) {
let b = self.driver.take().unwrap();
b.quit().await.unwrap();
self.cmd.kill().unwrap();
}
}
impl Deref for Browser {
type Target = WebDriver;
fn deref(&self) -> &Self::Target {
self.driver.as_ref().unwrap()
}
}
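// Languages the extractors can scrape in; `EnumVariantNames` exposes the variant names as `Language::VARIANTS`, which the CLI uses as the possible values for `--lang`.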
#[allow(non_camel_case_types)]
#[derive(Debug, EnumVariantNames, Clone)]
pub enum Language {
en_US,
de_DE,
}
impl FromStr for Language {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"en_US" => Ok(Language::en_US),
"de_DE" => Ok(Language::de_DE),
_ => Err(()),
}
}
}
impl ToString for Language {
fn to_string(&self) -> String {
match self {
Language::en_US => "en_US".to_string(),
Language::de_DE => "de_DE".to_string(),
}
}
}
pub struct Config {
/// Save a timestamp alongside the scraped data
pub save_ts: bool,
/// Set the desired language for the extractor
pub language: Language,
/// Download media urls to disk
pub download_media: bool,
/// URL of the HTTP Proxy to use
pub http_proxy: Option<String>,
/// Embed media urls as data urls
pub embed_media: bool,
/// Force a specific extractor
pub force_extractor: Option<String>,
}
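// Illustrative construction (hypothetical values): `Config { language: Language::de_DE, embed_media: true, ..Config::default() }` scrapes German pages and embeds media as data URLs while leaving the other options at their defaults.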
impl Default for Config {
fn default() -> Self {
Self {
save_ts: false,
language: Language::en_US,
download_media: false,
http_proxy: None,
embed_media: false,
force_extractor: None,
}
}
}

128
src/main.rs Normal file
View file

@ -0,0 +1,128 @@
use clap::{App, Arg};
use scrape::{Config, Language};
use std::{io::Write, str::FromStr};
use strum::VariantNames;
#[must_use]
pub fn cli_args() -> clap::ArgMatches<'static> {
App::new("Web Scraper")
/* .arg(Arg::with_name("sites")
.long("sites")
.takes_value(false)
.multiple(false)
.help("Show all supported sites"))*/
.arg(
Arg::with_name("t")
.short("t")
.long("timestamp")
.help("Store timestamp when scraping"),
)
.arg(
Arg::with_name("d")
.short("d")
.long("download")
.help("Download any found media urls"),
)
.arg(
Arg::with_name("lang")
.long("lang")
.takes_value(true)
.required(false)
.possible_values(Language::VARIANTS)
.default_value("en_US")
.help("Desired language to scrape in"),
)
.arg(
Arg::with_name("http-proxy")
.long("http-proxy")
.takes_value(true)
.help("HTTP Proxy"),
)
.arg(
Arg::with_name("extractor")
.long("extractor")
.help("Force specific extractor")
.possible_values(
&scrape::extractors::get_extractors()
.iter()
.map(|x| x.name())
.collect::<Vec<String>>()
.iter()
.map(std::string::String::as_str)
.collect::<Vec<&str>>(),
)
.takes_value(true)
.required(false),
)
.arg(
Arg::with_name("e")
.short("e")
.long("embed-media")
.help("Embed media urls as data urls"),
)
.arg(
Arg::with_name("url")
.required(true)
.index(1)
.help("URL to scrape"),
)
.get_matches()
}
fn setup_logger() {
let mut logger = env_logger::builder();
#[cfg(debug_assertions)]
logger.filter_level(log::LevelFilter::Trace);
#[cfg(not(debug_assertions))]
logger.filter_level(log::LevelFilter::Info);
logger
.format(|buf, record| {
use log::Level;
let level = record.level();
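// Map each log level to an ANSI color escape code; the `\x1b[0m` in the format string resets the color after the level tag.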
let color = match level {
Level::Error => "\x1b[31m",
Level::Warn => "\x1b[33m",
Level::Info => "\x1b[32m",
Level::Debug => "\x1b[34m",
Level::Trace => "\x1b[35m",
};
writeln!(
buf,
"{}{}\x1b[0m [{}]: {}",
color,
record.metadata().level().to_string().to_uppercase(),
record.metadata().target(),
record.args()
)
})
.init();
}
#[tokio::main]
async fn main() {
setup_logger();
let matches = cli_args();
let http_proxy = matches.value_of("http-proxy");
let url = matches.value_of("url").unwrap();
let conf = Config {
save_ts: matches.is_present("t"),
language: Language::from_str(matches.value_of("lang").unwrap()).expect("unknown language"),
download_media: matches.is_present("d"),
http_proxy: http_proxy.map(std::borrow::ToOwned::to_owned),
embed_media: matches.is_present("e"),
force_extractor: matches
.value_of("extractor")
.map(std::string::ToString::to_string),
};
scrape::extractors::scrape_url(url, &conf).await;
}

395
src/util.rs Normal file
View file

@ -0,0 +1,395 @@
use std::{io::Write, process::Stdio};
use base64::Engine;
use regex::Regex;
use thirtyfour::WebElement;
use crate::{attr_from, text_from, Config};
/// Removes the last `n` characters from the given string `input` and returns a new `String`
/// containing the modified content.
///
/// This function takes a reference to a string `input` and an unsigned integer `n`. It then
/// creates a new string containing all characters of `input` except for the last `n` characters.
/// The resulting modified string is returned.
///
/// # Arguments
///
/// * `input` - The input string from which characters will be removed.
/// * `n` - The number of characters to remove from the end of the string.
///
/// # Returns
///
/// A new `String` containing the modified content with the last `n` characters removed.
///
/// # Examples
///
/// ```
/// use scrape::util::remove_last_n_chars;
///
/// let input = "example";
/// let modified = remove_last_n_chars(input, 3);
/// assert_eq!(modified, "exam");
/// ```
#[must_use]
pub fn remove_last_n_chars(input: &str, n: usize) -> String {
input[..input.len() - n].to_string()
}
/// Splits a slice into consecutive, non-overlapping windows of a specified size.
///
/// Given a slice `lst` and a window `size`, this function collects the elements into
/// groups of `size` elements, taken in order. Any trailing elements that do not fill
/// a complete window are discarded.
///
/// # Parameters
///
/// - `lst`: A slice of elements from which windows will be extracted.
/// - `size`: The size of each window.
///
/// # Returns
///
/// A vector of vectors, where each inner vector represents one extracted window
/// of elements from the input slice.
///
/// # Examples
///
/// ```rust
/// use scrape::util::window;
///
/// let input: Vec<&str> = vec!["r", "u", "s", "t"];
/// let out = window(&input, 2);
/// assert_eq!(out, vec![vec!["r", "u"], vec!["s", "t"]]);
///
/// let input: Vec<&str> = vec!["a", "b", "c", "d", "e", "f", "g", "h", "i"];
/// let out = window(&input, 3);
/// assert_eq!(out, vec![vec!["a", "b", "c"], vec!["d", "e", "f"], vec!["g", "h", "i"]]);
/// ```
pub fn window<T: Clone>(lst: &[T], size: usize) -> Vec<Vec<T>> {
let mut result = Vec::new();
let mut wd = lst.windows(size);
for i in 0..wd.len() {
let window = wd.next().unwrap();
if i % size != 0 {
continue;
}
result.push(window.to_owned());
}
result
}
/// Escapes unsafe characters in the given filename and returns a new `String` with
/// the unsafe characters replaced by underscores.
///
/// This function takes a reference to a filename string and scans it for characters that
/// are considered unsafe in filenames, such as `<`, `>`, `:`, `"`, `/`, `\`, `|`, `?`, `*`,
/// control characters (0x00-0x1F), and DEL (0x7F). It then replaces all occurrences of such
/// unsafe characters with underscores (`_`) in the filename and returns the modified string.
///
/// The function uses the `regex` crate to perform the replacement.
///
/// # Arguments
///
/// * `filename` - The filename string containing unsafe characters.
///
/// # Returns
///
/// A new `String` with unsafe characters replaced by underscores.
///
/// # Examples
///
/// ```
/// use scrape::util::escape_unsafe_characters;
///
/// let filename = "hello/world?.txt";
/// let escaped = escape_unsafe_characters(filename);
/// assert_eq!(escaped, "hello_world_.txt");
/// ```
#[must_use]
pub fn escape_unsafe_characters(filename: &str) -> String {
let unsafe_chars = r#"[<>:"/\\|?*\x00-\x1F\x7F]"#;
let re = Regex::new(unsafe_chars).unwrap();
re.replace_all(filename, "_").to_string()
}
/// Parses a date string using the provided format and returns the parsed date
/// in the "YYYY-MM-DD" format as a `String`.
///
/// This function takes a reference to a date string and a format string that specifies
/// the expected format of the input date. It attempts to parse the input date using the
/// given format and returns an `Option<String>` containing the parsed date in the
/// "YYYY-MM-DD" format if the parsing is successful. If parsing fails, `None` is returned.
///
/// The function uses the `chrono` crate to handle date parsing and formatting.
///
/// # Arguments
///
/// * `date` - The input date string to be parsed.
/// * `format` - The format string specifying the expected format of the input date.
///
/// # Returns
///
/// An `Option<String>` containing the parsed date in "YYYY-MM-DD" format if parsing is successful,
/// otherwise `None`.
///
/// # Examples
///
/// ```
/// use scrape::util::parse_date;
///
/// let date_str = "20-08-2023";
/// let format_str = "%d-%m-%Y";
/// let parsed = parse_date(date_str, format_str);
/// assert_eq!(parsed, Some(String::from("2023-08-20")));
/// ```
#[must_use]
pub fn parse_date(date: &str, format: &str) -> Option<String> {
let date = chrono::NaiveDate::parse_from_str(date, format).ok()?;
Some(date.format("%Y-%m-%d").to_string())
}
/// Handles a media URL based on the provided configuration, downloading, saving, and
/// potentially embedding the media content as a data URL.
///
/// This asynchronous function takes a reference to a URL, a file name, a boolean flag
/// indicating whether to use the raw file name, and a reference to a `Config` instance.
///
/// # Arguments
///
/// * `url` - The URL of the media content.
/// * `file_name` - The desired file name for saving the media content.
/// * `raw_file_name` - A flag indicating whether to use the raw file name.
/// * `conf` - A reference to a `Config` instance containing configuration settings.
///
/// # Returns
///
/// A `String` representing the Data URL or the original URL.
pub async fn handle_media_url(
url: &str,
file_name: &str,
raw_file_name: bool,
conf: &Config,
) -> String {
let file_name = escape_unsafe_characters(file_name);
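// The media is only fetched when it will be saved to disk or embedded as a data URL; in every other case the original URL is returned unchanged.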
if conf.download_media || conf.embed_media {
let data = download(url, conf).await;
if data.is_err() {
log::error!("Downloading '{url}' failed");
return url.to_string();
}
let data = data.unwrap();
if conf.download_media {
if raw_file_name {
save_raw(url, &data, &file_name);
} else {
save(url, &file_name, &data);
}
}
if conf.embed_media {
return to_data_url(&data);
}
}
url.to_string()
}
/// Escapes a given string by replacing spaces with underscores and converting it to lowercase.
///
/// This function takes a reference to a string `s` and performs the following actions:
///
/// 1. Replaces all occurrences of space characters (' ') with underscores ('_').
/// 2. Converts the entire string to lowercase.
///
/// The modified string is then returned.
///
/// # Arguments
///
/// * `s` - The input string to be escaped.
///
/// # Returns
///
/// A new `String` with spaces replaced by underscores and converted to lowercase.
///
/// # Examples
///
/// ```
/// use scrape::util::escape_key;
///
/// let original = "Hello World";
/// let escaped = escape_key(original);
/// assert_eq!(escaped, "hello_world");
/// ```
#[must_use]
pub fn escape_key(s: &str) -> String {
s.replace(' ', "_").to_lowercase()
}
/// Converts binary data into a data URL string.
///
/// This function takes a reference to a slice of bytes `data`, determines its MIME type and
/// base64-encodes the bytes to build a `data:` URL.
///
/// The `file` command is used to determine the MIME type of the data by reading from stdin.
///
/// # Arguments
///
/// * `data` - The binary data to be converted to a data URL.
///
/// # Returns
///
/// A `String` containing the data URL.
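///
/// # Note
///
/// Relies on the external `file` command being available on `PATH`.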
#[must_use]
pub fn to_data_url(data: &[u8]) -> String {
let mut file_cmd = std::process::Command::new("file")
.arg("--mime-type")
.arg("-")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.unwrap();
{
let mut stdin = file_cmd.stdin.take().unwrap();
stdin.write_all(data).unwrap();
}
let out = file_cmd.wait_with_output().expect("file executable error");
let stdout = String::from_utf8_lossy(&out.stdout).to_string();
let mime_type = remove_last_n_chars(&stdout.replace("/dev/stdin: ", ""), 1);
let base64_data = base64::engine::general_purpose::STANDARD.encode(data);
format!("data:{mime_type};base64,{base64_data}")
}
/// Downloads content from the provided URL using the given configuration.
///
/// # Arguments
///
/// * `url` - The URL from which to download content.
/// * `conf` - A reference to a `Config` instance containing optional proxy settings.
///
/// # Returns
///
/// A `Result` containing either the downloaded content as a `Vec<u8>` or an error message.
pub async fn download(url: &str, conf: &crate::Config) -> Result<Vec<u8>, String> {
let mut client_builder = reqwest::Client::builder();
if conf.http_proxy.is_some() {
let proxy = reqwest::Proxy::http(conf.http_proxy.clone().unwrap())
.ok()
.ok_or("could not create proxy")?;
client_builder = client_builder.proxy(proxy);
}
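// Send a desktop Chrome user-agent string instead of reqwest's default.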
let user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36";
let client = client_builder
.user_agent(user_agent)
.build()
.ok()
.ok_or("could not create client")?;
let resp = client.get(url).send().await.ok().ok_or("request failed")?;
if resp.status() == reqwest::StatusCode::OK {
let data = resp
.bytes()
.await
.ok()
.ok_or("could not get response body")?;
return Ok(data.to_vec());
}
Err(format!("Request failed with Status {}", resp.status()))
}
fn save_raw(url: &str, data: &[u8], file_name: &str) {
match std::fs::write(file_name, data) {
Ok(()) => {
log::info!("Saved '{url}' to '{file_name}'");
}
Err(e) => {
log::error!("Error saving '{url}': {e:?}");
}
}
}
fn save(url: &str, file_name: &str, data: &[u8]) {
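// Append the URL's final path segment to the chosen name so the saved file keeps the original file name and extension (e.g. `cover` + a URL ending in `/poster.jpg` -> `cover.poster.jpg`).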
let p_url = url::Url::parse(url).unwrap();
let path_segments: Vec<_> = p_url.path_segments().unwrap().collect();
let file_ending = (*path_segments.last().unwrap_or(&"")).to_string();
let file_name = format!("{file_name}.{file_ending}");
save_raw(url, data, &file_name);
}
/// Extracts text content from a collection of web elements asynchronously.
///
/// # Arguments
///
/// * `v` - A `Vec<WebElement>` from which to extract text content.
///
/// # Returns
///
/// A `Vec<String>` containing the extracted text content from the `WebElement`.
pub async fn extract_texts_from_elements(v: Vec<WebElement>) -> Vec<String> {
let mut ret: Vec<_> = vec![];
for e in v {
ret.push(text_from!(e));
}
ret
}
/// Extracts an attribute from a collection of web elements asynchronously.
///
/// # Arguments
///
/// * `v` - A `Vec<WebElement>` from which to extract attribute.
///
/// # Returns
///
/// A `Vec<String>` containing the extracted attribute from the `WebElement`.
pub async fn extract_attrs_from_elements(v: Vec<WebElement>, attr: &str) -> Vec<String> {
let mut ret: Vec<_> = vec![];
for e in v {
ret.push(attr_from!(e, attr));
}
ret
}
/// Parses a string containing a currency value and symbol into a JSON map.
///
/// This function takes a reference to a string `v` representing a currency value along
/// with its symbol (e.g., "$123.45").
///
/// # Arguments
///
/// * `v` - The input string containing a currency value and symbol.
///
/// # Returns
///
/// A JSON map with keys "currency" and "value" representing the currency symbol and value, respectively.
///
/// # Examples
///
/// ```rust
/// use scrape::util::currency;
///
/// let currency_str = "123.45$";
/// let json_map = currency(currency_str);
/// assert_eq!(json_map.get("currency").unwrap().as_str().unwrap(), "$");
/// assert_eq!(json_map.get("value").unwrap().as_f64().unwrap(), 123.45);
/// ```
#[must_use]
pub fn currency(v: &str) -> serde_json::Map<String, serde_json::Value> {
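// Capture group 1 is the numeric part (digits, commas and dots), group 2 the trailing currency symbol; a comma decimal separator is normalised to a dot before parsing.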
let re = Regex::new(r"^([\d,.]+)([^\d,.]+)$").unwrap();
let captures = re.captures(v).unwrap();
let value_str = captures.get(1).unwrap().as_str().replace(',', ".");
let value = value_str.parse::<f64>().unwrap();
let currency_symbol = captures.get(2).unwrap().as_str().to_string();
let mut result: serde_json::Map<String, serde_json::Value> = serde_json::Map::new();
result.insert("currency".to_string(), currency_symbol.into());
result.insert("value".to_string(), value.into());
result
}