improve favicons fetching

This commit is contained in:
Bilal Elmoussaoui 2020-12-19 05:03:40 +01:00
parent 11196b6412
commit cd3a113a10
6 changed files with 96 additions and 44 deletions

1
Cargo.lock generated
View File

@ -235,6 +235,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"ashpd",
"async-std",
"byteorder",
"diesel",
"diesel_migrations",

View File

@ -31,4 +31,4 @@ serde_json = "1.0"
qrcode = {version ="0.12",features=["image"]}
gtk = { git = "https://github.com/gtk-rs/gtk4-rs", package = "gtk4"}
libhandy = { git = "https://gitlab.gnome.org/bilelmoussaoui/libhandy4-rs", package = "libhandy4"}
async-std = "1.8"

View File

@ -223,7 +223,7 @@
</child>
</template>
<object class="GtkEntryCompletion" id="provider_completion">
<property name="minimum-key-length">2</property>
<property name="minimum-key-length">1</property>
<property name="text-column">1</property>
<property name="inline-selection">True</property>
<child>

View File

@ -15,7 +15,7 @@
<child>
<object class="ProviderImage" id="image">
<property name="halign">start</property>
<property name="size">48</property>
<property name="size">32</property>
</object>
</child>
<child>

View File

@ -1,6 +1,7 @@
use image::io::Reader as ImageReader;
use quick_xml::events::{attributes::Attribute, BytesStart, Event};
use std::io::Cursor;
use url::Url;
const SUPPORTED_RELS: [&[u8]; 7] = [
b"icon",
b"fluid-icon",
@ -15,7 +16,6 @@ const SUPPORTED_RELS: [&[u8]; 7] = [
pub enum FaviconError {
Surf(surf::Error),
Url(url::ParseError),
GLib(gtk::glib::Error),
NoResults,
}
@ -25,51 +25,103 @@ impl From<surf::Error> for FaviconError {
}
}
impl From<gtk::glib::Error> for FaviconError {
fn from(e: gtk::glib::Error) -> Self {
Self::GLib(e)
}
}
impl From<url::ParseError> for FaviconError {
fn from(e: url::ParseError) -> Self {
Self::Url(e)
}
}
pub struct Favicon {
icons: Vec<Url>,
impl std::error::Error for FaviconError {}
/// User-facing rendering of a `FaviconError`; every message is prefixed with
/// `FaviconError: `. Wrapped errors are rendered through their own `Display`
/// implementation.
impl std::fmt::Display for FaviconError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            FaviconError::NoResults => write!(f, "FaviconError: No results were found"),
            // Always format the *wrapped* error, never `self`: writing `self`
            // with `{}` re-enters this method and recurses until the stack
            // overflows.
            FaviconError::Surf(e) => write!(f, "FaviconError: {}", e),
            FaviconError::Url(e) => write!(f, "FaviconError: {}", e),
        }
    }
}
impl Favicon {}
#[derive(Debug)]
pub struct FaviconScrapper;
pub struct Favicon(Vec<Url>, surf::Client);
impl FaviconScrapper {
pub async fn from_url(url: Url) -> Result<Vec<Url>, FaviconError> {
let mut res = surf::get(&url).await?;
let body = res.body_string().await?;
let mut reader = quick_xml::Reader::from_str(&body);
let icons = Self::from_reader(&mut reader, &url);
Ok(icons)
impl Favicon {
/// Probes every discovered icon in order and returns the URL of the widest one.
///
/// Only the width is compared; the height is assumed to match it. Icons whose
/// size can't be determined are skipped, and on a tie the icon found first is
/// kept. Returns `None` when no icon reports a width greater than zero.
pub async fn find_best(&self) -> Option<&Url> {
    let mut winner: Option<(&Url, u32)> = None;
    for candidate in &self.0 {
        let width = match self.get_size(candidate).await {
            Some((width, _height)) => width,
            None => continue,
        };
        // Before any icon has been accepted the bar to beat is a width of 0.
        let widest_so_far = winner.map_or(0, |(_, w)| w);
        if width > widest_so_far {
            winner = Some((candidate, width));
        }
    }
    winner.map(|(url, _)| url)
}
fn from_reader(reader: &mut quick_xml::Reader<&[u8]>, base_url: &Url) -> Vec<Url> {
/// Fetches the icon at `url` and returns its `(width, height)` in pixels.
///
/// The image format is picked from the file extension of the URL path,
/// compared case-insensitively. SVG icons are not decoded and are reported
/// as 1024x1024.
///
/// Returns `None` when the path has no extension, the extension is not one of
/// `svg`, `png` or `ico`, the request fails or answers with a non-success
/// status, or the image dimensions can't be read.
pub async fn get_size(&self, url: &Url) -> Option<(u32, u32)> {
    let ext = std::path::Path::new(url.path())
        .extension()
        .and_then(|e| e.to_str())?
        .to_ascii_lowercase();
    // Resolve the format before touching the network. Pages routinely link
    // icons in other formats (jpg, gif, ...); those are skipped rather than
    // treated as impossible. `None` here stands for "svg, nothing to decode".
    let format = match ext.as_str() {
        "svg" => None,
        "png" => Some(image::ImageFormat::Png),
        "ico" => Some(image::ImageFormat::Ico),
        _ => return None,
    };

    let mut response = self.1.get(url).await.ok()?;
    // A 404 page or similar is not an icon, whatever the URL's extension says.
    if !response.status().is_success() {
        return None;
    }

    let format = match format {
        Some(format) => format,
        // Assumes the svg is the best size we can find
        None => return Some((1024, 1024)),
    };

    let bytes = response.body_bytes().await.ok()?;
    let mut image = ImageReader::new(Cursor::new(bytes));
    image.set_format(format);
    image.into_dimensions().ok()
}
}
#[derive(Debug)]
pub struct FaviconScrapper(surf::Client);
impl FaviconScrapper {
/// Creates a scrapper backed by a fresh `surf` client that has the default
/// `Redirect` middleware installed.
pub fn new() -> Self {
    Self(surf::client().with(surf::middleware::Redirect::default()))
}
/// Downloads the page at `url` and collects the icon URLs found by
/// `from_reader` in its markup.
///
/// The request is sent with a desktop Safari `User-Agent` header. The markup
/// reader is configured not to check end-tag names.
///
/// # Errors
///
/// Returns the `surf` error if the request or reading the body fails, and
/// `FaviconError::NoResults` if no icon URL was found.
pub async fn from_url(&self, url: Url) -> Result<Favicon, FaviconError> {
    let request = self.0.get(&url).header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.2 Safari/605.1.15");
    let mut response = request.await?;
    let html = response.body_string().await?;

    let mut parser = quick_xml::Reader::from_str(&html);
    parser.check_end_names(false);
    parser.trim_markup_names_in_closing_tags(true);

    match self.from_reader(&mut parser, &url) {
        icons if icons.is_empty() => Err(FaviconError::NoResults),
        // The client is handed over so the icons can be probed with it later.
        icons => Ok(Favicon(icons, self.0.clone())),
    }
}
fn from_reader(&self, reader: &mut quick_xml::Reader<&[u8]>, base_url: &Url) -> Vec<Url> {
let mut buf = Vec::new();
let mut urls = Vec::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => {
Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
if let b"link" = e.name() {
if let Some(url) = Self::from_link(e, base_url) {
if let Some(url) = self.from_link(e, base_url) {
urls.push(url);
}
}
}
Ok(Event::Eof) => break,
Err(e) => warn!("Error at position {}: {:?}", reader.buffer_position(), e),
Err(e) => debug!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (),
}
}
@ -77,7 +129,7 @@ impl FaviconScrapper {
urls
}
fn from_link(e: &BytesStart, base_url: &Url) -> Option<Url> {
fn from_link(&self, e: &BytesStart, base_url: &Url) -> Option<Url> {
let mut url = None;
let mut has_proper_rel = false;
@ -87,7 +139,10 @@ impl FaviconScrapper {
key: b"href",
value,
}) => {
let href = String::from_utf8(value.into_owned()).unwrap();
let mut href = String::from_utf8(value.into_owned()).unwrap();
if href.starts_with("//") {
href = format!("https:{}", href);
}
url = match Url::parse(&href) {
Ok(url) => Some(url),
Err(url::ParseError::RelativeUrlWithoutBase) => base_url.join(&href).ok(),

View File

@ -3,6 +3,7 @@ use crate::{
models::{database, Account, AccountsModel, FaviconError, FaviconScrapper},
schema::providers,
};
use async_std::prelude::*;
use anyhow::Result;
use core::cmp::Ordering;
use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl};
@ -359,32 +360,27 @@ impl Provider {
.expect("Failed to create provider")
}
pub async fn favicon(&self) -> Result<gio::File, FaviconError> {
pub async fn favicon(&self) -> Result<gio::File, Box<dyn std::error::Error>> {
if let Some(ref website) = self.website() {
let website_url = Url::parse(website)?;
let favicons = FaviconScrapper::from_url(website_url).await?;
let favicon = FaviconScrapper::new().from_url(website_url).await?;
let icon_name = format!("{}_{}", self.id(), self.name().replace(' ', "_"));
let cache_path = glib::get_user_cache_dir()
.join("authenticator")
.join("favicons")
.join(icon_name);
let dest = gio::File::new_for_path(cache_path);
let mut dest = async_std::fs::File::create(cache_path.clone()).await?;
if let Some(favicon) = favicons.get(0) {
let mut res = surf::get(favicon).await?;
if let Some(best_favicon) = favicon.find_best().await {
let mut res = surf::get(best_favicon).await?;
let body = res.body_bytes().await?;
dest.replace_contents(
&body,
None,
false,
gio::FileCreateFlags::REPLACE_DESTINATION,
gio::NONE_CANCELLABLE,
)?;
return Ok(dest);
dest.write_all(&body).await?;
return Ok(gio::File::new_for_path(cache_path));
}
}
Err(FaviconError::NoResults)
Err(Box::new(FaviconError::NoResults))
}
pub fn id(&self) -> i32 {