Compare commits


No commits in common. "fdaff7023122b61dc12981795c566e17255b1904" and "4faef5e0173faa9063cc8009b93e318559115956" have entirely different histories.

7 changed files with 75 additions and 95 deletions

View File

@@ -1,6 +1,3 @@
-// Rocket generates a lot of warnings for handlers
-// TODO: figure out why
-#![allow(unreachable_patterns)]
 #[macro_use]
 extern crate rocket;
 use std::{error::Error, io::Cursor, str::FromStr};

View File

@@ -14,16 +14,19 @@ use lol_html::{
     RewriteStrSettings,
 };
 use maplit::{hashmap, hashset};
-use scraper::{Html, Selector};
+use scraper::{error::SelectorErrorKind, Html, Selector};
 use thiserror::Error;
 use url::Url;
 
-use crate::newsreader::{extract_thread_id, is_newsreader_thread};
+use crate::newsreader::{
+    extract_thread_id, is_newsreader_search, is_newsreader_thread, make_news_tag,
+};
+const NON_EXISTENT_SITE_NAME: &'static str = "NO-SUCH-SITE";
 
 // TODO: figure out how to use Cow
 #[async_trait]
 trait Transformer: Send + Sync {
-    fn should_run(&self, _addr: &Option<Url>, _html: &str) -> bool {
+    fn should_run(&self, addr: &Option<Url>, _html: &str) -> bool {
         true
     }
     // TODO: should html be something like `html_escape` uses:
// TODO: should html be something like `html_escape` uses: // TODO: should html be something like `html_escape` uses:
@@ -129,60 +132,39 @@ impl Transformer for InlineStyle {
     }
 }
 
-/// Process images will extract any alt or title tags on images and place them as labels below said
-/// image. It also handles data-src and data-cfsrc attributes
+/// Frame images will extract any alt or title tags on images and place them as labels below said
+/// image.
 struct FrameImages;
 
 #[async_trait]
 impl Transformer for FrameImages {
-    async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
+    async fn transform(&self, link: &Option<Url>, html: &str) -> Result<String, TransformError> {
         Ok(rewrite_str(
             html,
             RewriteStrSettings {
-                element_content_handlers: vec![
-                    element!("img[data-src]", |el| {
-                        info!("found image with data-src {el:?}");
-                        let src = el
-                            .get_attribute("data-src")
-                            .unwrap_or("https://placehold.co/600x400".to_string());
-                        el.set_attribute("src", &src)?;
-                        Ok(())
-                    }),
-                    element!("img[data-cfsrc]", |el| {
-                        info!("found image with data-cfsrc {el:?}");
-                        let src = el
-                            .get_attribute("data-cfsrc")
-                            .unwrap_or("https://placehold.co/600x400".to_string());
-                        el.set_attribute("src", &src)?;
-                        Ok(())
-                    }),
-                    element!("img[alt], img[title]", |el| {
-                        info!("found image with alt or title {el:?}");
-                        let src = el
-                            .get_attribute("src")
-                            .unwrap_or("https://placehold.co/600x400".to_string());
-                        let alt = el.get_attribute("alt");
-                        let title = el.get_attribute("title");
-                        let mut frags =
-                            vec!["<figure>".to_string(), format!(r#"<img src="{src}">"#)];
-                        alt.map(|t| {
-                            if !t.is_empty() {
-                                frags.push(format!("<figcaption>Alt: {t}</figcaption>"))
-                            }
-                        });
-                        title.map(|t| {
-                            if !t.is_empty() {
-                                frags.push(format!("<figcaption>Title: {t}</figcaption>"))
-                            }
-                        });
-                        frags.push("</figure>".to_string());
-                        el.replace(&frags.join("\n"), ContentType::Html);
-                        Ok(())
-                    }),
-                ],
+                element_content_handlers: vec![element!("img[alt], img[title]", |el| {
+                    info!("found image with alt or title {el:?}");
+                    let src = el
+                        .get_attribute("src")
+                        .unwrap_or("https://placehold.co/600x400".to_string());
+                    let alt = el.get_attribute("alt");
+                    let title = el.get_attribute("title");
+                    let mut frags = vec!["<figure>".to_string(), format!(r#"<img src="{src}">"#)];
+                    alt.map(|t| {
+                        if !t.is_empty() {
+                            frags.push(format!("<figcaption>Alt: {t}</figcaption>"))
+                        }
+                    });
+                    title.map(|t| {
+                        if !t.is_empty() {
+                            frags.push(format!("<figcaption>Title: {t}</figcaption>"))
+                        }
+                    });
+                    frags.push("</figure>".to_string());
+                    el.replace(&frags.join("\n"), ContentType::Html);
+                    Ok(())
+                })],
                 ..RewriteStrSettings::default()
             },
         )?)
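
The hunk above drops the dedicated `img[data-src]` and `img[data-cfsrc]` handlers and keeps only the combined `img[alt], img[title]` handler. As a minimal standalone sketch of what that surviving handler does, assuming the `lol_html` crate; the input string, `main` wrapper, and alt-only handling are made up for illustration and are not taken from the repository:

    use lol_html::{element, html_content::ContentType, rewrite_str, RewriteStrSettings};

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // Hypothetical input; the handler matches because the image has an `alt` attribute.
        let sample = r#"<img src="cat.png" alt="a cat">"#;
        let out = rewrite_str(
            sample,
            RewriteStrSettings {
                element_content_handlers: vec![element!("img[alt], img[title]", |el| {
                    let src = el
                        .get_attribute("src")
                        .unwrap_or("https://placehold.co/600x400".to_string());
                    let mut frags = vec!["<figure>".to_string(), format!(r#"<img src="{src}">"#)];
                    if let Some(alt) = el.get_attribute("alt").filter(|t| !t.is_empty()) {
                        frags.push(format!("<figcaption>Alt: {alt}</figcaption>"));
                    }
                    frags.push("</figure>".to_string());
                    // Replace the bare <img> with a <figure> carrying the caption.
                    el.replace(&frags.join("\n"), ContentType::Html);
                    Ok(())
                })],
                ..RewriteStrSettings::default()
            },
        )?;
        // `out` is now a <figure> wrapping the image plus an "Alt: a cat" caption.
        println!("{out}");
        Ok(())
    }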
@@ -231,7 +213,7 @@ impl SlurpContents {
 
 #[async_trait]
 impl Transformer for SlurpContents {
-    fn should_run(&self, link: &Option<Url>, _: &str) -> bool {
+    fn should_run(&self, link: &Option<Url>, html: &str) -> bool {
         if let Some(link) = link {
             return self.get_selectors(link).is_some();
         }
@@ -293,24 +275,6 @@ pub fn sanitize_html(
     cid_prefix: &str,
     base_url: &Option<Url>,
 ) -> Result<String, TransformError> {
-    let inline_opts = InlineOptions {
-        inline_style_tags: true,
-        keep_style_tags: false,
-        keep_link_tags: false,
-        base_url: None,
-        load_remote_stylesheets: false,
-        extra_css: None,
-        preallocate_node_capacity: 32,
-        ..InlineOptions::default()
-    };
-    let html = match CSSInliner::new(inline_opts).inline(&html) {
-        Ok(inlined_html) => inlined_html,
-        Err(err) => {
-            error!("failed to inline CSS: {err}");
-            html.to_string()
-        }
-    };
-
     let mut element_content_handlers = vec![
         // Open links in new tab
         element!("a[href]", |el| {
@@ -361,13 +325,25 @@ pub fn sanitize_html(
             }),
         ]);
     }
-    let html = rewrite_str(
-        &html,
-        RewriteStrSettings {
-            element_content_handlers,
-            ..RewriteStrSettings::default()
-        },
-    )?;
+
+    let inline_opts = InlineOptions {
+        inline_style_tags: true,
+        keep_style_tags: false,
+        keep_link_tags: false,
+        base_url: None,
+        load_remote_stylesheets: false,
+        extra_css: None,
+        preallocate_node_capacity: 32,
+        ..InlineOptions::default()
+    };
+    let inlined_html = match CSSInliner::new(inline_opts).inline(&html) {
+        Ok(inlined_html) => inlined_html,
+        Err(err) => {
+            error!("failed to inline CSS: {err}");
+            html.to_string()
+        }
+    };
+
     // Default's don't allow style, but we want to preserve that.
     // TODO: remove 'class' if rendering mails moves to a two phase process where abstract message
     // types are collected, santized, and then grouped together as one big HTML doc
@@ -415,7 +391,6 @@ pub fn sanitize_html(
         "hgroup",
         "hr",
         "i",
-        "iframe", // wathiede
         "img",
         "ins",
         "kbd",
@@ -424,7 +399,6 @@
         "map",
         "mark",
         "nav",
-        "noscript", // wathiede
         "ol",
         "p",
         "pre",
@@ -478,9 +452,6 @@
         "hr" => hashset![
             "align", "size", "width"
         ],
-        "iframe" => hashset![
-            "src", "allow", "allowfullscreen"
-        ],
         "img" => hashset![
             "align", "alt", "height", "src", "width"
         ],
@@ -516,14 +487,21 @@ pub fn sanitize_html(
         ],
     ];
 
-    let html = ammonia::Builder::default()
+    let rewritten_html = rewrite_str(
+        &inlined_html,
+        RewriteStrSettings {
+            element_content_handlers,
+            ..RewriteStrSettings::default()
+        },
+    )?;
+
+    let clean_html = ammonia::Builder::default()
         .tags(tags)
         .tag_attributes(tag_attributes)
         .generic_attributes(attributes)
-        .clean(&html)
+        .clean(&rewritten_html)
         .to_string();
-    Ok(html)
+    Ok(clean_html)
 }
 
 fn compute_offset_limit(
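
Taken together, the sanitize_html hunks reorder the function so CSS inlining runs first, the lol_html rewrite runs on the inlined document, and ammonia cleans the rewritten result. A condensed sketch of the resulting order, excerpted and abridged from the hunks above (a function-body fragment, not a compilable unit on its own):

    // 1. Inline stylesheet rules into style="" attributes, falling back to the raw HTML on error.
    let inlined_html = match CSSInliner::new(inline_opts).inline(&html) {
        Ok(inlined_html) => inlined_html,
        Err(err) => {
            error!("failed to inline CSS: {err}");
            html.to_string()
        }
    };
    // 2. Apply the lol_html element handlers to the inlined document.
    let rewritten_html = rewrite_str(
        &inlined_html,
        RewriteStrSettings {
            element_content_handlers,
            ..RewriteStrSettings::default()
        },
    )?;
    // 3. Sanitize with ammonia against the tag/attribute allow-lists built above.
    let clean_html = ammonia::Builder::default()
        .tags(tags)
        .tag_attributes(tag_attributes)
        .generic_attributes(attributes)
        .clean(&rewritten_html)
        .to_string();
    Ok(clean_html)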
@@ -602,6 +580,7 @@ impl FromStr for Query {
         let mut tag = None;
         let mut uid = None;
         let mut remainder = Vec::new();
+        let site_prefix = make_news_tag("");
         let mut is_notmuch = false;
         let mut is_newsreader = false;
         for word in s.split_whitespace() {

View File

@ -14,7 +14,7 @@ const THREAD_PREFIX: &'static str = "news:";
use crate::{ use crate::{
compute_offset_limit, compute_offset_limit,
error::ServerError, error::ServerError,
graphql::{NewsPost, Tag, Thread, ThreadSummary}, graphql::{Body, Email, Html, Message, NewsPost, Tag, Thread, ThreadSummary},
AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml, AddOutlink, EscapeHtml, FrameImages, InlineStyle, SanitizeHtml, SlurpContents, StripHtml,
Transformer, Transformer,
}; };
@@ -176,10 +176,6 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
     let body_tranformers: Vec<Box<dyn Transformer>> = vec![
         Box::new(SlurpContents {
             site_selectors: hashmap![
-                "blog.cloudflare.com".to_string() => vec![
-                    Selector::parse(".author-lists").unwrap(),
-                    Selector::parse(".post-full-content").unwrap()
-                ],
                 "hackaday.com".to_string() => vec![
                     Selector::parse("div.entry-featured-image").unwrap(),
                     Selector::parse("div.entry-content").unwrap()

View File

@@ -307,6 +307,7 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
                 .map(|t| Tag {
                     name: t.name,
                     bg_color: t.bg_color,
+                    fg_color: t.fg_color,
                     unread: t.unread,
                 })
                 .collect(),
@@ -356,13 +357,18 @@ pub fn update(msg: Msg, model: &mut Model, orders: &mut impl Orders<Msg>) {
                 .map(|t| Tag {
                     name: t.name,
                     bg_color: t.bg_color,
+                    fg_color: t.fg_color,
                     unread: t.unread,
                 })
                 .collect(),
             );
             match &data.thread {
                 graphql::show_thread_query::ShowThreadQueryThread::EmailThread(
-                    ShowThreadQueryThreadOnEmailThread { messages, .. },
+                    ShowThreadQueryThreadOnEmailThread {
+                        thread_id,
+                        subject,
+                        messages,
+                    },
                 ) => {
                     let mut open_messages: HashSet<_> = messages
                         .iter()
@@ -549,6 +555,7 @@ pub enum Context {
 pub struct Tag {
     pub name: String,
     pub bg_color: String,
+    pub fg_color: String,
     pub unread: i64,
 }
@@ -597,8 +604,6 @@ pub enum Msg {
     SelectionSetNone,
     SelectionSetAll,
     SelectionAddTag(String),
-    #[allow(dead_code)]
-    // TODO
     SelectionRemoveTag(String),
     SelectionMarkAsRead,
     SelectionMarkAsUnread,

View File

@@ -1,7 +1,8 @@
 use seed::{prelude::*, *};
-use seed_hooks::topo;
+use seed_hooks::{state_access::CloneState, topo, use_state};
 
 use crate::{
+    api::urls,
     graphql::show_thread_query::*,
     state::{Context, Model, Msg},
     view::{self, reading_progress, view_header, view_search_results, view_tags},

View File

@@ -1028,6 +1028,7 @@ pub fn view_tags(model: &Model) -> Node<Msg> {
                         &Tag {
                             name: parts[..i + 1].join("/"),
                             bg_color: "#fff".to_string(),
+                            fg_color: "#000".to_string(),
                             unread: 0,
                         },
                         search_unread,

View File

@@ -1,3 +1,4 @@
+use log::info;
 use seed::{prelude::*, *};
 
 use crate::{