From fdaff7023122b61dc12981795c566e17255b1904 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Sun, 1 Sep 2024 11:05:07 -0700 Subject: [PATCH] server: improve cloudflare and grafana image and iframe rendering --- server/src/lib.rs | 131 +++++++++++++++++++++++---------------- server/src/newsreader.rs | 4 ++ 2 files changed, 82 insertions(+), 53 deletions(-) diff --git a/server/src/lib.rs b/server/src/lib.rs index 639a77f..5184091 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -129,8 +129,8 @@ impl Transformer for InlineStyle { } } -/// Frame images will extract any alt or title tags on images and place them as labels below said -/// image. +/// Process images will extract any alt or title tags on images and place them as labels below said +/// image. It also handles data-src and data-cfsrc attributes struct FrameImages; #[async_trait] @@ -139,29 +139,50 @@ impl Transformer for FrameImages { Ok(rewrite_str( html, RewriteStrSettings { - element_content_handlers: vec![element!("img[alt], img[title]", |el| { - info!("found image with alt or title {el:?}"); - let src = el - .get_attribute("src") - .unwrap_or("https://placehold.co/600x400".to_string()); - let alt = el.get_attribute("alt"); - let title = el.get_attribute("title"); - let mut frags = vec!["
".to_string(), format!(r#""#)]; - alt.map(|t| { - if !t.is_empty() { - frags.push(format!("
Alt: {t}
")) - } - }); - title.map(|t| { - if !t.is_empty() { - frags.push(format!("
Title: {t}
")) - } - }); - frags.push("
".to_string()); - el.replace(&frags.join("\n"), ContentType::Html); + element_content_handlers: vec![ + element!("img[data-src]", |el| { + info!("found image with data-src {el:?}"); + let src = el + .get_attribute("data-src") + .unwrap_or("https://placehold.co/600x400".to_string()); + el.set_attribute("src", &src)?; - Ok(()) - })], + Ok(()) + }), + element!("img[data-cfsrc]", |el| { + info!("found image with data-cfsrc {el:?}"); + let src = el + .get_attribute("data-cfsrc") + .unwrap_or("https://placehold.co/600x400".to_string()); + el.set_attribute("src", &src)?; + + Ok(()) + }), + element!("img[alt], img[title]", |el| { + info!("found image with alt or title {el:?}"); + let src = el + .get_attribute("src") + .unwrap_or("https://placehold.co/600x400".to_string()); + let alt = el.get_attribute("alt"); + let title = el.get_attribute("title"); + let mut frags = + vec!["
".to_string(), format!(r#""#)]; + alt.map(|t| { + if !t.is_empty() { + frags.push(format!("
Alt: {t}
")) + } + }); + title.map(|t| { + if !t.is_empty() { + frags.push(format!("
Title: {t}
")) + } + }); + frags.push("
".to_string()); + el.replace(&frags.join("\n"), ContentType::Html); + + Ok(()) + }), + ], ..RewriteStrSettings::default() }, )?) @@ -272,6 +293,24 @@ pub fn sanitize_html( cid_prefix: &str, base_url: &Option, ) -> Result { + let inline_opts = InlineOptions { + inline_style_tags: true, + keep_style_tags: false, + keep_link_tags: false, + base_url: None, + load_remote_stylesheets: false, + extra_css: None, + preallocate_node_capacity: 32, + ..InlineOptions::default() + }; + + let html = match CSSInliner::new(inline_opts).inline(&html) { + Ok(inlined_html) => inlined_html, + Err(err) => { + error!("failed to inline CSS: {err}"); + html.to_string() + } + }; let mut element_content_handlers = vec![ // Open links in new tab element!("a[href]", |el| { @@ -322,25 +361,13 @@ pub fn sanitize_html( }), ]); } - - let inline_opts = InlineOptions { - inline_style_tags: true, - keep_style_tags: false, - keep_link_tags: false, - base_url: None, - load_remote_stylesheets: false, - extra_css: None, - preallocate_node_capacity: 32, - ..InlineOptions::default() - }; - - let inlined_html = match CSSInliner::new(inline_opts).inline(&html) { - Ok(inlined_html) => inlined_html, - Err(err) => { - error!("failed to inline CSS: {err}"); - html.to_string() - } - }; + let html = rewrite_str( + &html, + RewriteStrSettings { + element_content_handlers, + ..RewriteStrSettings::default() + }, + )?; // Default's don't allow style, but we want to preserve that. // TODO: remove 'class' if rendering mails moves to a two phase process where abstract message // types are collected, santized, and then grouped together as one big HTML doc @@ -388,6 +415,7 @@ pub fn sanitize_html( "hgroup", "hr", "i", + "iframe", // wathiede "img", "ins", "kbd", @@ -396,6 +424,7 @@ pub fn sanitize_html( "map", "mark", "nav", + "noscript", // wathiede "ol", "p", "pre", @@ -449,6 +478,9 @@ pub fn sanitize_html( "hr" => hashset![ "align", "size", "width" ], + "iframe" => hashset![ + "src", "allow", "allowfullscreen" + ], "img" => hashset![ "align", "alt", "height", "src", "width" ], @@ -484,21 +516,14 @@ pub fn sanitize_html( ], ]; - let rewritten_html = rewrite_str( - &inlined_html, - RewriteStrSettings { - element_content_handlers, - ..RewriteStrSettings::default() - }, - )?; - let clean_html = ammonia::Builder::default() + let html = ammonia::Builder::default() .tags(tags) .tag_attributes(tag_attributes) .generic_attributes(attributes) - .clean(&rewritten_html) + .clean(&html) .to_string(); - Ok(clean_html) + Ok(html) } fn compute_offset_limit( diff --git a/server/src/newsreader.rs b/server/src/newsreader.rs index cafec74..3cced44 100644 --- a/server/src/newsreader.rs +++ b/server/src/newsreader.rs @@ -176,6 +176,10 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result> = vec![ Box::new(SlurpContents { site_selectors: hashmap![ + "blog.cloudflare.com".to_string() => vec![ + Selector::parse(".author-lists").unwrap(), + Selector::parse(".post-full-content").unwrap() + ], "hackaday.com".to_string() => vec![ Selector::parse("div.entry-featured-image").unwrap(), Selector::parse("div.entry-content").unwrap()