server: improve cloudflare and grafana image and iframe rendering

This commit is contained in:
Bill Thiede 2024-09-01 11:05:07 -07:00
parent 7218c13b9e
commit fdaff70231
2 changed files with 82 additions and 53 deletions

View File

@ -129,8 +129,8 @@ impl Transformer for InlineStyle {
} }
} }
/// Frame images will extract any alt or title tags on images and place them as labels below said /// Process images will extract any alt or title tags on images and place them as labels below said
/// image. /// image. It also copies the data-src and data-cfsrc attribute values into src.
struct FrameImages; struct FrameImages;
#[async_trait] #[async_trait]
@ -139,29 +139,50 @@ impl Transformer for FrameImages {
Ok(rewrite_str( Ok(rewrite_str(
html, html,
RewriteStrSettings { RewriteStrSettings {
element_content_handlers: vec![element!("img[alt], img[title]", |el| { element_content_handlers: vec![
info!("found image with alt or title {el:?}"); element!("img[data-src]", |el| {
let src = el info!("found image with data-src {el:?}");
.get_attribute("src") let src = el
.unwrap_or("https://placehold.co/600x400".to_string()); .get_attribute("data-src")
let alt = el.get_attribute("alt"); .unwrap_or("https://placehold.co/600x400".to_string());
let title = el.get_attribute("title"); el.set_attribute("src", &src)?;
let mut frags = vec!["<figure>".to_string(), format!(r#"<img src="{src}">"#)];
alt.map(|t| {
if !t.is_empty() {
frags.push(format!("<figcaption>Alt: {t}</figcaption>"))
}
});
title.map(|t| {
if !t.is_empty() {
frags.push(format!("<figcaption>Title: {t}</figcaption>"))
}
});
frags.push("</figure>".to_string());
el.replace(&frags.join("\n"), ContentType::Html);
Ok(()) Ok(())
})], }),
element!("img[data-cfsrc]", |el| {
info!("found image with data-cfsrc {el:?}");
let src = el
.get_attribute("data-cfsrc")
.unwrap_or("https://placehold.co/600x400".to_string());
el.set_attribute("src", &src)?;
Ok(())
}),
element!("img[alt], img[title]", |el| {
info!("found image with alt or title {el:?}");
let src = el
.get_attribute("src")
.unwrap_or("https://placehold.co/600x400".to_string());
let alt = el.get_attribute("alt");
let title = el.get_attribute("title");
let mut frags =
vec!["<figure>".to_string(), format!(r#"<img src="{src}">"#)];
alt.map(|t| {
if !t.is_empty() {
frags.push(format!("<figcaption>Alt: {t}</figcaption>"))
}
});
title.map(|t| {
if !t.is_empty() {
frags.push(format!("<figcaption>Title: {t}</figcaption>"))
}
});
frags.push("</figure>".to_string());
el.replace(&frags.join("\n"), ContentType::Html);
Ok(())
}),
],
..RewriteStrSettings::default() ..RewriteStrSettings::default()
}, },
)?) )?)
@ -272,6 +293,24 @@ pub fn sanitize_html(
cid_prefix: &str, cid_prefix: &str,
base_url: &Option<Url>, base_url: &Option<Url>,
) -> Result<String, TransformError> { ) -> Result<String, TransformError> {
let inline_opts = InlineOptions {
inline_style_tags: true,
keep_style_tags: false,
keep_link_tags: false,
base_url: None,
load_remote_stylesheets: false,
extra_css: None,
preallocate_node_capacity: 32,
..InlineOptions::default()
};
let html = match CSSInliner::new(inline_opts).inline(&html) {
Ok(inlined_html) => inlined_html,
Err(err) => {
error!("failed to inline CSS: {err}");
html.to_string()
}
};
let mut element_content_handlers = vec![ let mut element_content_handlers = vec![
// Open links in new tab // Open links in new tab
element!("a[href]", |el| { element!("a[href]", |el| {
@ -322,25 +361,13 @@ pub fn sanitize_html(
}), }),
]); ]);
} }
let html = rewrite_str(
let inline_opts = InlineOptions { &html,
inline_style_tags: true, RewriteStrSettings {
keep_style_tags: false, element_content_handlers,
keep_link_tags: false, ..RewriteStrSettings::default()
base_url: None, },
load_remote_stylesheets: false, )?;
extra_css: None,
preallocate_node_capacity: 32,
..InlineOptions::default()
};
let inlined_html = match CSSInliner::new(inline_opts).inline(&html) {
Ok(inlined_html) => inlined_html,
Err(err) => {
error!("failed to inline CSS: {err}");
html.to_string()
}
};
// Defaults don't allow style, but we want to preserve that. // Defaults don't allow style, but we want to preserve that.
// TODO: remove 'class' if rendering mails moves to a two phase process where abstract message // TODO: remove 'class' if rendering mails moves to a two phase process where abstract message
// types are collected, sanitized, and then grouped together as one big HTML doc // types are collected, sanitized, and then grouped together as one big HTML doc
@ -388,6 +415,7 @@ pub fn sanitize_html(
"hgroup", "hgroup",
"hr", "hr",
"i", "i",
"iframe", // wathiede
"img", "img",
"ins", "ins",
"kbd", "kbd",
@ -396,6 +424,7 @@ pub fn sanitize_html(
"map", "map",
"mark", "mark",
"nav", "nav",
"noscript", // wathiede
"ol", "ol",
"p", "p",
"pre", "pre",
@ -449,6 +478,9 @@ pub fn sanitize_html(
"hr" => hashset![ "hr" => hashset![
"align", "size", "width" "align", "size", "width"
], ],
"iframe" => hashset![
"src", "allow", "allowfullscreen"
],
"img" => hashset![ "img" => hashset![
"align", "alt", "height", "src", "width" "align", "alt", "height", "src", "width"
], ],
@ -484,21 +516,14 @@ pub fn sanitize_html(
], ],
]; ];
let rewritten_html = rewrite_str( let html = ammonia::Builder::default()
&inlined_html,
RewriteStrSettings {
element_content_handlers,
..RewriteStrSettings::default()
},
)?;
let clean_html = ammonia::Builder::default()
.tags(tags) .tags(tags)
.tag_attributes(tag_attributes) .tag_attributes(tag_attributes)
.generic_attributes(attributes) .generic_attributes(attributes)
.clean(&rewritten_html) .clean(&html)
.to_string(); .to_string();
Ok(clean_html) Ok(html)
} }
fn compute_offset_limit( fn compute_offset_limit(

View File

@ -176,6 +176,10 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
let body_tranformers: Vec<Box<dyn Transformer>> = vec![ let body_tranformers: Vec<Box<dyn Transformer>> = vec![
Box::new(SlurpContents { Box::new(SlurpContents {
site_selectors: hashmap![ site_selectors: hashmap![
"blog.cloudflare.com".to_string() => vec![
Selector::parse(".author-lists").unwrap(),
Selector::parse(".post-full-content").unwrap()
],
"hackaday.com".to_string() => vec![ "hackaday.com".to_string() => vec![
Selector::parse("div.entry-featured-image").unwrap(), Selector::parse("div.entry-featured-image").unwrap(),
Selector::parse("div.entry-content").unwrap() Selector::parse("div.entry-content").unwrap()