diff --git a/server/src/lib.rs b/server/src/lib.rs
index 639a77f..5184091 100644
--- a/server/src/lib.rs
+++ b/server/src/lib.rs
@@ -129,8 +129,8 @@ impl Transformer for InlineStyle {
}
}
-/// Frame images will extract any alt or title tags on images and place them as labels below said
-/// image.
+/// FrameImages extracts any alt or title attributes on images and places them as labels below
+/// said image. It also copies data-src and data-cfsrc attributes into src.
struct FrameImages;
#[async_trait]
@@ -139,29 +139,50 @@ impl Transformer for FrameImages {
Ok(rewrite_str(
html,
RewriteStrSettings {
- element_content_handlers: vec![element!("img[alt], img[title]", |el| {
- info!("found image with alt or title {el:?}");
- let src = el
- .get_attribute("src")
- .unwrap_or("https://placehold.co/600x400".to_string());
- let alt = el.get_attribute("alt");
- let title = el.get_attribute("title");
- let mut frags = vec!["".to_string(), format!(r#"
"#)];
- alt.map(|t| {
- if !t.is_empty() {
- frags.push(format!("Alt: {t}"))
- }
- });
- title.map(|t| {
- if !t.is_empty() {
- frags.push(format!("Title: {t}"))
- }
- });
- frags.push("".to_string());
- el.replace(&frags.join("\n"), ContentType::Html);
+ element_content_handlers: vec![
+ element!("img[data-src]", |el| {
+ info!("found image with data-src {el:?}");
+ let src = el
+ .get_attribute("data-src")
+ .unwrap_or("https://placehold.co/600x400".to_string());
+ el.set_attribute("src", &src)?;
- Ok(())
- })],
+ Ok(())
+ }),
+ element!("img[data-cfsrc]", |el| {
+ info!("found image with data-cfsrc {el:?}");
+ let src = el
+ .get_attribute("data-cfsrc")
+ .unwrap_or("https://placehold.co/600x400".to_string());
+ el.set_attribute("src", &src)?;
+
+ Ok(())
+ }),
+ element!("img[alt], img[title]", |el| {
+ info!("found image with alt or title {el:?}");
+ let src = el
+ .get_attribute("src")
+ .unwrap_or("https://placehold.co/600x400".to_string());
+ let alt = el.get_attribute("alt");
+ let title = el.get_attribute("title");
+ let mut frags =
+ vec!["".to_string(), format!(r#"
"#)];
+ alt.map(|t| {
+ if !t.is_empty() {
+ frags.push(format!("Alt: {t}"))
+ }
+ });
+ title.map(|t| {
+ if !t.is_empty() {
+ frags.push(format!("Title: {t}"))
+ }
+ });
+ frags.push("".to_string());
+ el.replace(&frags.join("\n"), ContentType::Html);
+
+ Ok(())
+ }),
+ ],
..RewriteStrSettings::default()
},
)?)
@@ -272,6 +293,24 @@ pub fn sanitize_html(
cid_prefix: &str,
base_url: &Option,
) -> Result {
+ let inline_opts = InlineOptions {
+ inline_style_tags: true,
+ keep_style_tags: false,
+ keep_link_tags: false,
+ base_url: None,
+ load_remote_stylesheets: false,
+ extra_css: None,
+ preallocate_node_capacity: 32,
+ ..InlineOptions::default()
+ };
+
+ let html = match CSSInliner::new(inline_opts).inline(&html) {
+ Ok(inlined_html) => inlined_html,
+ Err(err) => {
+ error!("failed to inline CSS: {err}");
+ html.to_string()
+ }
+ };
let mut element_content_handlers = vec![
// Open links in new tab
element!("a[href]", |el| {
@@ -322,25 +361,13 @@ pub fn sanitize_html(
}),
]);
}
-
- let inline_opts = InlineOptions {
- inline_style_tags: true,
- keep_style_tags: false,
- keep_link_tags: false,
- base_url: None,
- load_remote_stylesheets: false,
- extra_css: None,
- preallocate_node_capacity: 32,
- ..InlineOptions::default()
- };
-
- let inlined_html = match CSSInliner::new(inline_opts).inline(&html) {
- Ok(inlined_html) => inlined_html,
- Err(err) => {
- error!("failed to inline CSS: {err}");
- html.to_string()
- }
- };
+ let html = rewrite_str(
+ &html,
+ RewriteStrSettings {
+ element_content_handlers,
+ ..RewriteStrSettings::default()
+ },
+ )?;
// Default's don't allow style, but we want to preserve that.
// TODO: remove 'class' if rendering mails moves to a two phase process where abstract message
// types are collected, santized, and then grouped together as one big HTML doc
@@ -388,6 +415,7 @@ pub fn sanitize_html(
"hgroup",
"hr",
"i",
+ "iframe", // wathiede
"img",
"ins",
"kbd",
@@ -396,6 +424,7 @@ pub fn sanitize_html(
"map",
"mark",
"nav",
+ "noscript", // wathiede
"ol",
"p",
"pre",
@@ -449,6 +478,9 @@ pub fn sanitize_html(
"hr" => hashset![
"align", "size", "width"
],
+ "iframe" => hashset![
+ "src", "allow", "allowfullscreen"
+ ],
"img" => hashset![
"align", "alt", "height", "src", "width"
],
@@ -484,21 +516,14 @@ pub fn sanitize_html(
],
];
- let rewritten_html = rewrite_str(
- &inlined_html,
- RewriteStrSettings {
- element_content_handlers,
- ..RewriteStrSettings::default()
- },
- )?;
- let clean_html = ammonia::Builder::default()
+ let html = ammonia::Builder::default()
.tags(tags)
.tag_attributes(tag_attributes)
.generic_attributes(attributes)
- .clean(&rewritten_html)
+ .clean(&html)
.to_string();
- Ok(clean_html)
+ Ok(html)
}
fn compute_offset_limit(
diff --git a/server/src/newsreader.rs b/server/src/newsreader.rs
index cafec74..3cced44 100644
--- a/server/src/newsreader.rs
+++ b/server/src/newsreader.rs
@@ -176,6 +176,10 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result> = vec![
Box::new(SlurpContents {
site_selectors: hashmap![
+ "blog.cloudflare.com".to_string() => vec![
+ Selector::parse(".author-lists").unwrap(),
+ Selector::parse(".post-full-content").unwrap()
+ ],
"hackaday.com".to_string() => vec![
Selector::parse("div.entry-featured-image").unwrap(),
Selector::parse("div.entry-content").unwrap()