From 9842c8c99c98102a4ec018eaa6403b17c8c34cf5 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Sat, 25 Jan 2025 16:09:05 -0800 Subject: [PATCH] server: add option to inline CSS before slurping contents --- server/src/lib.rs | 75 +++++++++++++++++++++++++++++++++++++++- server/src/newsreader.rs | 16 ++++++--- web/index.html | 2 +- 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/server/src/lib.rs b/server/src/lib.rs index 69ab1a6..4e0bf30 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -110,6 +110,48 @@ impl Transformer for StripHtml { } } +struct InlineRemoteStyle<'a> { + base_url: &'a Option, +} + +#[async_trait] +impl<'a> Transformer for InlineRemoteStyle<'a> { + async fn transform(&self, _: &Option, html: &str) -> Result { + let css = concat!( + "/* chrome-default.css */\n", + include_str!("chrome-default.css"), + "\n/* mvp.css */\n", + include_str!("mvp.css"), + "\n/* Xinu Specific overrides */\n", + include_str!("custom.css"), + ); + let inline_opts = InlineOptions { + //inline_style_tags: true, + //keep_style_tags: false, + //keep_link_tags: true, + base_url: self.base_url.clone(), + //load_remote_stylesheets: true, + //preallocate_node_capacity: 32, + ..InlineOptions::default() + }; + + //info!("HTML:\n{html}"); + info!("base_url: {:#?}", self.base_url); + Ok( + match CSSInliner::options() + .base_url(self.base_url.clone()) + .build() + .inline(&html) + { + Ok(inlined_html) => inlined_html, + Err(err) => { + error!("failed to inline remote CSS: {err}"); + html.to_string() + } + }, + ) + } +} struct InlineStyle; #[async_trait] @@ -229,6 +271,7 @@ impl Transformer for AddOutlink { struct SlurpContents { cacher: Arc>, + inline_css: bool, site_selectors: HashMap>, } @@ -267,6 +310,36 @@ impl Transformer for SlurpContents { cacher.set(link.as_str(), body.as_bytes()); body }; + let body = Arc::new(body); + let base_url = Some(link.clone()); + let body = if self.inline_css { + let inner_body = Arc::clone(&body); + let res = tokio::task::spawn_blocking(move || { + let res = CSSInliner::options() + .base_url(base_url) + .build() + .inline(&inner_body); + + match res { + Ok(inlined_html) => inlined_html, + Err(err) => { + error!("failed to inline remote CSS: {err}"); + Arc::into_inner(inner_body).expect("failed to take body out of Arc") + } + } + }) + .await; + match res { + Ok(inlined_html) => inlined_html, + Err(err) => { + error!("failed to spawn inline remote CSS: {err}"); + Arc::into_inner(body).expect("failed to take body out of Arc") + } + } + } else { + Arc::into_inner(body).expect("failed to take body out of Arc") + }; + let doc = Html::parse_document(&body); let mut results = Vec::new(); @@ -277,7 +350,7 @@ impl Transformer for SlurpContents { //warn!("couldn't find '{:?}' in {}", selector, link); } } - Ok(results.join("
")) + Ok(results.join("
")) } } diff --git a/server/src/newsreader.rs b/server/src/newsreader.rs index c3024d5..586aca8 100644 --- a/server/src/newsreader.rs +++ b/server/src/newsreader.rs @@ -15,8 +15,8 @@ use crate::{ config::Config, error::ServerError, graphql::{Corpus, NewsPost, Tag, Thread, ThreadSummary}, - thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, - SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX, + thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineRemoteStyle, Query, + SanitizeHtml, SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX, NEWSREADER_THREAD_PREFIX, }; @@ -196,6 +196,8 @@ pub async fn thread( let body_tranformers: Vec> = vec![ Box::new(SlurpContents { cacher, + // TODO: make this true when bulma is finally removed + inline_css: false, site_selectors: hashmap![ "atmeta.com".to_string() => vec![ Selector::parse("div.entry-content").unwrap(), @@ -223,6 +225,9 @@ pub async fn thread( "ingowald.blog".to_string() => vec![ Selector::parse("article").unwrap(), ], + "jvns.ca".to_string() => vec![ + Selector::parse("article").unwrap(), + ], "mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()], "natwelch.com".to_string() => vec![ Selector::parse("article div.prose").unwrap(), @@ -234,6 +239,9 @@ pub async fn thread( Selector::parse("span.story-byline").unwrap(), Selector::parse("div.p").unwrap(), ], + "trofi.github.io".to_string() => vec![ + Selector::parse("#content").unwrap(), + ], "www.redox-os.org".to_string() => vec![ Selector::parse("div.content").unwrap(), ], @@ -245,12 +253,12 @@ pub async fn thread( }), Box::new(FrameImages), Box::new(AddOutlink), - Box::new(EscapeHtml), + // TODO: causes doubling of images in cloudflare blogs + //Box::new(EscapeHtml), Box::new(SanitizeHtml { cid_prefix: "", base_url: &link, }), - Box::new(InlineStyle), ]; for t in body_tranformers.iter() { if t.should_run(&link, &body) { diff --git a/web/index.html b/web/index.html index 5772e5e..a90ddb7 100644 --- a/web/index.html +++ b/web/index.html @@ -26,4 +26,4 @@
- + \ No newline at end of file