server: add option to inline CSS before slurping contents

This commit is contained in:
Bill Thiede 2025-01-25 16:09:05 -08:00
parent 906ebd73b2
commit 9842c8c99c
3 changed files with 87 additions and 6 deletions

View File

@ -110,6 +110,48 @@ impl Transformer for StripHtml {
} }
} }
/// Transformer that runs an HTML document through `CSSInliner`, configured with a
/// caller-supplied base URL.
struct InlineRemoteStyle<'a> {
/// Base URL passed to `CSSInliner::options().base_url(..)`.
/// NOTE(review): presumably used to resolve relative stylesheet links; confirm
/// against the css-inline documentation.
base_url: &'a Option<Url>,
}
#[async_trait]
impl<'a> Transformer for InlineRemoteStyle<'a> {
async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
let css = concat!(
"/* chrome-default.css */\n",
include_str!("chrome-default.css"),
"\n/* mvp.css */\n",
include_str!("mvp.css"),
"\n/* Xinu Specific overrides */\n",
include_str!("custom.css"),
);
let inline_opts = InlineOptions {
//inline_style_tags: true,
//keep_style_tags: false,
//keep_link_tags: true,
base_url: self.base_url.clone(),
//load_remote_stylesheets: true,
//preallocate_node_capacity: 32,
..InlineOptions::default()
};
//info!("HTML:\n{html}");
info!("base_url: {:#?}", self.base_url);
Ok(
match CSSInliner::options()
.base_url(self.base_url.clone())
.build()
.inline(&html)
{
Ok(inlined_html) => inlined_html,
Err(err) => {
error!("failed to inline remote CSS: {err}");
html.to_string()
}
},
)
}
}
struct InlineStyle; struct InlineStyle;
#[async_trait] #[async_trait]
@ -229,6 +271,7 @@ impl Transformer for AddOutlink {
/// Transformer state for extracting selected elements from a linked page.
/// NOTE(review): summary inferred from the visible part of `transform`; confirm.
struct SlurpContents { struct SlurpContents {
/// Shared cache; `transform` stores page bodies in it via `cacher.set(link, body)`.
cacher: Arc<Mutex<FilesystemCacher>>, cacher: Arc<Mutex<FilesystemCacher>>,
/// When true, the page body is passed through `CSSInliner` before it is parsed
/// with `Html::parse_document`.
inline_css: bool,
/// Selectors to apply per site; keys look like host names (e.g. "jvns.ca").
site_selectors: HashMap<String, Vec<Selector>>, site_selectors: HashMap<String, Vec<Selector>>,
} }
@ -267,6 +310,36 @@ impl Transformer for SlurpContents {
cacher.set(link.as_str(), body.as_bytes()); cacher.set(link.as_str(), body.as_bytes());
body body
}; };
let body = Arc::new(body);
let base_url = Some(link.clone());
let body = if self.inline_css {
let inner_body = Arc::clone(&body);
let res = tokio::task::spawn_blocking(move || {
let res = CSSInliner::options()
.base_url(base_url)
.build()
.inline(&inner_body);
match res {
Ok(inlined_html) => inlined_html,
Err(err) => {
error!("failed to inline remote CSS: {err}");
Arc::into_inner(inner_body).expect("failed to take body out of Arc")
}
}
})
.await;
match res {
Ok(inlined_html) => inlined_html,
Err(err) => {
error!("failed to spawn inline remote CSS: {err}");
Arc::into_inner(body).expect("failed to take body out of Arc")
}
}
} else {
Arc::into_inner(body).expect("failed to take body out of Arc")
};
let doc = Html::parse_document(&body); let doc = Html::parse_document(&body);
let mut results = Vec::new(); let mut results = Vec::new();
@ -277,7 +350,7 @@ impl Transformer for SlurpContents {
//warn!("couldn't find '{:?}' in {}", selector, link); //warn!("couldn't find '{:?}' in {}", selector, link);
} }
} }
Ok(results.join("<hr>")) Ok(results.join("<br>"))
} }
} }

View File

@ -15,8 +15,8 @@ use crate::{
config::Config, config::Config,
error::ServerError, error::ServerError,
graphql::{Corpus, NewsPost, Tag, Thread, ThreadSummary}, graphql::{Corpus, NewsPost, Tag, Thread, ThreadSummary},
thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml, thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineRemoteStyle, Query,
SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX, SanitizeHtml, SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX,
NEWSREADER_THREAD_PREFIX, NEWSREADER_THREAD_PREFIX,
}; };
@ -196,6 +196,8 @@ pub async fn thread(
let body_tranformers: Vec<Box<dyn Transformer>> = vec![ let body_tranformers: Vec<Box<dyn Transformer>> = vec![
Box::new(SlurpContents { Box::new(SlurpContents {
cacher, cacher,
// TODO: make this true when bulma is finally removed
inline_css: false,
site_selectors: hashmap![ site_selectors: hashmap![
"atmeta.com".to_string() => vec![ "atmeta.com".to_string() => vec![
Selector::parse("div.entry-content").unwrap(), Selector::parse("div.entry-content").unwrap(),
@ -223,6 +225,9 @@ pub async fn thread(
"ingowald.blog".to_string() => vec![ "ingowald.blog".to_string() => vec![
Selector::parse("article").unwrap(), Selector::parse("article").unwrap(),
], ],
"jvns.ca".to_string() => vec![
Selector::parse("article").unwrap(),
],
"mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()], "mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
"natwelch.com".to_string() => vec![ "natwelch.com".to_string() => vec![
Selector::parse("article div.prose").unwrap(), Selector::parse("article div.prose").unwrap(),
@ -234,6 +239,9 @@ pub async fn thread(
Selector::parse("span.story-byline").unwrap(), Selector::parse("span.story-byline").unwrap(),
Selector::parse("div.p").unwrap(), Selector::parse("div.p").unwrap(),
], ],
"trofi.github.io".to_string() => vec![
Selector::parse("#content").unwrap(),
],
"www.redox-os.org".to_string() => vec![ "www.redox-os.org".to_string() => vec![
Selector::parse("div.content").unwrap(), Selector::parse("div.content").unwrap(),
], ],
@ -245,12 +253,12 @@ pub async fn thread(
}), }),
Box::new(FrameImages), Box::new(FrameImages),
Box::new(AddOutlink), Box::new(AddOutlink),
Box::new(EscapeHtml), // TODO: causes doubling of images in cloudflare blogs
//Box::new(EscapeHtml),
Box::new(SanitizeHtml { Box::new(SanitizeHtml {
cid_prefix: "", cid_prefix: "",
base_url: &link, base_url: &link,
}), }),
Box::new(InlineStyle),
]; ];
for t in body_tranformers.iter() { for t in body_tranformers.iter() {
if t.should_run(&link, &body) { if t.should_run(&link, &body) {