server: add option to inline CSS before slurping contents

This commit is contained in:
Bill Thiede 2025-01-25 16:09:05 -08:00
parent 906ebd73b2
commit 9842c8c99c
3 changed files with 87 additions and 6 deletions

View File

@ -110,6 +110,48 @@ impl Transformer for StripHtml {
}
}
struct InlineRemoteStyle<'a> {
base_url: &'a Option<Url>,
}
#[async_trait]
impl<'a> Transformer for InlineRemoteStyle<'a> {
async fn transform(&self, _: &Option<Url>, html: &str) -> Result<String, TransformError> {
let css = concat!(
"/* chrome-default.css */\n",
include_str!("chrome-default.css"),
"\n/* mvp.css */\n",
include_str!("mvp.css"),
"\n/* Xinu Specific overrides */\n",
include_str!("custom.css"),
);
let inline_opts = InlineOptions {
//inline_style_tags: true,
//keep_style_tags: false,
//keep_link_tags: true,
base_url: self.base_url.clone(),
//load_remote_stylesheets: true,
//preallocate_node_capacity: 32,
..InlineOptions::default()
};
//info!("HTML:\n{html}");
info!("base_url: {:#?}", self.base_url);
Ok(
match CSSInliner::options()
.base_url(self.base_url.clone())
.build()
.inline(&html)
{
Ok(inlined_html) => inlined_html,
Err(err) => {
error!("failed to inline remote CSS: {err}");
html.to_string()
}
},
)
}
}
/// Field-less transformer type; it is boxed as a `dyn Transformer` where the
/// body transformers are assembled. Its `Transformer` impl follows below.
struct InlineStyle;
#[async_trait]
@ -229,6 +271,7 @@ impl Transformer for AddOutlink {
/// State for the `SlurpContents` transformer, which parses a linked page's HTML
/// and joins the fragments matched by the selectors configured for that site.
struct SlurpContents {
/// Shared filesystem-backed cache; `transform` stores each fetched body in it,
/// keyed by the link string.
cacher: Arc<Mutex<FilesystemCacher>>,
/// When true, the fetched page is run through `CSSInliner` (with the link as
/// base URL) before it is parsed and the selectors are applied.
inline_css: bool,
/// Selectors to extract, keyed by site name (callers use host names such as
/// "jvns.ca"). NOTE(review): the lookup itself is outside this view — confirm
/// how the key is matched against the link.
site_selectors: HashMap<String, Vec<Selector>>,
}
@ -267,6 +310,36 @@ impl Transformer for SlurpContents {
cacher.set(link.as_str(), body.as_bytes());
body
};
// Optionally inline the page's CSS before the document is parsed.
//
// The body is shared through an `Arc` so the original text can still be
// recovered if inlining fails or the blocking task cannot be joined. The
// closure reports failure through its return value: it must not try to take
// the body out of the `Arc` itself, because this scope still holds a second
// reference and `Arc::into_inner` would therefore always yield `None`.
let body = if self.inline_css {
    let base_url = Some(link.clone());
    let shared_body = Arc::new(body);
    let worker_body = Arc::clone(&shared_body);
    // The inliner call is synchronous, so keep it off the async executor.
    let joined = tokio::task::spawn_blocking(move || {
        let outcome = CSSInliner::options()
            .base_url(base_url)
            .build()
            .inline(&worker_body);
        // Stringify the error so the closure's return type is trivially `Send`.
        outcome.map_err(|err| err.to_string())
    })
    .await;
    // Take the original text back out of the `Arc`. The worker's clone is
    // normally gone by now; copy the text if a reference is somehow still held.
    let original_body = |shared: Arc<String>| {
        Arc::try_unwrap(shared).unwrap_or_else(|still_shared| (*still_shared).clone())
    };
    match joined {
        Ok(Ok(inlined_html)) => inlined_html,
        Ok(Err(err)) => {
            error!("failed to inline remote CSS: {err}");
            original_body(shared_body)
        }
        Err(err) => {
            error!("failed to spawn inline remote CSS: {err}");
            original_body(shared_body)
        }
    }
} else {
    body
};
let doc = Html::parse_document(&body);
let mut results = Vec::new();
@ -277,7 +350,7 @@ impl Transformer for SlurpContents {
//warn!("couldn't find '{:?}' in {}", selector, link);
}
}
Ok(results.join("<hr>"))
Ok(results.join("<br>"))
}
}

View File

@ -15,8 +15,8 @@ use crate::{
config::Config,
error::ServerError,
graphql::{Corpus, NewsPost, Tag, Thread, ThreadSummary},
thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineStyle, Query, SanitizeHtml,
SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX,
thread_summary_from_row, AddOutlink, EscapeHtml, FrameImages, InlineRemoteStyle, Query,
SanitizeHtml, SlurpContents, ThreadSummaryRecord, Transformer, NEWSREADER_TAG_PREFIX,
NEWSREADER_THREAD_PREFIX,
};
@ -196,6 +196,8 @@ pub async fn thread(
let body_tranformers: Vec<Box<dyn Transformer>> = vec![
Box::new(SlurpContents {
cacher,
// TODO: make this true when bulma is finally removed
inline_css: false,
site_selectors: hashmap![
"atmeta.com".to_string() => vec![
Selector::parse("div.entry-content").unwrap(),
@ -223,6 +225,9 @@ pub async fn thread(
"ingowald.blog".to_string() => vec![
Selector::parse("article").unwrap(),
],
"jvns.ca".to_string() => vec![
Selector::parse("article").unwrap(),
],
"mitchellh.com".to_string() => vec![Selector::parse("div.w-full").unwrap()],
"natwelch.com".to_string() => vec![
Selector::parse("article div.prose").unwrap(),
@ -234,6 +239,9 @@ pub async fn thread(
Selector::parse("span.story-byline").unwrap(),
Selector::parse("div.p").unwrap(),
],
"trofi.github.io".to_string() => vec![
Selector::parse("#content").unwrap(),
],
"www.redox-os.org".to_string() => vec![
Selector::parse("div.content").unwrap(),
],
@ -245,12 +253,12 @@ pub async fn thread(
}),
Box::new(FrameImages),
Box::new(AddOutlink),
Box::new(EscapeHtml),
// TODO: causes doubling of images in cloudflare blogs
//Box::new(EscapeHtml),
Box::new(SanitizeHtml {
cid_prefix: "",
base_url: &link,
}),
Box::new(InlineStyle),
];
for t in body_tranformers.iter() {
if t.should_run(&link, &body) {

View File

@ -26,4 +26,4 @@
<section id="app"></section>
</body>
</html>
</html>