server: unescape RSS feeds that are HTML escaped

This commit is contained in:
2024-08-03 11:29:20 -07:00
parent e0863ac085
commit 56bc1cf7ed
5 changed files with 73 additions and 14 deletions

View File

@@ -11,14 +11,49 @@ use maplit::{hashmap, hashset};
use thiserror::Error;
use url::Url;
// TODO: figure out how to use Cow
/// A text-processing step that takes a `&str` and yields a new `String`.
///
/// Implementors in this file: `SanitizeHtml` and `EscapeHtml`.
trait Transformer {
/// Returns whether this transformer wants to run on `input`.
/// NOTE(review): callers are not visible here; presumably this is checked before
/// calling `transform` — confirm at the call site.
fn should_run(&self, input: &str) -> bool;
// TODO: should input be something like `html_escape` uses:
// <S: ?Sized + AsRef<str>>(text: &S) -> Cow<str>
/// Produces the transformed text as an owned `String`, or a `TransformError` on failure.
fn transform(&self, input: &str) -> Result<String, TransformError>;
}
// TODO: how would we make this more generic to allow good implementations of Transformer outside
// of this module?
// Error type returned by `Transformer::transform` and `sanitize_html`.
#[derive(Error, Debug)]
pub enum SanitizeError {
pub enum TransformError {
// Wraps a `RewritingError`; `#[from]` lets `?` convert one into this variant.
#[error("lol-html rewrite error")]
RewritingError(#[from] RewritingError),
// Wraps an `InlineError`; `#[from]` lets `?` convert one into this variant.
#[error("css inline error")]
InlineError(#[from] InlineError),
}
/// Transformer that borrows the extra arguments needed to call `sanitize_html`.
struct SanitizeHtml<'a> {
/// Forwarded as the `cid_prefix` argument of `sanitize_html`.
cid_prefix: &'a str,
/// Forwarded as the `base_url` argument of `sanitize_html`.
base_url: &'a Option<Url>,
}
/// Exposes `sanitize_html` through the `Transformer` interface.
impl<'a> Transformer for SanitizeHtml<'a> {
    /// Always returns `true`: this transformer does not inspect the input before running.
    fn should_run(&self, _input: &str) -> bool {
        true
    }

    /// Calls `sanitize_html` on `input` with this value's `cid_prefix` and `base_url`.
    ///
    /// # Errors
    /// Returns any `TransformError` reported by `sanitize_html`.
    fn transform(&self, input: &str) -> Result<String, TransformError> {
        // `sanitize_html` already returns `Result<String, TransformError>`, so pass it
        // through as-is instead of unwrapping with `?` and re-wrapping in `Ok`.
        sanitize_html(input, self.cid_prefix, self.base_url)
    }
}
/// Stateless transformer for input that starts with `&lt`. Despite the name, its
/// `transform` decodes HTML entities (unescapes) rather than escaping.
struct EscapeHtml;
/// Decodes HTML entities in input that begins with an escaped `<`.
impl Transformer for EscapeHtml {
    /// Returns `true` only when `input` begins with the literal text `&lt`
    /// (the start of an entity-escaped `<`). No leading whitespace is skipped.
    fn should_run(&self, input: &str) -> bool {
        input.starts_with("&lt")
    }

    /// Decodes the HTML entities in `input` and returns the result as an owned `String`.
    ///
    /// This implementation never returns an error.
    fn transform(&self, input: &str) -> Result<String, TransformError> {
        // `decode_html_entities` returns a `Cow<str>`; `into_owned` hands back the decoded
        // buffer directly when one was allocated, instead of copying it as `to_string` does.
        Ok(html_escape::decode_html_entities(input).into_owned())
    }
}
pub fn linkify_html(text: &str) -> String {
let mut finder = LinkFinder::new();
let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]);
@@ -51,7 +86,7 @@ pub fn sanitize_html(
html: &str,
cid_prefix: &str,
base_url: &Option<Url>,
) -> Result<String, SanitizeError> {
) -> Result<String, TransformError> {
let mut element_content_handlers = vec![
// Open links in new tab
element!("a[href]", |el| {
@@ -86,10 +121,7 @@ pub fn sanitize_html(
element_content_handlers.extend(vec![
// Make links with relative URLs absolute
element!("a[href]", |el| {
if let Some(Ok(href)) = el.get_attribute("href").map(|href| {
info!("href {href:?}");
base_url.join(&href)
}) {
if let Some(Ok(href)) = el.get_attribute("href").map(|href| base_url.join(&href)) {
el.set_attribute("href", &href.as_str()).unwrap();
}
@@ -98,7 +130,6 @@ pub fn sanitize_html(
// Make images with relative srcs absolute
element!("img[src]", |el| {
if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
info!("src {src:?}");
el.set_attribute("src", &src.as_str()).unwrap();
}