server: escape RSS feeds that are HTML escaped

This commit is contained in:
2024-08-03 11:29:20 -07:00
parent e0863ac085
commit 56bc1cf7ed
5 changed files with 73 additions and 14 deletions

View File

@@ -14,7 +14,7 @@ const THREAD_PREFIX: &'static str = "news:";
use crate::{
error::ServerError,
graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary},
sanitize_html,
EscapeHtml, SanitizeHtml, Transformer,
};
pub fn is_newsreader_search(query: &str) -> bool {
@@ -207,13 +207,24 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
}
}
});
let html = r.summary.unwrap_or("NO SUMMARY".to_string());
let mut html = r.summary.unwrap_or("NO SUMMARY".to_string());
// TODO: add site specific cleanups. For example:
// * Grafana does <div class="image-wrapp"><img class="lazyload"><img src="/media/..."></img></div>
// * Some sites appear to be HTML encoded; decode them, e.g. imperialviolent
let html = sanitize_html(&html, "", &link)?;
let tranformers: Vec<Box<dyn Transformer>> = vec![
Box::new(EscapeHtml),
Box::new(SanitizeHtml {
cid_prefix: "",
base_url: &link,
}),
];
for t in tranformers.iter() {
if t.should_run(&html) {
html = t.transform(&html)?;
}
}
let body = Body::Html(Html {
html,
html: html.to_string(),
content_tree: "".to_string(),
});
let title = r.title.unwrap_or("NO TITLE".to_string());