server: unescape RSS feeds that are HTML escaped
This commit is contained in:
parent
e0863ac085
commit
56bc1cf7ed
16
Cargo.lock
generated
16
Cargo.lock
generated
@ -1427,6 +1427,15 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html-escape"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
|
||||
dependencies = [
|
||||
"utf8-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.26.0"
|
||||
@ -3168,6 +3177,7 @@ dependencies = [
|
||||
"async-graphql-rocket",
|
||||
"css-inline",
|
||||
"glog",
|
||||
"html-escape",
|
||||
"linkify",
|
||||
"log",
|
||||
"lol_html",
|
||||
@ -4139,6 +4149,12 @@ version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-width"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.10.0"
|
||||
|
||||
@ -30,4 +30,5 @@ maplit = "1.0.2"
|
||||
linkify = "0.10.0"
|
||||
sqlx = { version = "0.7.4", features = ["postgres", "runtime-tokio", "time"] }
|
||||
url = "2.5.2"
|
||||
html-escape = "0.2.13"
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@ use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};
|
||||
use mailparse::MailParseError;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::SanitizeError;
|
||||
use crate::TransformError;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum ServerError {
|
||||
@ -19,8 +19,8 @@ pub enum ServerError {
|
||||
PartNotFound,
|
||||
#[error("sqlx error: {0}")]
|
||||
SQLXError(#[from] sqlx::Error),
|
||||
#[error("html sanitize error: {0}")]
|
||||
SanitizeError(#[from] SanitizeError),
|
||||
#[error("html transform error: {0}")]
|
||||
TransformError(#[from] TransformError),
|
||||
#[error("UTF8 error: {0}")]
|
||||
Utf8Error(#[from] Utf8Error),
|
||||
#[error("FromUTF8 error: {0}")]
|
||||
|
||||
@ -11,14 +11,49 @@ use maplit::{hashmap, hashset};
|
||||
use thiserror::Error;
|
||||
use url::Url;
|
||||
|
||||
// TODO: figure out how to use Cow
|
||||
/// One step in a chain of HTML string rewrites.
///
/// `should_run` inspects the input and reports whether `transform` should be applied to it.
trait Transformer {
|
||||
// Returns `true` if this transformer wants to process `input`.
fn should_run(&self, input: &str) -> bool;
|
||||
// TODO: should input be something like `html_escape` uses:
|
||||
// <S: ?Sized + AsRef<str>>(text: &S) -> Cow<str>
|
||||
// Produces a new owned string from `input`, or a `TransformError` on failure.
fn transform(&self, input: &str) -> Result<String, TransformError>;
|
||||
}
|
||||
|
||||
// TODO: how would we make this more generic to allow good implementations of Transformer outside
|
||||
// of this module?
|
||||
/// Errors returned by HTML transformation; each variant wraps an underlying error via `#[from]`.
#[derive(Error, Debug)]
|
||||
pub enum SanitizeError {
|
||||
pub enum TransformError {
|
||||
// Wraps a lol-html `RewritingError`.
#[error("lol-html rewrite error")]
|
||||
RewritingError(#[from] RewritingError),
|
||||
// Wraps an `InlineError` (presumably from the css-inline crate; confirm against imports).
#[error("css inline error")]
|
||||
InlineError(#[from] InlineError),
|
||||
}
|
||||
|
||||
/// Holds the borrowed arguments that its `Transformer` impl forwards to `sanitize_html`.
struct SanitizeHtml<'a> {
|
||||
// Forwarded as the `cid_prefix` argument of `sanitize_html`.
cid_prefix: &'a str,
|
||||
// Forwarded as the `base_url` argument of `sanitize_html`.
base_url: &'a Option<Url>,
|
||||
}
|
||||
|
||||
impl<'a> Transformer for SanitizeHtml<'a> {
|
||||
fn should_run(&self, _input: &str) -> bool {
|
||||
true
|
||||
}
|
||||
fn transform(&self, input: &str) -> Result<String, TransformError> {
|
||||
Ok(sanitize_html(input, self.cid_prefix, self.base_url)?)
|
||||
}
|
||||
}
|
||||
|
||||
/// Stateless `Transformer` whose `transform` decodes HTML entities in its input.
struct EscapeHtml;
|
||||
|
||||
impl Transformer for EscapeHtml {
|
||||
fn should_run(&self, input: &str) -> bool {
|
||||
input.starts_with("<")
|
||||
}
|
||||
fn transform(&self, input: &str) -> Result<String, TransformError> {
|
||||
Ok(html_escape::decode_html_entities(input).to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn linkify_html(text: &str) -> String {
|
||||
let mut finder = LinkFinder::new();
|
||||
let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]);
|
||||
@ -51,7 +86,7 @@ pub fn sanitize_html(
|
||||
html: &str,
|
||||
cid_prefix: &str,
|
||||
base_url: &Option<Url>,
|
||||
) -> Result<String, SanitizeError> {
|
||||
) -> Result<String, TransformError> {
|
||||
let mut element_content_handlers = vec![
|
||||
// Open links in new tab
|
||||
element!("a[href]", |el| {
|
||||
@ -86,10 +121,7 @@ pub fn sanitize_html(
|
||||
element_content_handlers.extend(vec![
|
||||
// Make links with relative URLs absolute
|
||||
element!("a[href]", |el| {
|
||||
if let Some(Ok(href)) = el.get_attribute("href").map(|href| {
|
||||
info!("href {href:?}");
|
||||
base_url.join(&href)
|
||||
}) {
|
||||
if let Some(Ok(href)) = el.get_attribute("href").map(|href| base_url.join(&href)) {
|
||||
el.set_attribute("href", &href.as_str()).unwrap();
|
||||
}
|
||||
|
||||
@ -98,7 +130,6 @@ pub fn sanitize_html(
|
||||
// Make images with relative srcs absolute
|
||||
element!("img[src]", |el| {
|
||||
if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
|
||||
info!("src {src:?}");
|
||||
el.set_attribute("src", &src.as_str()).unwrap();
|
||||
}
|
||||
|
||||
|
||||
@ -14,7 +14,7 @@ const THREAD_PREFIX: &'static str = "news:";
|
||||
use crate::{
|
||||
error::ServerError,
|
||||
graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary},
|
||||
sanitize_html,
|
||||
EscapeHtml, SanitizeHtml, Transformer,
|
||||
};
|
||||
|
||||
pub fn is_newsreader_search(query: &str) -> bool {
|
||||
@ -207,13 +207,24 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
|
||||
}
|
||||
}
|
||||
});
|
||||
let html = r.summary.unwrap_or("NO SUMMARY".to_string());
|
||||
let mut html = r.summary.unwrap_or("NO SUMMARY".to_string());
|
||||
// TODO: add site specific cleanups. For example:
|
||||
// * Grafana does <div class="image-wrapp"><img class="lazyload"><img src="/media/..."></img></div>
|
||||
// * Some sites appear to be HTML encoded, decode them, e.g. imperialviolet
|
||||
let html = sanitize_html(&html, "", &link)?;
|
||||
let tranformers: Vec<Box<dyn Transformer>> = vec![
|
||||
Box::new(EscapeHtml),
|
||||
Box::new(SanitizeHtml {
|
||||
cid_prefix: "",
|
||||
base_url: &link,
|
||||
}),
|
||||
];
|
||||
for t in tranformers.iter() {
|
||||
if t.should_run(&html) {
|
||||
html = t.transform(&html)?;
|
||||
}
|
||||
}
|
||||
let body = Body::Html(Html {
|
||||
html,
|
||||
html: html.to_string(),
|
||||
content_tree: "".to_string(),
|
||||
});
|
||||
let title = r.title.unwrap_or("NO TITLE".to_string());
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user