server: escape RSS feeds that are HTML escaped
parent e0863ac085
commit 56bc1cf7ed

Cargo.lock (generated), 16 lines changed
@@ -1427,6 +1427,15 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
+[[package]]
+name = "html-escape"
+version = "0.2.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
+dependencies = [
+ "utf8-width",
+]
+
 [[package]]
 name = "html5ever"
 version = "0.26.0"
@@ -3168,6 +3177,7 @@ dependencies = [
  "async-graphql-rocket",
  "css-inline",
  "glog",
+ "html-escape",
  "linkify",
  "log",
  "lol_html",
@@ -4139,6 +4149,12 @@ version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
 
+[[package]]
+name = "utf8-width"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
+
 [[package]]
 name = "uuid"
 version = "1.10.0"

@@ -30,4 +30,5 @@ maplit = "1.0.2"
 linkify = "0.10.0"
 sqlx = { version = "0.7.4", features = ["postgres", "runtime-tokio", "time"] }
 url = "2.5.2"
+html-escape = "0.2.13"
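
For reference, a minimal sketch (not part of the commit) of what the newly added html-escape crate provides: decode_html_entities turns an entity-escaped summary back into markup and returns a Cow<str>; the utf8-width entry in Cargo.lock above is its only dependency. The string below is a hypothetical feed summary, not data from the repository.

fn main() {
    // Hypothetical HTML-escaped feed summary of the kind this commit targets.
    let escaped = "&lt;p&gt;Hello &amp; welcome&lt;/p&gt;";
    // The crate borrows the input when nothing needs replacing and allocates
    // only when entities are actually decoded.
    let decoded = html_escape::decode_html_entities(escaped);
    assert_eq!(decoded, "<p>Hello & welcome</p>");
}
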
@@ -3,7 +3,7 @@ use std::{convert::Infallible, str::Utf8Error, string::FromUtf8Error};
 use mailparse::MailParseError;
 use thiserror::Error;
 
-use crate::SanitizeError;
+use crate::TransformError;
 
 #[derive(Error, Debug)]
 pub enum ServerError {
@@ -19,8 +19,8 @@ pub enum ServerError {
     PartNotFound,
     #[error("sqlx error: {0}")]
     SQLXError(#[from] sqlx::Error),
-    #[error("html sanitize error: {0}")]
-    SanitizeError(#[from] SanitizeError),
+    #[error("html transform error: {0}")]
+    TransformError(#[from] TransformError),
     #[error("UTF8 error: {0}")]
     Utf8Error(#[from] Utf8Error),
     #[error("FromUTF8 error: {0}")]
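
For context, a minimal standalone sketch (not part of the commit, with both enums trimmed to one variant each) of why the #[from] attribute matters for this rename: thiserror generates a From<TransformError> for ServerError impl, so ? in server code converts the new error type automatically.

use thiserror::Error;

#[derive(Error, Debug)]
pub enum TransformError {
    #[error("lol-html rewrite error")]
    Rewriting,
}

#[derive(Error, Debug)]
pub enum ServerError {
    #[error("html transform error: {0}")]
    TransformError(#[from] TransformError),
}

fn transform() -> Result<String, TransformError> {
    Err(TransformError::Rewriting)
}

fn handler() -> Result<String, ServerError> {
    // `?` uses the generated From impl to wrap the failure as
    // ServerError::TransformError.
    Ok(transform()?)
}

fn main() {
    assert!(matches!(handler(), Err(ServerError::TransformError(_))));
}
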
@@ -11,14 +11,49 @@ use maplit::{hashmap, hashset};
 use thiserror::Error;
 use url::Url;
 
+// TODO: figure out how to use Cow
+trait Transformer {
+    fn should_run(&self, input: &str) -> bool;
+    // TODO: should input be something like `html_escape` uses:
+    // <S: ?Sized + AsRef<str>>(text: &S) -> Cow<str>
+    fn transform(&self, input: &str) -> Result<String, TransformError>;
+}
+
+// TODO: how would we make this more generic to allow good implementations of Transformer outside
+// of this module?
 #[derive(Error, Debug)]
-pub enum SanitizeError {
+pub enum TransformError {
     #[error("lol-html rewrite error")]
     RewritingError(#[from] RewritingError),
     #[error("css inline error")]
     InlineError(#[from] InlineError),
 }
 
+struct SanitizeHtml<'a> {
+    cid_prefix: &'a str,
+    base_url: &'a Option<Url>,
+}
+
+impl<'a> Transformer for SanitizeHtml<'a> {
+    fn should_run(&self, _input: &str) -> bool {
+        true
+    }
+    fn transform(&self, input: &str) -> Result<String, TransformError> {
+        Ok(sanitize_html(input, self.cid_prefix, self.base_url)?)
+    }
+}
+
+struct EscapeHtml;
+
+impl Transformer for EscapeHtml {
+    fn should_run(&self, input: &str) -> bool {
+        input.starts_with("&lt;")
+    }
+    fn transform(&self, input: &str) -> Result<String, TransformError> {
+        Ok(html_escape::decode_html_entities(input).to_string())
+    }
+}
+
 pub fn linkify_html(text: &str) -> String {
     let mut finder = LinkFinder::new();
     let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]);
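
One possible answer to the Cow TODO above, sketched with a hypothetical trait name (this is not what the commit implements): transform could hand back a Cow<str> so a transformer that changes nothing avoids reallocating, reusing the TransformError enum and EscapeHtml type from this diff.

use std::borrow::Cow;

// Hypothetical alternative to the Transformer trait above.
trait CowTransformer {
    fn should_run(&self, input: &str) -> bool;
    fn transform<'a>(&self, input: &'a str) -> Result<Cow<'a, str>, TransformError>;
}

impl CowTransformer for EscapeHtml {
    fn should_run(&self, input: &str) -> bool {
        input.starts_with("&lt;")
    }
    fn transform<'a>(&self, input: &'a str) -> Result<Cow<'a, str>, TransformError> {
        // decode_html_entities already returns a Cow: Borrowed when the input
        // has no entities, Owned when something was decoded.
        Ok(html_escape::decode_html_entities(input))
    }
}
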
@@ -51,7 +86,7 @@ pub fn sanitize_html(
     html: &str,
     cid_prefix: &str,
     base_url: &Option<Url>,
-) -> Result<String, SanitizeError> {
+) -> Result<String, TransformError> {
     let mut element_content_handlers = vec![
         // Open links in new tab
         element!("a[href]", |el| {
@@ -86,10 +121,7 @@ pub fn sanitize_html(
     element_content_handlers.extend(vec![
         // Make links with relative URLs absolute
         element!("a[href]", |el| {
-            if let Some(Ok(href)) = el.get_attribute("href").map(|href| {
-                info!("href {href:?}");
-                base_url.join(&href)
-            }) {
+            if let Some(Ok(href)) = el.get_attribute("href").map(|href| base_url.join(&href)) {
                 el.set_attribute("href", &href.as_str()).unwrap();
             }
 
@@ -98,7 +130,6 @@ pub fn sanitize_html(
         // Make images with relative srcs absolute
         element!("img[src]", |el| {
             if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
-                info!("src {src:?}");
                 el.set_attribute("src", &src.as_str()).unwrap();
             }
 
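
For reference (not part of the commit), this is how url::Url::join resolves the hrefs and srcs rewritten above: relative paths are resolved against the feed's base URL, while absolute URLs pass through unchanged. The URLs are hypothetical.

use url::Url;

fn main() {
    let base = Url::parse("https://example.com/blog/post/").unwrap();
    // A relative src is resolved against the base URL.
    assert_eq!(
        base.join("images/figure.png").unwrap().as_str(),
        "https://example.com/blog/post/images/figure.png"
    );
    // An already absolute href replaces the base entirely.
    assert_eq!(
        base.join("https://other.example/x").unwrap().as_str(),
        "https://other.example/x"
    );
}
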
@@ -14,7 +14,7 @@ const THREAD_PREFIX: &'static str = "news:";
 use crate::{
     error::ServerError,
     graphql::{Body, Email, Html, Message, Tag, Thread, ThreadSummary},
-    sanitize_html,
+    EscapeHtml, SanitizeHtml, Transformer,
 };
 
 pub fn is_newsreader_search(query: &str) -> bool {
@@ -207,13 +207,24 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
             }
         }
     });
-    let html = r.summary.unwrap_or("NO SUMMARY".to_string());
+    let mut html = r.summary.unwrap_or("NO SUMMARY".to_string());
     // TODO: add site specific cleanups. For example:
     // * Grafana does <div class="image-wrapp"><img class="lazyload>"<img src="/media/...>"</img></div>
     // * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolent
-    let html = sanitize_html(&html, "", &link)?;
+    let transformers: Vec<Box<dyn Transformer>> = vec![
+        Box::new(EscapeHtml),
+        Box::new(SanitizeHtml {
+            cid_prefix: "",
+            base_url: &link,
+        }),
+    ];
+    for t in transformers.iter() {
+        if t.should_run(&html) {
+            html = t.transform(&html)?;
+        }
+    }
     let body = Body::Html(Html {
-        html,
+        html: html.to_string(),
         content_tree: "".to_string(),
     });
     let title = r.title.unwrap_or("NO TITLE".to_string());
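
A rough behavioural sketch (not a test from the commit; assumes EscapeHtml and Transformer from this diff are in scope, and the summaries are made up) of how the two transformers split the work in thread(): an entity-escaped summary is decoded first and then sanitized, while raw HTML skips the decode step and only goes through SanitizeHtml.

fn main() {
    let escaped = "&lt;p&gt;Hello &amp; welcome&lt;/p&gt;";
    let raw = "<p>Hello &amp; welcome</p>";

    let decode = EscapeHtml;
    // Escaped summaries are decoded back into HTML before sanitizing.
    assert!(decode.should_run(escaped));
    assert_eq!(decode.transform(escaped).unwrap(), "<p>Hello & welcome</p>");

    // Raw HTML is left untouched by EscapeHtml and handled by SanitizeHtml.
    assert!(!decode.should_run(raw));
}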