Improve server-side HTML sanitization.
This commit is contained in:
parent
304819275d
commit
8c47f01758
@ -2,7 +2,7 @@
|
||||
name = "server"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
default-bin = "server"
|
||||
default-run = "server"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
@ -24,6 +24,9 @@ memmap = "0.7.0"
|
||||
mailparse = "0.14.0"
|
||||
ammonia = "3.3.0"
|
||||
lol_html = "1.2.0"
|
||||
css-inline = "0.13.0"
|
||||
anyhow = "1.0.79"
|
||||
maplit = "1.0.2"
|
||||
|
||||
[dependencies.rocket_contrib]
|
||||
version = "0.4.11"
|
||||
|
||||
16
server/src/bin/cleanhtml.rs
Normal file
16
server/src/bin/cleanhtml.rs
Normal file
@ -0,0 +1,16 @@
|
||||
use std::fs;
|
||||
|
||||
use server::sanitize_html;
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let mut args = std::env::args().skip(1);
|
||||
let src = args.next().expect("source not specified");
|
||||
let dst = args.next().expect("destination not specified");
|
||||
println!("Sanitizing {src} into {dst}");
|
||||
let bytes = fs::read(src)?;
|
||||
let html = String::from_utf8_lossy(&bytes);
|
||||
let html = sanitize_html(&html)?;
|
||||
fs::write(dst, html)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -11,12 +11,13 @@ use async_graphql::{
|
||||
SimpleObject, Union,
|
||||
};
|
||||
use log::{error, info, warn};
|
||||
use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings};
|
||||
use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail};
|
||||
use memmap::MmapOptions;
|
||||
use notmuch::Notmuch;
|
||||
use rocket::time::Instant;
|
||||
|
||||
use crate::sanitize_html;
|
||||
|
||||
pub struct QueryRoot;
|
||||
|
||||
/// # Number of seconds since the Epoch
|
||||
@ -190,25 +191,6 @@ struct Tag {
|
||||
bg_color: String,
|
||||
unread: usize,
|
||||
}
|
||||
fn sanitize_html(html: &str) -> Result<String, RewritingError> {
|
||||
let element_content_handlers = vec![
|
||||
// Open links in new tab
|
||||
element!("a[href]", |el| {
|
||||
el.set_attribute("target", "_blank").unwrap();
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
];
|
||||
|
||||
Ok(rewrite_str(
|
||||
// TODO(wathiede): replace ammonia with more lol-html rules.
|
||||
&ammonia::clean(&html),
|
||||
RewriteStrSettings {
|
||||
element_content_handlers,
|
||||
..RewriteStrSettings::default()
|
||||
},
|
||||
)?)
|
||||
}
|
||||
|
||||
#[Object]
|
||||
impl QueryRoot {
|
||||
|
||||
@ -1,3 +1,215 @@
|
||||
pub mod error;
|
||||
pub mod graphql;
|
||||
pub mod nm;
|
||||
|
||||
use css_inline::{CSSInliner, InlineError, InlineOptions};
|
||||
use log::error;
|
||||
use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings};
|
||||
use maplit::{hashmap, hashset};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SanitizeError {
|
||||
#[error("lol-html rewrite error")]
|
||||
RewritingError(#[from] RewritingError),
|
||||
#[error("css inline error")]
|
||||
InlineError(#[from] InlineError),
|
||||
}
|
||||
|
||||
pub fn sanitize_html(html: &str) -> Result<String, SanitizeError> {
|
||||
let element_content_handlers = vec![
|
||||
// Open links in new tab
|
||||
element!("a[href]", |el| {
|
||||
el.set_attribute("target", "_blank").unwrap();
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
// Only secure image URLs
|
||||
element!("img[src]", |el| {
|
||||
let src = el
|
||||
.get_attribute("src")
|
||||
.expect("src was required")
|
||||
.replace("http:", "https:");
|
||||
|
||||
el.set_attribute("src", &src)?;
|
||||
|
||||
Ok(())
|
||||
}),
|
||||
];
|
||||
|
||||
let inline_opts = InlineOptions {
|
||||
inline_style_tags: true,
|
||||
keep_style_tags: false,
|
||||
keep_link_tags: false,
|
||||
base_url: None,
|
||||
load_remote_stylesheets: false,
|
||||
extra_css: None,
|
||||
preallocate_node_capacity: 32,
|
||||
..InlineOptions::default()
|
||||
};
|
||||
|
||||
let inlined_html = match CSSInliner::new(inline_opts).inline(&html) {
|
||||
Ok(inlined_html) => inlined_html,
|
||||
Err(err) => {
|
||||
error!("failed to inline CSS: {err}");
|
||||
html.to_string()
|
||||
}
|
||||
};
|
||||
// Default's don't allow style, but we want to preserve that.
|
||||
let attributes =
|
||||
hashset!["align", "bgcolor", "color", "height", "lang", "title", "width", "style",];
|
||||
|
||||
let tags = hashset![
|
||||
"a",
|
||||
"abbr",
|
||||
"acronym",
|
||||
"area",
|
||||
"article",
|
||||
"aside",
|
||||
"b",
|
||||
"bdi",
|
||||
"bdo",
|
||||
"blockquote",
|
||||
"br",
|
||||
"caption",
|
||||
"center",
|
||||
"cite",
|
||||
"code",
|
||||
"col",
|
||||
"colgroup",
|
||||
"data",
|
||||
"dd",
|
||||
"del",
|
||||
"details",
|
||||
"dfn",
|
||||
"div",
|
||||
"dl",
|
||||
"dt",
|
||||
"em",
|
||||
"figcaption",
|
||||
"figure",
|
||||
"footer",
|
||||
"h1",
|
||||
"h2",
|
||||
"h3",
|
||||
"h4",
|
||||
"h5",
|
||||
"h6",
|
||||
"header",
|
||||
"hgroup",
|
||||
"hr",
|
||||
"i",
|
||||
"img",
|
||||
"ins",
|
||||
"kbd",
|
||||
"kbd",
|
||||
"li",
|
||||
"map",
|
||||
"mark",
|
||||
"nav",
|
||||
"ol",
|
||||
"p",
|
||||
"pre",
|
||||
"q",
|
||||
"rp",
|
||||
"rt",
|
||||
"rtc",
|
||||
"ruby",
|
||||
"s",
|
||||
"samp",
|
||||
"small",
|
||||
"span",
|
||||
"strike",
|
||||
"strong",
|
||||
"sub",
|
||||
"summary",
|
||||
"sup",
|
||||
"table",
|
||||
"tbody",
|
||||
"td",
|
||||
"th",
|
||||
"thead",
|
||||
"time",
|
||||
"title", // wathiede
|
||||
"tr",
|
||||
"tt",
|
||||
"u",
|
||||
"ul",
|
||||
"var",
|
||||
"wbr",
|
||||
];
|
||||
let tag_attributes = hashmap![
|
||||
"a" => hashset![
|
||||
"href", "hreflang"
|
||||
],
|
||||
"bdo" => hashset![
|
||||
"dir"
|
||||
],
|
||||
"blockquote" => hashset![
|
||||
"cite"
|
||||
],
|
||||
"col" => hashset![
|
||||
"align", "char", "charoff", "span"
|
||||
],
|
||||
"colgroup" => hashset![
|
||||
"align", "char", "charoff", "span"
|
||||
],
|
||||
"del" => hashset![
|
||||
"cite", "datetime"
|
||||
],
|
||||
"hr" => hashset![
|
||||
"align", "size", "width"
|
||||
],
|
||||
"img" => hashset![
|
||||
"align", "alt", "height", "src", "width"
|
||||
],
|
||||
"ins" => hashset![
|
||||
"cite", "datetime"
|
||||
],
|
||||
"ol" => hashset![
|
||||
"start"
|
||||
],
|
||||
"q" => hashset![
|
||||
"cite"
|
||||
],
|
||||
"table" => hashset![
|
||||
"align", "border", "cellpadding", "cellspacing", "char", "charoff", "summary",
|
||||
],
|
||||
"tbody" => hashset![
|
||||
"align", "char", "charoff"
|
||||
],
|
||||
"td" => hashset![
|
||||
"align", "char", "charoff", "colspan", "headers", "rowspan"
|
||||
],
|
||||
"tfoot" => hashset![
|
||||
"align", "char", "charoff"
|
||||
],
|
||||
"th" => hashset![
|
||||
"align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
|
||||
],
|
||||
"thead" => hashset![
|
||||
"align", "char", "charoff"
|
||||
],
|
||||
"tr" => hashset![
|
||||
"align", "char", "charoff"
|
||||
],
|
||||
];
|
||||
|
||||
let clean_html = ammonia::Builder::default()
|
||||
.tags(tags)
|
||||
.tag_attributes(tag_attributes)
|
||||
.generic_attributes(attributes)
|
||||
.clean(&inlined_html)
|
||||
.to_string();
|
||||
//let clean_html = inlined_html;
|
||||
|
||||
Ok(rewrite_str(
|
||||
// TODO(wathiede): replace ammonia with more lol-html rules.
|
||||
// &ammonia::clean(&html),
|
||||
&clean_html,
|
||||
RewriteStrSettings {
|
||||
element_content_handlers,
|
||||
..RewriteStrSettings::default()
|
||||
},
|
||||
)?)
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user