242 lines
5.7 KiB
Rust
242 lines
5.7 KiB
Rust
pub mod error;
|
|
pub mod graphql;
|
|
pub mod nm;
|
|
|
|
use css_inline::{CSSInliner, InlineError, InlineOptions};
|
|
use linkify::{LinkFinder, LinkKind};
|
|
use log::error;
|
|
use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings};
|
|
use maplit::{hashmap, hashset};
|
|
use thiserror::Error;
|
|
|
|
#[derive(Error, Debug)]
|
|
pub enum SanitizeError {
|
|
#[error("lol-html rewrite error")]
|
|
RewritingError(#[from] RewritingError),
|
|
#[error("css inline error")]
|
|
InlineError(#[from] InlineError),
|
|
}
|
|
|
|
pub fn linkify_html(text: &str) -> String {
|
|
let mut finder = LinkFinder::new();
|
|
let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]);
|
|
let mut parts = Vec::new();
|
|
for span in finder.spans(text) {
|
|
// TODO(wathiede): use Cow<str>?
|
|
match span.kind() {
|
|
// Text as-is
|
|
None => parts.push(span.as_str().to_string()),
|
|
// Wrap in anchor tag
|
|
Some(LinkKind::Url) => {
|
|
let text = span.as_str();
|
|
let schema = if text.starts_with("http") {
|
|
""
|
|
} else {
|
|
"http://"
|
|
};
|
|
let a = format!(r#"<a href="{schema}{0}">{0}</a>"#, text);
|
|
parts.push(a);
|
|
}
|
|
_ => todo!("unhandled kind: {:?}", span.kind().unwrap()),
|
|
}
|
|
}
|
|
parts.join("")
|
|
}
|
|
|
|
pub fn sanitize_html(html: &str) -> Result<String, SanitizeError> {
|
|
let element_content_handlers = vec![
|
|
// Open links in new tab
|
|
element!("a[href]", |el| {
|
|
el.set_attribute("target", "_blank").unwrap();
|
|
|
|
Ok(())
|
|
}),
|
|
// Only secure image URLs
|
|
element!("img[src]", |el| {
|
|
let src = el
|
|
.get_attribute("src")
|
|
.expect("src was required")
|
|
.replace("http:", "https:");
|
|
|
|
el.set_attribute("src", &src)?;
|
|
|
|
Ok(())
|
|
}),
|
|
];
|
|
|
|
let inline_opts = InlineOptions {
|
|
inline_style_tags: true,
|
|
keep_style_tags: false,
|
|
keep_link_tags: false,
|
|
base_url: None,
|
|
load_remote_stylesheets: false,
|
|
extra_css: None,
|
|
preallocate_node_capacity: 32,
|
|
..InlineOptions::default()
|
|
};
|
|
|
|
let inlined_html = match CSSInliner::new(inline_opts).inline(&html) {
|
|
Ok(inlined_html) => inlined_html,
|
|
Err(err) => {
|
|
error!("failed to inline CSS: {err}");
|
|
html.to_string()
|
|
}
|
|
};
|
|
// Default's don't allow style, but we want to preserve that.
|
|
let attributes = hashset![
|
|
"align", "bgcolor", "class", "color", "height", "lang", "title", "width", "style",
|
|
];
|
|
|
|
let tags = hashset![
|
|
"a",
|
|
"abbr",
|
|
"acronym",
|
|
"area",
|
|
"article",
|
|
"aside",
|
|
"b",
|
|
"bdi",
|
|
"bdo",
|
|
"blockquote",
|
|
"br",
|
|
"caption",
|
|
"center",
|
|
"cite",
|
|
"code",
|
|
"col",
|
|
"colgroup",
|
|
"data",
|
|
"dd",
|
|
"del",
|
|
"details",
|
|
"dfn",
|
|
"div",
|
|
"dl",
|
|
"dt",
|
|
"em",
|
|
"figcaption",
|
|
"figure",
|
|
"footer",
|
|
"h1",
|
|
"h2",
|
|
"h3",
|
|
"h4",
|
|
"h5",
|
|
"h6",
|
|
"header",
|
|
"hgroup",
|
|
"hr",
|
|
"i",
|
|
"img",
|
|
"ins",
|
|
"kbd",
|
|
"kbd",
|
|
"li",
|
|
"map",
|
|
"mark",
|
|
"nav",
|
|
"ol",
|
|
"p",
|
|
"pre",
|
|
"q",
|
|
"rp",
|
|
"rt",
|
|
"rtc",
|
|
"ruby",
|
|
"s",
|
|
"samp",
|
|
"small",
|
|
"span",
|
|
"strike",
|
|
"strong",
|
|
"sub",
|
|
"summary",
|
|
"sup",
|
|
"table",
|
|
"tbody",
|
|
"td",
|
|
"th",
|
|
"thead",
|
|
"time",
|
|
"title", // wathiede
|
|
"tr",
|
|
"tt",
|
|
"u",
|
|
"ul",
|
|
"var",
|
|
"wbr",
|
|
];
|
|
let tag_attributes = hashmap![
|
|
"a" => hashset![
|
|
"href", "hreflang"
|
|
],
|
|
"bdo" => hashset![
|
|
"dir"
|
|
],
|
|
"blockquote" => hashset![
|
|
"cite"
|
|
],
|
|
"col" => hashset![
|
|
"align", "char", "charoff", "span"
|
|
],
|
|
"colgroup" => hashset![
|
|
"align", "char", "charoff", "span"
|
|
],
|
|
"del" => hashset![
|
|
"cite", "datetime"
|
|
],
|
|
"hr" => hashset![
|
|
"align", "size", "width"
|
|
],
|
|
"img" => hashset![
|
|
"align", "alt", "height", "src", "width"
|
|
],
|
|
"ins" => hashset![
|
|
"cite", "datetime"
|
|
],
|
|
"ol" => hashset![
|
|
"start"
|
|
],
|
|
"q" => hashset![
|
|
"cite"
|
|
],
|
|
"table" => hashset![
|
|
"align", "border", "cellpadding", "cellspacing", "char", "charoff", "summary",
|
|
],
|
|
"tbody" => hashset![
|
|
"align", "char", "charoff"
|
|
],
|
|
"td" => hashset![
|
|
"align", "char", "charoff", "colspan", "headers", "rowspan"
|
|
],
|
|
"tfoot" => hashset![
|
|
"align", "char", "charoff"
|
|
],
|
|
"th" => hashset![
|
|
"align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
|
|
],
|
|
"thead" => hashset![
|
|
"align", "char", "charoff"
|
|
],
|
|
"tr" => hashset![
|
|
"align", "char", "charoff"
|
|
],
|
|
];
|
|
|
|
let clean_html = ammonia::Builder::default()
|
|
.tags(tags)
|
|
.tag_attributes(tag_attributes)
|
|
.generic_attributes(attributes)
|
|
.clean(&inlined_html)
|
|
.to_string();
|
|
//let clean_html = inlined_html;
|
|
|
|
Ok(rewrite_str(
|
|
&clean_html,
|
|
RewriteStrSettings {
|
|
element_content_handlers,
|
|
..RewriteStrSettings::default()
|
|
},
|
|
)?)
|
|
}
|