pub mod error; pub mod graphql; pub mod nm; use css_inline::{CSSInliner, InlineError, InlineOptions}; use linkify::{LinkFinder, LinkKind}; use log::error; use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings}; use maplit::{hashmap, hashset}; use thiserror::Error; #[derive(Error, Debug)] pub enum SanitizeError { #[error("lol-html rewrite error")] RewritingError(#[from] RewritingError), #[error("css inline error")] InlineError(#[from] InlineError), } pub fn linkify_html(text: &str) -> String { let mut finder = LinkFinder::new(); let finder = finder.url_must_have_scheme(false).kinds(&[LinkKind::Url]); let mut parts = Vec::new(); for span in finder.spans(text) { // TODO(wathiede): use Cow? match span.kind() { // Text as-is None => parts.push(span.as_str().to_string()), // Wrap in anchor tag Some(LinkKind::Url) => { let text = span.as_str(); let schema = if text.starts_with("http") { "" } else { "http://" }; let a = format!(r#"{0}"#, text); parts.push(a); } _ => todo!("unhandled kind: {:?}", span.kind().unwrap()), } } parts.join("") } pub fn sanitize_html(html: &str) -> Result { let element_content_handlers = vec![ // Open links in new tab element!("a[href]", |el| { el.set_attribute("target", "_blank").unwrap(); Ok(()) }), // Only secure image URLs element!("img[src]", |el| { let src = el .get_attribute("src") .expect("src was required") .replace("http:", "https:"); el.set_attribute("src", &src)?; Ok(()) }), ]; let inline_opts = InlineOptions { inline_style_tags: true, keep_style_tags: false, keep_link_tags: false, base_url: None, load_remote_stylesheets: false, extra_css: None, preallocate_node_capacity: 32, ..InlineOptions::default() }; let inlined_html = match CSSInliner::new(inline_opts).inline(&html) { Ok(inlined_html) => inlined_html, Err(err) => { error!("failed to inline CSS: {err}"); html.to_string() } }; // Default's don't allow style, but we want to preserve that. let attributes = hashset![ "align", "bgcolor", "class", "color", "height", "lang", "title", "width", "style", ]; let tags = hashset![ "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi", "bdo", "blockquote", "br", "caption", "center", "cite", "code", "col", "colgroup", "data", "dd", "del", "details", "dfn", "div", "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img", "ins", "kbd", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre", "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span", "strike", "strong", "sub", "summary", "sup", "table", "tbody", "td", "th", "thead", "time", "title", // wathiede "tr", "tt", "u", "ul", "var", "wbr", ]; let tag_attributes = hashmap![ "a" => hashset![ "href", "hreflang" ], "bdo" => hashset![ "dir" ], "blockquote" => hashset![ "cite" ], "col" => hashset![ "align", "char", "charoff", "span" ], "colgroup" => hashset![ "align", "char", "charoff", "span" ], "del" => hashset![ "cite", "datetime" ], "hr" => hashset![ "align", "size", "width" ], "img" => hashset![ "align", "alt", "height", "src", "width" ], "ins" => hashset![ "cite", "datetime" ], "ol" => hashset![ "start" ], "q" => hashset![ "cite" ], "table" => hashset![ "align", "border", "cellpadding", "cellspacing", "char", "charoff", "summary", ], "tbody" => hashset![ "align", "char", "charoff" ], "td" => hashset![ "align", "char", "charoff", "colspan", "headers", "rowspan" ], "tfoot" => hashset![ "align", "char", "charoff" ], "th" => hashset![ "align", "char", "charoff", "colspan", "headers", "rowspan", "scope" ], "thead" => hashset![ "align", "char", "charoff" ], "tr" => hashset![ "align", "char", "charoff" ], ]; let clean_html = ammonia::Builder::default() .tags(tags) .tag_attributes(tag_attributes) .generic_attributes(attributes) .clean(&inlined_html) .to_string(); //let clean_html = inlined_html; Ok(rewrite_str( &clean_html, RewriteStrSettings { element_content_handlers, ..RewriteStrSettings::default() }, )?) }