Make URL joining more robust

This commit is contained in:
Bill Thiede 2024-07-22 16:39:59 -07:00
parent ad8fb77857
commit bfd5e12bea
3 changed files with 40 additions and 33 deletions

View File

@@ -5,7 +5,7 @@ pub mod nm;
use css_inline::{CSSInliner, InlineError, InlineOptions};
use linkify::{LinkFinder, LinkKind};
use log::error;
use log::{error, info};
use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings};
use maplit::{hashmap, hashset};
use thiserror::Error;
@@ -50,31 +50,15 @@ pub fn linkify_html(text: &str) -> String {
pub fn sanitize_html(
html: &str,
cid_prefix: &str,
base_url: &Url,
base_url: &Option<Url>,
) -> Result<String, SanitizeError> {
let element_content_handlers = vec![
let mut element_content_handlers = vec![
// Open links in new tab
element!("a[href]", |el| {
el.set_attribute("target", "_blank").unwrap();
Ok(())
}),
// Make links with relative URLs absolute
element!("a[href]", |el| {
if let Some(Ok(href)) = el.get_attribute("href").map(|href| base_url.join(&href)) {
el.set_attribute("href", &href.as_str()).unwrap();
}
Ok(())
}),
// Make images with relative srcs absolute
element!("img[src]", |el| {
if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
el.set_attribute("src", &src.as_str()).unwrap();
}
Ok(())
}),
// Replace mixed part CID images with URL
element!("img[src]", |el| {
let src = el
@@ -98,6 +82,30 @@ pub fn sanitize_html(
Ok(())
}),
];
if let Some(base_url) = base_url {
element_content_handlers.extend(vec![
// Make links with relative URLs absolute
element!("a[href]", |el| {
if let Some(Ok(href)) = el.get_attribute("href").map(|href| {
info!("href {href:?}");
base_url.join(&href)
}) {
el.set_attribute("href", &href.as_str()).unwrap();
}
Ok(())
}),
// Make images with relative srcs absolute
element!("img[src]", |el| {
if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
info!("src {src:?}");
el.set_attribute("src", &src.as_str()).unwrap();
}
Ok(())
}),
]);
}
let inline_opts = InlineOptions {
inline_style_tags: true,

View File

@@ -5,7 +5,6 @@ use std::{
};
use async_graphql::connection::{self, Connection, Edge};
use log::info;
use sqlx::postgres::PgPool;
use url::Url;
@@ -185,18 +184,18 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
})
.unwrap_or(default_homepage.to_string()),
)?;
let link = Url::parse(
&r.link
.as_ref()
.map(|h| {
if h.is_empty() {
default_homepage.to_string()
} else {
h.to_string()
}
})
.unwrap_or(default_homepage.to_string()),
)?;
let link = &r
.link
.as_ref()
.map(|h| {
if h.is_empty() {
default_homepage.to_string()
} else {
h.to_string()
}
})
.map(|h| Url::parse(&h).ok())
.flatten();
let addr = r.link.as_ref().map(|link| {
if link.contains('@') {
link.clone()

View File

@@ -179,7 +179,7 @@ pub async fn thread(
.get_first_value("date")
.and_then(|d| mailparse::dateparse(&d).ok());
let cid_prefix = shared::urls::cid_prefix(None, &id);
let base_url = Url::parse("https://there-should-be-no-relative-urls-in-email").unwrap();
let base_url = None;
let body = match extract_body(&m, &id)? {
Body::PlainText(PlainText { text, content_tree }) => {
let text = if text.len() > MAX_RAW_MESSAGE_SIZE {