Make URL joining more robust

This commit is contained in:
Bill Thiede 2024-07-22 16:39:59 -07:00
parent ad8fb77857
commit bfd5e12bea
3 changed files with 40 additions and 33 deletions

View File

@ -5,7 +5,7 @@ pub mod nm;
use css_inline::{CSSInliner, InlineError, InlineOptions}; use css_inline::{CSSInliner, InlineError, InlineOptions};
use linkify::{LinkFinder, LinkKind}; use linkify::{LinkFinder, LinkKind};
use log::error; use log::{error, info};
use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings}; use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings};
use maplit::{hashmap, hashset}; use maplit::{hashmap, hashset};
use thiserror::Error; use thiserror::Error;
@ -50,31 +50,15 @@ pub fn linkify_html(text: &str) -> String {
pub fn sanitize_html( pub fn sanitize_html(
html: &str, html: &str,
cid_prefix: &str, cid_prefix: &str,
base_url: &Url, base_url: &Option<Url>,
) -> Result<String, SanitizeError> { ) -> Result<String, SanitizeError> {
let element_content_handlers = vec![ let mut element_content_handlers = vec![
// Open links in new tab // Open links in new tab
element!("a[href]", |el| { element!("a[href]", |el| {
el.set_attribute("target", "_blank").unwrap(); el.set_attribute("target", "_blank").unwrap();
Ok(()) Ok(())
}), }),
// Make links with relative URLs absolute
element!("a[href]", |el| {
if let Some(Ok(href)) = el.get_attribute("href").map(|href| base_url.join(&href)) {
el.set_attribute("href", &href.as_str()).unwrap();
}
Ok(())
}),
// Make images with relative srcs absolute
element!("img[src]", |el| {
if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
el.set_attribute("src", &src.as_str()).unwrap();
}
Ok(())
}),
// Replace mixed part CID images with URL // Replace mixed part CID images with URL
element!("img[src]", |el| { element!("img[src]", |el| {
let src = el let src = el
@ -98,6 +82,30 @@ pub fn sanitize_html(
Ok(()) Ok(())
}), }),
]; ];
if let Some(base_url) = base_url {
element_content_handlers.extend(vec![
// Make links with relative URLs absolute
element!("a[href]", |el| {
if let Some(Ok(href)) = el.get_attribute("href").map(|href| {
info!("href {href:?}");
base_url.join(&href)
}) {
el.set_attribute("href", &href.as_str()).unwrap();
}
Ok(())
}),
// Make images with relative srcs absolute
element!("img[src]", |el| {
if let Some(Ok(src)) = el.get_attribute("src").map(|src| base_url.join(&src)) {
info!("src {src:?}");
el.set_attribute("src", &src.as_str()).unwrap();
}
Ok(())
}),
]);
}
let inline_opts = InlineOptions { let inline_opts = InlineOptions {
inline_style_tags: true, inline_style_tags: true,

View File

@ -5,7 +5,6 @@ use std::{
}; };
use async_graphql::connection::{self, Connection, Edge}; use async_graphql::connection::{self, Connection, Edge};
use log::info;
use sqlx::postgres::PgPool; use sqlx::postgres::PgPool;
use url::Url; use url::Url;
@ -185,8 +184,8 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
}) })
.unwrap_or(default_homepage.to_string()), .unwrap_or(default_homepage.to_string()),
)?; )?;
let link = Url::parse( let link = &r
&r.link .link
.as_ref() .as_ref()
.map(|h| { .map(|h| {
if h.is_empty() { if h.is_empty() {
@ -195,8 +194,8 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result<Thread, ServerEr
h.to_string() h.to_string()
} }
}) })
.unwrap_or(default_homepage.to_string()), .map(|h| Url::parse(&h).ok())
)?; .flatten();
let addr = r.link.as_ref().map(|link| { let addr = r.link.as_ref().map(|link| {
if link.contains('@') { if link.contains('@') {
link.clone() link.clone()

View File

@ -179,7 +179,7 @@ pub async fn thread(
.get_first_value("date") .get_first_value("date")
.and_then(|d| mailparse::dateparse(&d).ok()); .and_then(|d| mailparse::dateparse(&d).ok());
let cid_prefix = shared::urls::cid_prefix(None, &id); let cid_prefix = shared::urls::cid_prefix(None, &id);
let base_url = Url::parse("https://there-should-be-no-relative-urls-in-email").unwrap(); let base_url = None;
let body = match extract_body(&m, &id)? { let body = match extract_body(&m, &id)? {
Body::PlainText(PlainText { text, content_tree }) => { Body::PlainText(PlainText { text, content_tree }) => {
let text = if text.len() > MAX_RAW_MESSAGE_SIZE { let text = if text.len() > MAX_RAW_MESSAGE_SIZE {