diff --git a/Cargo.lock b/Cargo.lock index c7a4bf3..c3e613b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3183,6 +3183,7 @@ dependencies = [ "sqlx", "thiserror", "tokio", + "url", "urlencoding", ] diff --git a/server/Cargo.toml b/server/Cargo.toml index 2a30a58..b270683 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -29,4 +29,5 @@ anyhow = "1.0.79" maplit = "1.0.2" linkify = "0.10.0" sqlx = { version = "0.7.4", features = ["postgres", "runtime-tokio", "time"] } +url = "2.5.2" diff --git a/server/sql/thread.sql b/server/sql/thread.sql index 242d600..ee76a1b 100644 --- a/server/sql/thread.sql +++ b/server/sql/thread.sql @@ -5,7 +5,8 @@ SELECT site, summary, title, - name + name, + homepage FROM post p JOIN feed f ON p.site = f.slug diff --git a/server/src/error.rs b/server/src/error.rs index 869e901..bee0e9d 100644 --- a/server/src/error.rs +++ b/server/src/error.rs @@ -7,26 +7,28 @@ use crate::SanitizeError; #[derive(Error, Debug)] pub enum ServerError { - #[error("notmuch")] + #[error("notmuch: {0}")] NotmuchError(#[from] notmuch::NotmuchError), #[error("flatten")] FlattenError, - #[error("mail parse error")] + #[error("mail parse error: {0}")] MailParseError(#[from] MailParseError), - #[error("IO error")] + #[error("IO error: {0}")] IoError(#[from] std::io::Error), #[error("attachement not found")] PartNotFound, - #[error("sqlx error")] + #[error("sqlx error: {0}")] SQLXError(#[from] sqlx::Error), - #[error("html sanitize error")] + #[error("html sanitize error: {0}")] SanitizeError(#[from] SanitizeError), - #[error("UTF8 error")] + #[error("UTF8 error: {0}")] Utf8Error(#[from] Utf8Error), - #[error("FromUTF8 error")] + #[error("FromUTF8 error: {0}")] FromUtf8Error(#[from] FromUtf8Error), - #[error("error")] + #[error("error: {0}")] StringError(String), - #[error("impossible")] + #[error("invalid url: {0}")] + UrlParseError(#[from] url::ParseError), + #[error("impossible: {0}")] InfaillibleError(#[from] Infallible), } diff --git a/server/src/lib.rs b/server/src/lib.rs index b631921..6d65015 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -9,6 +9,7 @@ use log::error; use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings}; use maplit::{hashmap, hashset}; use thiserror::Error; +use url::Url; #[derive(Error, Debug)] pub enum SanitizeError { @@ -46,7 +47,11 @@ pub fn linkify_html(text: &str) -> String { // html contains the content to be cleaned, and cid_prefix is used to resolve mixed part image // referrences -pub fn sanitize_html(html: &str, cid_prefix: &str) -> Result { +pub fn sanitize_html( + html: &str, + cid_prefix: &str, + base_url: &Url, +) -> Result { let element_content_handlers = vec![ // Open links in new tab element!("a[href]", |el| { @@ -54,6 +59,22 @@ pub fn sanitize_html(html: &str, cid_prefix: &str) -> Result bool { @@ -174,14 +176,54 @@ pub async fn thread(pool: &PgPool, thread_id: String) -> Result + // * Some sites appear to be HTML encoded, unencode them, i.e. imperialviolet + let html = sanitize_html(&html, "", &link)?; let body = Body::Html(Html { - html: r.summary.unwrap_or("NO SUMMARY".to_string()), + html, content_tree: "".to_string(), }); let title = r.title.unwrap_or("NO TITLE".to_string()); let from = Some(Email { name: r.name, - addr: r.link, + addr: addr.map(|a| a.to_string()), }); Ok(Thread { thread_id, diff --git a/server/src/nm.rs b/server/src/nm.rs index 1064dbb..af1e6c8 100644 --- a/server/src/nm.rs +++ b/server/src/nm.rs @@ -10,6 +10,7 @@ use log::{error, info, warn}; use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail}; use memmap::MmapOptions; use notmuch::Notmuch; +use url::Url; use crate::{ error::ServerError, @@ -178,6 +179,7 @@ pub async fn thread( .get_first_value("date") .and_then(|d| mailparse::dateparse(&d).ok()); let cid_prefix = shared::urls::cid_prefix(None, &id); + let base_url = Url::parse("https://there-should-be-no-relative-urls-in-email").unwrap(); let body = match extract_body(&m, &id)? { Body::PlainText(PlainText { text, content_tree }) => { let text = if text.len() > MAX_RAW_MESSAGE_SIZE { @@ -196,7 +198,11 @@ pub async fn thread( // Trim newlines to prevent excessive white space at the beginning/end of // presenation. Leave tabs and spaces incase plain text attempts to center a // header on the first line. - sanitize_html(&linkify_html(&text.trim_matches('\n')), &cid_prefix)? + sanitize_html( + &linkify_html(&text.trim_matches('\n')), + &cid_prefix, + &base_url + )? ), content_tree: if debug_content_tree { render_content_type_tree(&m) @@ -206,7 +212,7 @@ pub async fn thread( }) } Body::Html(Html { html, content_tree }) => Body::Html(Html { - html: sanitize_html(&html, &cid_prefix)?, + html: sanitize_html(&html, &cid_prefix, &base_url)?, content_tree: if debug_content_tree { render_content_type_tree(&m) } else {