From 3a5a9bd66aaaa86c00ad552d0f9a3f7ce37af1c9 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Fri, 5 Jul 2024 10:38:12 -0700 Subject: [PATCH] Add support for inline images --- server/src/bin/server.rs | 21 +++++++++- server/src/graphql.rs | 90 +++++++++++++++++++++++++++++++--------- server/src/lib.rs | 39 +++++++++++------ web/Trunk.toml | 2 + 4 files changed, 119 insertions(+), 33 deletions(-) diff --git a/server/src/bin/server.rs b/server/src/bin/server.rs index 6e5e096..7bc5308 100644 --- a/server/src/bin/server.rs +++ b/server/src/bin/server.rs @@ -16,7 +16,9 @@ use rocket::{ use rocket_cors::{AllowedHeaders, AllowedOrigins}; use server::{ error::ServerError, - graphql::{attachment_bytes, Attachment, GraphqlSchema, Mutation, QueryRoot}, + graphql::{ + attachment_bytes, cid_attachment_bytes, Attachment, GraphqlSchema, Mutation, QueryRoot, + }, }; #[get("/refresh")] @@ -111,6 +113,22 @@ impl<'r, 'o: 'r> Responder<'r, 'o> for DownloadAttachmentResponder { } } +#[get("/cid//")] +async fn view_cid( + nm: &State, + id: &str, + cid: &str, +) -> Result> { + let mid = if id.starts_with("id:") { + id.to_string() + } else { + format!("id:{}", id) + }; + info!("view cid attachment {mid} {cid}"); + let attachment = cid_attachment_bytes(nm, &mid, &cid)?; + Ok(InlineAttachmentResponder(attachment)) +} + #[get("/view/attachment///<_>")] async fn view_attachment( nm: &State, @@ -224,6 +242,7 @@ async fn main() -> Result<(), Box> { graphql_query, graphql_request, graphiql, + view_cid, view_attachment, download_attachment, ], diff --git a/server/src/graphql.rs b/server/src/graphql.rs index 262a885..e1ccdd3 100644 --- a/server/src/graphql.rs +++ b/server/src/graphql.rs @@ -337,6 +337,7 @@ impl QueryRoot { .headers .get_first_value("date") .and_then(|d| mailparse::dateparse(&d).ok()); + let cid_prefix = format!("/cid/{id}/"); let body = match extract_body(&m, &id)? 
{ Body::PlainText(PlainText { text, content_tree }) => { let text = if text.len() > MAX_RAW_MESSAGE_SIZE { @@ -355,7 +356,7 @@ impl QueryRoot { // Trim newlines to prevent excessive white space at the beginning/end of // presenation. Leave tabs and spaces incase plain text attempts to center a // header on the first line. - sanitize_html(&linkify_html(&text.trim_matches('\n')))? + sanitize_html(&linkify_html(&text.trim_matches('\n')), &cid_prefix)? ), content_tree: if debug_content_tree { render_content_type_tree(&m) @@ -365,7 +366,7 @@ impl QueryRoot { }) } Body::Html(Html { html, content_tree }) => Body::Html(Html { - html: sanitize_html(&html)?, + html: sanitize_html(&html, &cid_prefix)?, content_tree: if debug_content_tree { render_content_type_tree(&m) } else { @@ -671,7 +672,13 @@ fn flatten_body_parts(parts: &[Body]) -> Body { fn extract_related(m: &ParsedMail, part_addr: &mut Vec) -> Result { // TODO(wathiede): collect related things and change return type to new Body arm. - let handled_types = vec![MULTIPART_ALTERNATIVE, TEXT_HTML, TEXT_PLAIN]; + let handled_types = vec![ + MULTIPART_ALTERNATIVE, + TEXT_HTML, + TEXT_PLAIN, + IMAGE_JPEG, + IMAGE_PNG, + ]; let mut unhandled_types: Vec<_> = m .subparts .iter() @@ -679,8 +686,21 @@ fn extract_related(m: &ParsedMail, part_addr: &mut Vec) -> Result) -> Result Option>( +fn walk_attachments Option + Copy>( m: &ParsedMail, visitor: F, ) -> Option { let mut cur_addr = Vec::new(); + walk_attachments_inner(m, visitor, &mut cur_addr) +} + +fn walk_attachments_inner Option + Copy>( + m: &ParsedMail, + visitor: F, + cur_addr: &mut Vec, +) -> Option { for (idx, sp) in m.subparts.iter().enumerate() { cur_addr.push(idx); let val = visitor(sp, &cur_addr); if val.is_some() { return val; } + let val = walk_attachments_inner(sp, visitor, cur_addr); + if val.is_some() { + return val; + } cur_addr.pop(); } None @@ -784,26 +816,18 @@ fn get_attachment_filename(header_value: &str) -> &str { } fn get_content_type<'a>(headers: 
&[MailHeader<'a>]) -> Option { - for h in headers { - if h.get_key() == "Content-Type" { - let v = h.get_value(); - if let Some(idx) = v.find(';') { - return Some(v[..idx].to_string()); - } else { - return Some(v); - } + if let Some(v) = headers.get_first_value("Content-Type") { + if let Some(idx) = v.find(';') { + return Some(v[..idx].to_string()); + } else { + return Some(v); } } None } fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option { - for h in headers { - if h.get_key() == "Content-ID" { - return Some(h.get_value()); - } - } - None + headers.get_first_value("Content-Id") } fn render_content_type_tree(m: &ParsedMail) -> String { @@ -903,6 +927,34 @@ fn email_addresses(path: &str, m: &ParsedMail, header_name: &str) -> Result Result { + let files = nm.files(id)?; + let Some(path) = files.first() else { + warn!("failed to find files for message {id}"); + return Err(ServerError::PartNotFound); + }; + let file = File::open(&path)?; + let mmap = unsafe { MmapOptions::new().map(&file)? 
}; + let m = parse_mail(&mmap)?; + if let Some(attachment) = walk_attachments(&m, |sp, cur_idx| { + info!("{cid} {:?}", get_content_id(&sp.headers)); + if let Some(h_cid) = get_content_id(&sp.headers) { + let h_cid = &h_cid[1..h_cid.len() - 1]; + if h_cid == cid { + let attachment = extract_attachment(&sp, id, &[]).unwrap_or(Attachment { + ..Attachment::default() + }); + return Some(attachment); + } + } + None + }) { + return Ok(attachment); + } + + Err(ServerError::PartNotFound) +} + pub fn attachment_bytes(nm: &Notmuch, id: &str, idx: &[usize]) -> Result { let files = nm.files(id)?; let Some(path) = files.first() else { diff --git a/server/src/lib.rs b/server/src/lib.rs index 2b9af68..9e12e8d 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -4,7 +4,7 @@ pub mod nm; use css_inline::{CSSInliner, InlineError, InlineOptions}; use linkify::{LinkFinder, LinkKind}; -use log::error; +use log::{error, info}; use lol_html::{element, errors::RewritingError, rewrite_str, RewriteStrSettings}; use maplit::{hashmap, hashset}; use thiserror::Error; @@ -43,7 +43,9 @@ pub fn linkify_html(text: &str) -> String { parts.join("") } -pub fn sanitize_html(html: &str) -> Result { +// html contains the content to be cleaned, and cid_prefix is used to resolve mixed part image +// references +pub fn sanitize_html(html: &str, cid_prefix: &str) -> Result { let element_content_handlers = vec![ // Open links in new tab element!("a[href]", |el| { @@ -51,6 +53,17 @@ pub fn sanitize_html(html: &str) -> Result { Ok(()) }), + // Replace mixed part CID images with URL + element!("img[src]", |el| { + let src = el + .get_attribute("src") + .expect("src was required") + .replace("cid:", cid_prefix); + + el.set_attribute("src", &src)?; + + Ok(()) + }), // Only secure image URLs element!("img[src]", |el| { let src = el @@ -225,19 +238,19 @@ pub fn sanitize_html(html: &str) -> Result { ], ]; - let clean_html = ammonia::Builder::default() - .tags(tags) - .tag_attributes(tag_attributes) - 
.generic_attributes(attributes) - .clean(&inlined_html) - .to_string(); - //let clean_html = inlined_html; - - Ok(rewrite_str( - &clean_html, + let rewritten_html = rewrite_str( + &inlined_html, RewriteStrSettings { element_content_handlers, ..RewriteStrSettings::default() }, - )?) + )?; + let clean_html = ammonia::Builder::default() + .tags(tags) + .tag_attributes(tag_attributes) + .generic_attributes(attributes) + .clean(&rewritten_html) + .to_string(); + + Ok(clean_html) } diff --git a/web/Trunk.toml b/web/Trunk.toml index ca16f3d..e5e1e26 100644 --- a/web/Trunk.toml +++ b/web/Trunk.toml @@ -10,6 +10,8 @@ port = 6758 backend = "http://localhost:9345/" rewrite= "/api/" [[proxy]] +backend="http://localhost:9345/cid" +[[proxy]] backend="http://localhost:9345/original" [[proxy]] backend="http://localhost:9345/graphiql"