From c74cd668261ccd1ba71a3af0d505954673a43268 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Sun, 24 Mar 2024 18:11:15 -0700 Subject: [PATCH] server: add ability to view inline image attachments --- server/src/bin/server.rs | 82 +++++++-- server/src/error.rs | 7 + server/src/graphql.rs | 351 ++++++++++++++++++++++++++++----------- web/Cargo.toml | 1 - web/Trunk.toml | 4 + web/graphql/schema.json | 85 +++++++++- 6 files changed, 410 insertions(+), 120 deletions(-) diff --git a/server/src/bin/server.rs b/server/src/bin/server.rs index 0086c50..2dc9cc3 100644 --- a/server/src/bin/server.rs +++ b/server/src/bin/server.rs @@ -16,7 +16,7 @@ use rocket::{ use rocket_cors::{AllowedHeaders, AllowedOrigins}; use server::{ error::ServerError, - graphql::{GraphqlSchema, Mutation, QueryRoot}, + graphql::{attachment_bytes, Attachment, GraphqlSchema, Mutation, QueryRoot}, }; #[get("/refresh")] @@ -69,12 +69,34 @@ async fn show(nm: &State, query: &str) -> Result, Debug Ok(Json(res)) } -struct PartResponder { +struct InlineAttachmentResponder(Attachment); + +impl<'r, 'o: 'r> Responder<'r, 'o> for InlineAttachmentResponder { + fn respond_to(self, _: &'r Request<'_>) -> rocket::response::Result<'o> { + let mut resp = Response::build(); + if let Some(filename) = self.0.filename { + info!("filename {:?}", filename); + resp.header(Header::new( + "Content-Disposition", + format!(r#"inline; filename="{}""#, filename), + )); + } + if let Some(content_type) = self.0.content_type { + if let Some(ct) = ContentType::parse_flexible(&content_type) { + resp.header(ct); + } + } + resp.sized_body(self.0.bytes.len(), Cursor::new(self.0.bytes)) + .ok() + } +} + +struct DownloadPartResponder { bytes: Vec, filename: Option, } -impl<'r, 'o: 'r> Responder<'r, 'o> for PartResponder { +impl<'r, 'o: 'r> Responder<'r, 'o> for DownloadPartResponder { fn respond_to(self, _: &'r Request<'_>) -> rocket::response::Result<'o> { let mut resp = Response::build(); if let Some(filename) = self.filename { @@ -90,22 +112,49 @@ impl<'r, 'o: 'r> Responder<'r, 'o> for PartResponder { } } -#[get("/attachment//")] -async fn attachment( - _nm: &State, +#[get("/view/attachment///<_>")] +async fn view_attachment( + nm: &State, id: &str, - idx: usize, -) -> Result> { - let _idx = idx; - let _mid = if id.starts_with("id:") { + idx: &str, +) -> Result> { + let mid = if id.starts_with("id:") { id.to_string() } else { format!("id:{}", id) }; - let bytes = Vec::new(); - let filename = None; + info!("view attachment {mid} {idx}"); + let idx: Vec<_> = idx + .split('.') + .map(|s| s.parse().expect("not a usize")) + .collect(); + let attachment = attachment_bytes(nm, &mid, &idx)?; + // TODO: plumb Content-Type, or just create wrappers for serving the Attachment type + Ok(InlineAttachmentResponder(attachment)) +} + +#[get("/download/attachment///<_>")] +async fn download_attachment( + nm: &State, + id: &str, + idx: &str, +) -> Result> { + let mid = if id.starts_with("id:") { + id.to_string() + } else { + format!("id:{}", id) + }; + info!("download attachment {mid} {idx}"); + let idx: Vec<_> = idx + .split('.') + .map(|s| s.parse().expect("not a usize")) + .collect(); + let attachment = attachment_bytes(nm, &mid, &idx)?; // TODO(wathiede): use walk_attachments from graphql to fill this out - Ok(PartResponder { bytes, filename }) + Ok(DownloadPartResponder { + bytes: attachment.bytes, + filename: attachment.filename, + }) } #[get("/original//part/")] @@ -113,7 +162,7 @@ async fn original_part( nm: &State, id: &str, part: usize, -) -> Result> { +) -> Result> { let mid = if id.starts_with("id:") { id.to_string() } else { @@ -121,7 +170,7 @@ async fn original_part( }; let meta = nm.show_part(&mid, part)?; let res = nm.show_original_part(&mid, part)?; - Ok(PartResponder { + Ok(DownloadPartResponder { bytes: res, filename: meta.filename, }) @@ -201,7 +250,8 @@ async fn main() -> Result<(), Box> { graphql_query, graphql_request, graphiql, - attachment + view_attachment, + download_attachment, ], ) .attach(cors) diff --git a/server/src/error.rs b/server/src/error.rs index 1c0c705..f9552c3 100644 --- a/server/src/error.rs +++ b/server/src/error.rs @@ -1,3 +1,4 @@ +use mailparse::MailParseError; use thiserror::Error; #[derive(Error, Debug)] @@ -6,4 +7,10 @@ pub enum ServerError { NotmuchError(#[from] notmuch::NotmuchError), #[error("flatten")] FlattenError, + #[error("mail parse error")] + MailParseError(#[from] MailParseError), + #[error("IO error")] + IoError(#[from] std::io::Error), + #[error("attachement not found")] + PartNotFound, } diff --git a/server/src/graphql.rs b/server/src/graphql.rs index 8d31f76..c0c79f9 100644 --- a/server/src/graphql.rs +++ b/server/src/graphql.rs @@ -1,3 +1,4 @@ +const MAX_RAW_MESSAGE_SIZE: usize = 100_000; use std::{ collections::HashMap, fs::File, @@ -15,7 +16,7 @@ use memmap::MmapOptions; use notmuch::Notmuch; use rocket::time::Instant; -use crate::{linkify_html, sanitize_html}; +use crate::{error::ServerError, linkify_html, sanitize_html}; /// # Number of seconds since the Epoch pub type UnixTime = isize; @@ -25,6 +26,8 @@ pub type ThreadId = String; const TEXT_PLAIN: &'static str = "text/plain"; const TEXT_HTML: &'static str = "text/html"; +const IMAGE_JPEG: &'static str = "image/jpeg"; +const IMAGE_PNG: &'static str = "image/png"; const MULTIPART_ALTERNATIVE: &'static str = "multipart/alternative"; const MULTIPART_MIXED: &'static str = "multipart/mixed"; const MULTIPART_RELATED: &'static str = "multipart/related"; @@ -81,30 +84,14 @@ pub struct Message { // Content-Transfer-Encoding: base64 // Content-ID: // X-Attachment-Id: f_lponoluo1 -#[derive(Debug, SimpleObject)] +#[derive(Default, Debug, SimpleObject)] pub struct Attachment { - filename: String, - content_type: Option, - content_id: Option, -} - -#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)] -enum DispositionType { - Inline, - Attachment, -} - -impl FromStr for DispositionType { - type Err = String; - - // Required method - fn from_str(s: &str) -> Result { - Ok(match s { - "inline" => DispositionType::Inline, - "attachment" => DispositionType::Attachment, - c => return Err(format!("unknown disposition type: {c}")), - }) - } + pub filename: Option, + pub size: Option, + pub content_type: Option, + pub content_id: Option, + pub disposition: DispositionType, + pub bytes: Vec, } #[derive(Debug, SimpleObject)] @@ -345,18 +332,30 @@ impl QueryRoot { .headers .get_first_value("date") .and_then(|d| mailparse::dateparse(&d).ok()); - let body = match extract_body(&m)? { - Body::PlainText(PlainText { text, content_tree }) => Body::Html(Html { - html: format!( - r#"

{}

"#, - sanitize_html(&linkify_html(&text))? - ), - content_tree: if debug_content_tree { - render_content_type_tree(&m) + let body = match extract_body(&m, &id)? { + Body::PlainText(PlainText { text, content_tree }) => { + let text = if text.len() > MAX_RAW_MESSAGE_SIZE { + format!( + "{}...\n\nMESSAGE WAS TRUNCATED @ {} bytes", + &text[..MAX_RAW_MESSAGE_SIZE], + MAX_RAW_MESSAGE_SIZE + ) } else { - content_tree - }, - }), + text + }; + + Body::Html(Html { + html: format!( + r#"

{}

"#, + sanitize_html(&linkify_html(&text))? + ), + content_tree: if debug_content_tree { + render_content_type_tree(&m) + } else { + content_tree + }, + }) + } Body::Html(Html { html, content_tree }) => Body::Html(Html { html: sanitize_html(&html)?, content_tree: if debug_content_tree { @@ -428,13 +427,46 @@ impl Mutation { } } -fn extract_body(m: &ParsedMail) -> Result { +pub type GraphqlSchema = Schema; + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Disposition { + pub r#type: DispositionType, + pub filename: Option, + pub size: Option, +} + +#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)] +pub enum DispositionType { + Inline, + Attachment, +} + +impl From for DispositionType { + fn from(value: mailparse::DispositionType) -> Self { + match value { + mailparse::DispositionType::Inline => DispositionType::Inline, + mailparse::DispositionType::Attachment => DispositionType::Attachment, + dt => panic!("unhandled DispositionType {dt:?}"), + } + } +} + +impl Default for DispositionType { + fn default() -> Self { + DispositionType::Attachment + } +} + +fn extract_body(m: &ParsedMail, id: &str) -> Result { + let mut part_addr = Vec::new(); + part_addr.push(id.to_string()); let body = m.get_body()?; let ret = match m.ctype.mimetype.as_str() { TEXT_PLAIN => return Ok(Body::text(body)), TEXT_HTML => return Ok(Body::html(body)), - MULTIPART_MIXED => extract_mixed(m), - MULTIPART_ALTERNATIVE => extract_alternative(m), + MULTIPART_MIXED => extract_mixed(m, &mut part_addr), + MULTIPART_ALTERNATIVE => extract_alternative(m, &mut part_addr), _ => extract_unhandled(m), }; if let Err(err) = ret { @@ -457,7 +489,29 @@ fn extract_unhandled(m: &ParsedMail) -> Result { // multipart/alternative defines multiple representations of the same message, and clients should // show the fanciest they can display. For this program, the priority is text/html, text/plain, // then give up. -fn extract_alternative(m: &ParsedMail) -> Result { +fn extract_alternative(m: &ParsedMail, part_addr: &mut Vec) -> Result { + let handled_types = vec![ + MULTIPART_ALTERNATIVE, + MULTIPART_MIXED, + MULTIPART_RELATED, + TEXT_HTML, + TEXT_PLAIN, + ]; + for sp in &m.subparts { + if sp.ctype.mimetype.as_str() == MULTIPART_ALTERNATIVE { + return extract_alternative(sp, part_addr); + } + } + for sp in &m.subparts { + if sp.ctype.mimetype.as_str() == MULTIPART_MIXED { + return extract_related(sp, part_addr); + } + } + for sp in &m.subparts { + if sp.ctype.mimetype.as_str() == MULTIPART_RELATED { + return extract_related(sp, part_addr); + } + } for sp in &m.subparts { if sp.ctype.mimetype.as_str() == TEXT_HTML { let body = sp.get_body()?; @@ -470,26 +524,23 @@ fn extract_alternative(m: &ParsedMail) -> Result { return Ok(Body::text(body)); } } - for sp in &m.subparts { - if sp.ctype.mimetype.as_str() == MULTIPART_RELATED { - return extract_related(sp); - } - } Err(format!( "extract_alternative failed to find suitable subpart, searched: {:?}", - vec![TEXT_HTML, TEXT_PLAIN] + handled_types ) .into()) } // multipart/mixed defines multiple types of context all of which should be presented to the user // 'serially'. -fn extract_mixed(m: &ParsedMail) -> Result { +fn extract_mixed(m: &ParsedMail, part_addr: &mut Vec) -> Result { let handled_types = vec![ MULTIPART_ALTERNATIVE, MULTIPART_RELATED, TEXT_HTML, TEXT_PLAIN, + IMAGE_JPEG, + IMAGE_PNG, ]; let mut unhandled_types: Vec<_> = m .subparts @@ -498,33 +549,64 @@ fn extract_mixed(m: &ParsedMail) -> Result { .filter(|mt| !handled_types.contains(&mt)) .collect(); unhandled_types.sort(); - warn!("{MULTIPART_MIXED} contains the following unhandled mimetypes {unhandled_types:?}"); - for sp in &m.subparts { - if sp.ctype.mimetype.as_str() == MULTIPART_ALTERNATIVE { - return extract_alternative(sp); - } + if !unhandled_types.is_empty() { + warn!("{MULTIPART_MIXED} contains the following unhandled mimetypes {unhandled_types:?}"); } - for sp in &m.subparts { - if sp.ctype.mimetype == MULTIPART_RELATED { - return extract_related(sp); - } - } - for sp in &m.subparts { - let body = sp.get_body()?; + let mut parts = Vec::new(); + for (idx, sp) in m.subparts.iter().enumerate() { + part_addr.push(idx.to_string()); match sp.ctype.mimetype.as_str() { - TEXT_PLAIN => return Ok(Body::text(body)), - TEXT_HTML => return Ok(Body::html(body)), + MULTIPART_RELATED => parts.push(extract_related(sp, part_addr)?), + MULTIPART_ALTERNATIVE => parts.push(extract_alternative(sp, part_addr)?), + TEXT_PLAIN => parts.push(Body::text(sp.get_body()?)), + TEXT_HTML => parts.push(Body::html(sp.get_body()?)), + IMAGE_JPEG | IMAGE_PNG => { + let filename = { + let pcd = sp.get_content_disposition(); + pcd.params + .get("filename") + .map(|s| s.clone()) + .unwrap_or("".to_string()) + }; + parts.push(Body::html(format!( + r#""#, + part_addr[0], + part_addr + .iter() + .skip(1) + .map(|i| i.to_string()) + .collect::>() + .join(".") + ))) + } _ => (), } + part_addr.pop(); } - Err(format!( - "extract_mixed failed to find suitable subpart, searched: {:?}", - handled_types - ) - .into()) + Ok(flatten_body_parts(&parts)) } -fn extract_related(m: &ParsedMail) -> Result { +fn flatten_body_parts(parts: &[Body]) -> Body { + let html = parts + .iter() + .map(|p| match p { + Body::PlainText(PlainText { text, .. }) => format!( + r#"

{}

"#, + linkify_html(&text) + ), + Body::Html(Html { html, .. }) => html.clone(), + Body::UnhandledContentType(UnhandledContentType { text }) => { + format!(r#"

{text}

"#) + } + }) + .collect::>() + .join("\n"); + + info!("flatten_body_parts {} {html}", parts.len()); + Body::html(html) +} + +fn extract_related(m: &ParsedMail, _part_addr: &mut Vec) -> Result { // TODO(wathiede): collect related things and change return type to new Body arm. let handled_types = vec![TEXT_HTML, TEXT_PLAIN]; let mut unhandled_types: Vec<_> = m @@ -555,42 +637,69 @@ fn extract_related(m: &ParsedMail) -> Result { .into()) } +fn walk_attachments Option>( + m: &ParsedMail, + visitor: F, +) -> Option { + let mut cur_addr = Vec::new(); + for (idx, sp) in m.subparts.iter().enumerate() { + cur_addr.push(idx); + let val = visitor(sp, &cur_addr); + if val.is_some() { + return val; + } + cur_addr.pop(); + } + None +} + // TODO(wathiede): make this walk_attachments that takes a closure. // Then implement one closure for building `Attachment` and imlement another that can be used to // get the bytes for serving attachments of HTTP fn extract_attachments(m: &ParsedMail) -> Result, Error> { - let mut attachements = Vec::new(); + let mut attachments = Vec::new(); for sp in &m.subparts { - for h in &sp.headers { - if h.get_key() == "Content-Disposition" { - let v = h.get_value(); - if let Some(idx) = v.find(";") { - let dt = &v[..idx]; - match DispositionType::from_str(dt) { - Ok(DispositionType::Attachment) => { - attachements.push(Attachment { - filename: get_attachment_filename(&v).to_string(), - content_type: get_content_type(&sp.headers), - content_id: get_content_id(&sp.headers), - }); - } - Ok(DispositionType::Inline) => continue, - Err(e) => { - warn!("failed to parse Content-Disposition type '{}'", e); - continue; - } - }; - } else { - warn!("header has Content-Disposition missing ';'"); - continue; - } - } + if let Some(attachment) = extract_attachment(sp) { + attachments.push(attachment); } } - Ok(attachements) + Ok(attachments) +} + +fn extract_attachment(m: &ParsedMail) -> Option { + let pcd = m.get_content_disposition(); + // TODO: do we need to handle empty filename attachments, or should we change the definition of + // Attachment::filename? + let Some(filename) = pcd.params.get("filename").map(|f| f.clone()) else { + return None; + }; + + // TODO: grab this from somewhere + let content_id = None; + let bytes = match m.get_body_raw() { + Ok(bytes) => bytes, + Err(err) => { + error!("failed to get body for attachment: {err}"); + return None; + } + }; + return Some(Attachment { + disposition: pcd.disposition.into(), + filename: Some(filename), + size: pcd + .params + .get("size") + .map(|s| s.parse().unwrap_or_default()), + // TODO: what is the default for ctype? + // TODO: do we want to use m.ctype.params for anything? + content_type: Some(m.ctype.mimetype.clone()), + content_id, + bytes, + }); } fn get_attachment_filename(header_value: &str) -> &str { + info!("get_attachment_filename {header_value}"); // Strip last " let v = &header_value[..header_value.len() - 1]; if let Some(idx) = v.rfind('"') { @@ -625,6 +734,21 @@ fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option { fn render_content_type_tree(m: &ParsedMail) -> String { const WIDTH: usize = 4; + const SKIP_HEADERS: [&str; 4] = [ + "Authentication-Results", + "DKIM-Signature", + "Received", + "Received-SPF", + ]; + fn render_ct_rec(m: &ParsedMail, depth: usize) -> String { + let mut parts = Vec::new(); + let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype); + parts.push(msg); + for sp in &m.subparts { + parts.push(render_ct_rec(sp, depth + 1)) + } + parts.join("\n") + } fn render_rec(m: &ParsedMail, depth: usize) -> String { let mut parts = Vec::new(); let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype); @@ -639,7 +763,14 @@ fn render_content_type_tree(m: &ParsedMail) -> String { if !m.headers.is_empty() { parts.push(format!("{indent} == headers ==")); for h in &m.headers { - parts.push(format!("{indent} {}: {}", h.get_key(), h.get_value())); + if h.get_key().starts_with('X') { + continue; + } + if SKIP_HEADERS.contains(&h.get_key().as_str()) { + continue; + } + + parts.push(format!("{indent} {}: {}", h.get_key_ref(), h.get_value())); } } for sp in &m.subparts { @@ -647,11 +778,14 @@ fn render_content_type_tree(m: &ParsedMail) -> String { } parts.join("\n") } - render_rec(m, 1) + format!( + "Outline:\n{}\n\nDetailed:\n{}\n\nNot showing headers:\n {}\n X.*", + render_ct_rec(m, 1), + render_rec(m, 1), + SKIP_HEADERS.join("\n ") + ) } -pub type GraphqlSchema = Schema; - fn email_addresses(path: &str, m: &ParsedMail, header_name: &str) -> Result, Error> { let mut addrs = Vec::new(); for header_value in m.headers.get_all_values(header_name) { @@ -694,3 +828,28 @@ fn email_addresses(path: &str, m: &ParsedMail, header_name: &str) -> Result Result { + let files = nm.files(id)?; + let Some(path) = files.first() else { + warn!("failed to find files for message {id}"); + return Err(ServerError::PartNotFound); + }; + let file = File::open(&path)?; + let mmap = unsafe { MmapOptions::new().map(&file)? }; + let m = parse_mail(&mmap)?; + if let Some(attachment) = walk_attachments(&m, |sp, cur_idx| { + info!("checking {cur_idx:?}=={idx:?}"); + if cur_idx == idx { + let attachment = extract_attachment(&sp).unwrap_or(Attachment { + ..Attachment::default() + }); + return Some(attachment); + } + None + }) { + return Ok(attachment); + } + + Err(ServerError::PartNotFound) +} diff --git a/web/Cargo.toml b/web/Cargo.toml index 8241498..4f2b60c 100644 --- a/web/Cargo.toml +++ b/web/Cargo.toml @@ -33,7 +33,6 @@ thiserror = "1.0.50" seed_hooks = { git = "https://github.com/wathiede/styles_hooks", package = "seed_hooks", branch = "main" } gloo-net = { version = "0.4.0", features = ["json", "serde_json"] } - [package.metadata.wasm-pack.profile.release] wasm-opt = ['-Os'] diff --git a/web/Trunk.toml b/web/Trunk.toml index 135250a..ca16f3d 100644 --- a/web/Trunk.toml +++ b/web/Trunk.toml @@ -15,6 +15,10 @@ backend="http://localhost:9345/original" backend="http://localhost:9345/graphiql" [[proxy]] backend="http://localhost:9345/graphql" +[[proxy]] +backend="http://localhost:9345/download" +[[proxy]] +backend="http://localhost:9345/view" [[hooks]] stage = "pre_build" diff --git a/web/graphql/schema.json b/web/graphql/schema.json index 4f009ef..755b113 100644 --- a/web/graphql/schema.json +++ b/web/graphql/schema.json @@ -72,13 +72,21 @@ "isDeprecated": false, "name": "filename", "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "String", - "ofType": null - } + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + { + "args": [], + "deprecationReason": null, + "description": null, + "isDeprecated": false, + "name": "size", + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null } }, { @@ -104,6 +112,46 @@ "name": "String", "ofType": null } + }, + { + "args": [], + "deprecationReason": null, + "description": null, + "isDeprecated": false, + "name": "disposition", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "DispositionType", + "ofType": null + } + } + }, + { + "args": [], + "deprecationReason": null, + "description": null, + "isDeprecated": false, + "name": "bytes", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + } + } + } } ], "inputFields": null, @@ -148,6 +196,29 @@ "name": "Boolean", "possibleTypes": null }, + { + "description": null, + "enumValues": [ + { + "deprecationReason": null, + "description": null, + "isDeprecated": false, + "name": "INLINE" + }, + { + "deprecationReason": null, + "description": null, + "isDeprecated": false, + "name": "ATTACHMENT" + } + ], + "fields": null, + "inputFields": null, + "interfaces": null, + "kind": "ENUM", + "name": "DispositionType", + "possibleTypes": null + }, { "description": null, "enumValues": null,