// Notmuch-backed mail access: search, tag listing, thread rendering and attachment lookup.
//
// NOTE(review): this text appears to have lost everything that was written between angle
// brackets: generic arguments (e.g. `Result, ServerError>`, `Option,`, `HashMap =`) and the markup
// inside the raw-string templates. It also has whole definitions collapsed onto single lines, so
// the `//` comments swallow the code that follows them. The code below is left exactly as found;
// restore the missing pieces from version control before building.
//
// Definitions in this span, in order:
//
// * `threadset_to_messages` - stub: walks the thread set without using it and returns an empty
//   vector.
// * `count` - forwards to `nm.count(query)`.
// * `search` - gets `(offset, limit)` from `compute_offset_limit`, asks for one extra row when
//   `before` is `None` (to detect a following page), and pairs every result with its absolute
//   index `offset + i`. Thread ids are prefixed with `thread:`.
// * `tags` - lists all tags. With `needs_unread` it first searches `is:unread` (capped at 10000
//   results) and counts results per tag; otherwise `unread` is 0. The background colour is the
//   low 24 bits of a `DefaultHasher` hash of the tag name, the foreground is always "white".
//   Logs the elapsed time.
// * `thread` - for every `(path, id)` pair zipped from `nm.files` and `nm.message_ids`: loads tags,
//   memory-maps and parses the file, collects from/to/cc/subject/date, builds the body and the
//   attachment list. Plain text is truncated to `MAX_RAW_MESSAGE_SIZE` bytes, linkified, sanitized
//   and returned as `Body::Html`; HTML is sanitized; an unhandled body is replaced by the raw bytes
//   following the first "\n\n" found within the first 20_000 two-byte windows. When
//   `debug_content_tree` is set, `content_tree` is replaced by `render_content_type_tree`. The
//   message list is reversed and the thread subject is the first subject that is set, or
//   "(NO SUBJECT)".
//   NOTE(review): `&text[..MAX_RAW_MESSAGE_SIZE]` slices by byte offset; if `text` is a `String`
//   this panics when the offset is not a char boundary - confirm and truncate on a boundary.
//   NOTE(review): when no "\n\n" is found `body_start` is 0 and the body starts at byte 2; a
//   mapping shorter than 2 bytes would make `mmap[body_start + 2..]` panic.
//   NOTE(review): `zip` stops at the shorter input and assumes both lists share one order -
//   verify against the `Notmuch` wrapper.
// * `email_addresses` - parses every `header_name` header value. Single addresses are collected;
//   groups are skipped, and printed to stdout with `println!` unless the group name contains
//   "ndisclosed". When parsing fails: a value with exactly one '@' is split by hand if it looks
//   like `name <addr>` and is otherwise dropped; any other value is kept as a name without an
//   address.
// * `cid_attachment_bytes` - opens the first file of message `id` and returns the first part whose
//   Content-Id, with its first and last byte removed, equals `cid`. Falls back to
//   `Attachment::default()` when `extract_attachment` yields `None`; `PartNotFound` when no part
//   matches.
//   NOTE(review): `&h_cid[1..h_cid.len() - 1]` panics for a header value shorter than two bytes.
// * `attachment_bytes` - same lookup, matching on the part index path `idx` instead of a
//   Content-Id.
// * `extract_body` - dispatches on the top-level mimetype. text/plain and text/html return
//   directly; an error from a multipart extractor is logged and replaced by `extract_unhandled`.
//   Note that `m.get_body()?` runs before the dispatch, so a decode failure there is returned
//   even for multipart messages.
// * `extract_unhandled` - wraps the rendered content-type tree plus the decoded body in
//   `Body::UnhandledContentType`.
use std::{ collections::HashMap, fs::File, hash::{DefaultHasher, Hash, Hasher}, time::Instant, }; use log::{error, info, warn}; use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail}; use memmap::MmapOptions; use notmuch::Notmuch; use crate::{ compute_offset_limit, error::ServerError, graphql::{ Attachment, Body, DispositionType, Email, EmailThread, Header, Html, Message, PlainText, Tag, Thread, ThreadSummary, UnhandledContentType, }, linkify_html, sanitize_html, }; const TEXT_PLAIN: &'static str = "text/plain"; const TEXT_HTML: &'static str = "text/html"; const IMAGE_JPEG: &'static str = "image/jpeg"; const IMAGE_PNG: &'static str = "image/png"; const MULTIPART_ALTERNATIVE: &'static str = "multipart/alternative"; const MULTIPART_MIXED: &'static str = "multipart/mixed"; const MULTIPART_RELATED: &'static str = "multipart/related"; const MAX_RAW_MESSAGE_SIZE: usize = 100_000; // TODO(wathiede): decide good error type pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result, ServerError> { for t in thread_set.0 { for _tn in t.0 {} } Ok(Vec::new()) } pub async fn count(nm: &Notmuch, query: &str) -> Result { Ok(nm.count(query)?) } pub async fn search( nm: &Notmuch, after: Option, before: Option, first: Option, last: Option, query: String, ) -> Result, async_graphql::Error> { let (offset, mut limit) = compute_offset_limit(after, before, first, last); if before.is_none() { // When searching forward, the +1 is to see if there are more pages of data available. // Searching backwards implies there's more pages forward, because the value represented by // `before` is on the next page. limit = limit + 1; } Ok(nm .search(&query, offset as usize, limit as usize)? 
.0 .into_iter() .enumerate() .map(|(i, ts)| { ( offset + i as i32, ThreadSummary { thread: format!("thread:{}", ts.thread), timestamp: ts.timestamp, date_relative: ts.date_relative, matched: ts.matched, total: ts.total, authors: ts.authors, subject: ts.subject, tags: ts.tags, }, ) }) .collect()) } pub fn tags(nm: &Notmuch, needs_unread: bool) -> Result, ServerError> { let now = Instant::now(); let unread_msg_cnt: HashMap = if needs_unread { // 10000 is an arbitrary number, if there's more than 10k unread messages, we'll // get an inaccurate count. nm.search("is:unread", 0, 10000)? .0 .iter() .fold(HashMap::new(), |mut m, ts| { ts.tags.iter().for_each(|t| { m.entry(t.clone()).and_modify(|c| *c += 1).or_insert(1); }); m }) } else { HashMap::new() }; let tags = nm .tags()? .into_iter() .map(|tag| { let mut hasher = DefaultHasher::new(); tag.hash(&mut hasher); let hex = format!("#{:06x}", hasher.finish() % (1 << 24)); let unread = if needs_unread { *unread_msg_cnt.get(&tag).unwrap_or(&0) } else { 0 }; Tag { name: tag, fg_color: "white".to_string(), bg_color: hex, unread, } }) .collect(); info!("Fetching tags took {} seconds", now.elapsed().as_secs_f32()); Ok(tags) } pub async fn thread( nm: &Notmuch, thread_id: String, debug_content_tree: bool, ) -> Result { // TODO(wathiede): normalize all email addresses through an address book with preferred // display names (that default to the most commonly seen name). let mut messages = Vec::new(); for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) { let tags = nm.tags_for_query(&format!("id:{id}"))?; let file = File::open(&path)?; let mmap = unsafe { MmapOptions::new().map(&file)? 
// NOTE(review): no SAFETY justification is given for this mapping; presumably the mail file is
// never truncated or rewritten while mapped - confirm.
}; let m = parse_mail(&mmap)?; let from = email_addresses(&path, &m, "from")?; let from = match from.len() { 0 => None, 1 => from.into_iter().next(), _ => { warn!( "Got {} from addresses in message, truncating: {:?}", from.len(), from ); from.into_iter().next() } }; let to = email_addresses(&path, &m, "to")?; let cc = email_addresses(&path, &m, "cc")?; let subject = m.headers.get_first_value("subject"); let timestamp = m .headers .get_first_value("date") .and_then(|d| mailparse::dateparse(&d).ok()); let cid_prefix = shared::urls::cid_prefix(None, &id); let base_url = None; let body = match extract_body(&m, &id)? { Body::PlainText(PlainText { text, content_tree }) => { let text = if text.len() > MAX_RAW_MESSAGE_SIZE { format!( "{}...\n\nMESSAGE WAS TRUNCATED @ {} bytes", &text[..MAX_RAW_MESSAGE_SIZE], MAX_RAW_MESSAGE_SIZE ) } else { text }; Body::Html(Html { html: format!( r#"

{}

"#, // Trim newlines to prevent excessive white space at the beginning/end of // presenation. Leave tabs and spaces incase plain text attempts to center a // header on the first line. sanitize_html( &linkify_html(&text.trim_matches('\n')), &cid_prefix, &base_url )? ), content_tree: if debug_content_tree { render_content_type_tree(&m) } else { content_tree }, }) } Body::Html(Html { html, content_tree }) => Body::Html(Html { html: sanitize_html(&html, &cid_prefix, &base_url)?, content_tree: if debug_content_tree { render_content_type_tree(&m) } else { content_tree }, }), Body::UnhandledContentType(UnhandledContentType { content_tree, .. }) => { let body_start = mmap .windows(2) .take(20_000) .position(|w| w == b"\n\n") .unwrap_or(0); let body = mmap[body_start + 2..].to_vec(); Body::UnhandledContentType(UnhandledContentType { text: String::from_utf8(body)?, content_tree: if debug_content_tree { render_content_type_tree(&m) } else { content_tree }, }) } }; let headers = m .headers .iter() .map(|h| Header { key: h.get_key(), value: h.get_value(), }) .collect(); // TODO(wathiede): parse message and fill out attachments let attachments = extract_attachments(&m, &id)?; messages.push(Message { id: format!("id:{id}"), from, to, cc, subject, tags, timestamp, headers, body, path, attachments, }); } messages.reverse(); // Find the first subject that's set. After reversing the vec, this should be the oldest // message. 
let subject: String = messages .iter() .skip_while(|m| m.subject.is_none()) .next() .and_then(|m| m.subject.clone()) .unwrap_or("(NO SUBJECT)".to_string()); Ok(Thread::Email(EmailThread { thread_id, subject, messages, })) } fn email_addresses( path: &str, m: &ParsedMail, header_name: &str, ) -> Result, ServerError> { let mut addrs = Vec::new(); for header_value in m.headers.get_all_values(header_name) { match mailparse::addrparse(&header_value) { Ok(mal) => { for ma in mal.into_inner() { match ma { mailparse::MailAddr::Group(gi) => { if !gi.group_name.contains("ndisclosed") { println!("[{path}][{header_name}] Group: {gi}"); } } mailparse::MailAddr::Single(s) => addrs.push(Email { name: s.display_name, addr: Some(s.addr), }), //println!("Single: {s}"), } } } Err(_) => { let v = header_value; if v.matches('@').count() == 1 { if v.matches('<').count() == 1 && v.ends_with('>') { let idx = v.find('<').unwrap(); let addr = &v[idx + 1..v.len() - 1].trim(); let name = &v[..idx].trim(); addrs.push(Email { name: Some(name.to_string()), addr: Some(addr.to_string()), }); } } else { addrs.push(Email { name: Some(v), addr: None, }); } } } } Ok(addrs) } pub fn cid_attachment_bytes(nm: &Notmuch, id: &str, cid: &str) -> Result { let files = nm.files(id)?; let Some(path) = files.first() else { warn!("failed to find files for message {id}"); return Err(ServerError::PartNotFound); }; let file = File::open(&path)?; let mmap = unsafe { MmapOptions::new().map(&file)? 
// NOTE(review): same unchecked mapping assumption as in `thread` - confirm.
}; let m = parse_mail(&mmap)?; if let Some(attachment) = walk_attachments(&m, |sp, _cur_idx| { info!("{cid} {:?}", get_content_id(&sp.headers)); if let Some(h_cid) = get_content_id(&sp.headers) { let h_cid = &h_cid[1..h_cid.len() - 1]; if h_cid == cid { let attachment = extract_attachment(&sp, id, &[]).unwrap_or(Attachment { ..Attachment::default() }); return Some(attachment); } } None }) { return Ok(attachment); } Err(ServerError::PartNotFound) } pub fn attachment_bytes(nm: &Notmuch, id: &str, idx: &[usize]) -> Result { let files = nm.files(id)?; let Some(path) = files.first() else { warn!("failed to find files for message {id}"); return Err(ServerError::PartNotFound); }; let file = File::open(&path)?; let mmap = unsafe { MmapOptions::new().map(&file)? }; let m = parse_mail(&mmap)?; if let Some(attachment) = walk_attachments(&m, |sp, cur_idx| { if cur_idx == idx { let attachment = extract_attachment(&sp, id, idx).unwrap_or(Attachment { ..Attachment::default() }); return Some(attachment); } None }) { return Ok(attachment); } Err(ServerError::PartNotFound) } fn extract_body(m: &ParsedMail, id: &str) -> Result { let mut part_addr = Vec::new(); part_addr.push(id.to_string()); let body = m.get_body()?; let ret = match m.ctype.mimetype.as_str() { TEXT_PLAIN => return Ok(Body::text(body)), TEXT_HTML => return Ok(Body::html(body)), MULTIPART_MIXED => extract_mixed(m, &mut part_addr), MULTIPART_ALTERNATIVE => extract_alternative(m, &mut part_addr), MULTIPART_RELATED => extract_related(m, &mut part_addr), _ => extract_unhandled(m), }; if let Err(err) = ret { error!("Failed to extract body: {err:?}"); return Ok(extract_unhandled(m)?); } ret } fn extract_unhandled(m: &ParsedMail) -> Result { let msg = format!( "Unhandled body content type:\n{}\n{}", render_content_type_tree(m), m.get_body()?, ); Ok(Body::UnhandledContentType(UnhandledContentType { text: msg, content_tree: render_content_type_tree(m), })) } // multipart/alternative defines multiple representations of the same 
message, and clients should // show the fanciest they can display. For this program, the priority is text/html, text/plain, // then give up. fn extract_alternative(m: &ParsedMail, part_addr: &mut Vec) -> Result { let handled_types = vec![ MULTIPART_ALTERNATIVE, MULTIPART_MIXED, MULTIPART_RELATED, TEXT_HTML, TEXT_PLAIN, ]; for sp in &m.subparts { if sp.ctype.mimetype.as_str() == MULTIPART_ALTERNATIVE { return extract_alternative(sp, part_addr); } } for sp in &m.subparts { if sp.ctype.mimetype.as_str() == MULTIPART_MIXED { return extract_related(sp, part_addr); } } for sp in &m.subparts { if sp.ctype.mimetype.as_str() == MULTIPART_RELATED { return extract_related(sp, part_addr); } } for sp in &m.subparts { if sp.ctype.mimetype.as_str() == TEXT_HTML { let body = sp.get_body()?; return Ok(Body::html(body)); } } for sp in &m.subparts { if sp.ctype.mimetype.as_str() == TEXT_PLAIN { let body = sp.get_body()?; return Ok(Body::text(body)); } } Err(ServerError::StringError(format!( "extract_alternative failed to find suitable subpart, searched: {:?}", handled_types ))) } // multipart/mixed defines multiple types of context all of which should be presented to the user // 'serially'. 
// NOTE(review): as elsewhere in this text, everything that was written between angle brackets
// appears to be missing: generic arguments (`&mut Vec`, `-> Result`, `collect::>()`,
// `fn walk_attachments Option + Copy>`) and the markup inside the raw-string templates. The code
// is left exactly as found; restore the missing pieces from version control before building.
//
// Definitions in this span, in order:
//
// * `extract_mixed` - warns about subpart mimetypes it does not handle, then visits every
//   subpart with its index pushed onto `part_addr` (popped again afterwards): nested
//   related/alternative parts are extracted recursively, text parts are taken as-is, and
//   JPEG/PNG parts whose disposition is `Inline` contribute an HTML snippet built from
//   `part_addr`. Everything is merged by `flatten_body_parts`.
//   NOTE(review): the inline-image `format!` template is empty here while two arguments are
//   passed, and `filename` is not used anywhere else - presumably both belonged to the lost
//   template.
// * `flatten_body_parts` - renders every part to a string (plain and unhandled text is linkified
//   with surrounding newlines trimmed, HTML is cloned), joins them with "\n" and wraps the result
//   with `Body::html`. Logs the complete HTML at info level and the length of unhandled text at
//   error level.
// * `extract_related` - warns about unhandled subpart mimetypes, logs the Content-Id and part
//   address of PNG/JPEG subparts, then returns the first multipart/alternative, text/html or
//   text/plain subpart in that priority, else `ServerError::StringError`.
//   NOTE(review): for multipart/alternative it calls `extract_alternative(m, ..)` with the
//   parent `m`, not `sp`; this relies on `extract_alternative` locating the subpart again.
// * `walk_attachments` / `walk_attachments_inner` - depth-first walk over all nested subparts.
//   The visitor gets each part together with its index path and the walk stops at the first
//   `Some` it returns.
// * `extract_attachments` - builds an `Attachment` for each direct subpart of `m` (nested parts
//   are not visited) and keeps those whose disposition is `DispositionType::Attachment`.
// * `extract_attachment` - returns `None` when the content disposition has no `filename`
//   parameter or the raw body cannot be read (the latter is logged); otherwise an `Attachment`
//   whose `idx` is the index path joined with ".".
// * `get_attachment_filename` - drops the last byte of `header_value` and returns what follows
//   the last '"' in the remainder, or "" when there is none.
//   NOTE(review): `header_value.len() - 1` underflows on an empty string, and the slice panics
//   when the final character is multi-byte.
// * `get_content_type` - the Content-Type header value up to, not including, the first ';'.
// * `get_content_id` - the first Content-Id header value.
// * `render_content_type_tree` - text dump of the MIME structure: an outline of mimetypes, then
//   a detailed listing with charset, content-type parameters and headers. Headers whose key
//   starts with 'X' or is listed in `SKIP_HEADERS` are omitted.
// * `set_read_status` - adds (`unread == true`) or removes the "unread" tag on everything
//   matching `query`; returns `Ok(true)` when the tag call succeeds.
fn extract_mixed(m: &ParsedMail, part_addr: &mut Vec) -> Result { let handled_types = vec![ MULTIPART_ALTERNATIVE, MULTIPART_RELATED, TEXT_HTML, TEXT_PLAIN, IMAGE_JPEG, IMAGE_PNG, ]; let mut unhandled_types: Vec<_> = m .subparts .iter() .map(|sp| sp.ctype.mimetype.as_str()) .filter(|mt| !handled_types.contains(&mt)) .collect(); unhandled_types.sort(); if !unhandled_types.is_empty() { warn!("{MULTIPART_MIXED} contains the following unhandled mimetypes {unhandled_types:?}"); } let mut parts = Vec::new(); for (idx, sp) in m.subparts.iter().enumerate() { part_addr.push(idx.to_string()); match sp.ctype.mimetype.as_str() { MULTIPART_RELATED => parts.push(extract_related(sp, part_addr)?), MULTIPART_ALTERNATIVE => parts.push(extract_alternative(sp, part_addr)?), TEXT_PLAIN => parts.push(Body::text(sp.get_body()?)), TEXT_HTML => parts.push(Body::html(sp.get_body()?)), IMAGE_JPEG | IMAGE_PNG => { let pcd = sp.get_content_disposition(); let filename = pcd .params .get("filename") .map(|s| s.clone()) .unwrap_or("".to_string()); // Only add inline images, attachments are handled as an attribute of the top level Message and rendered separate client-side. if pcd.disposition == mailparse::DispositionType::Inline { // TODO: make URL generation more programatic based on what the frontend has // mapped parts.push(Body::html(format!( r#""#, part_addr[0], part_addr .iter() .skip(1) .map(|i| i.to_string()) .collect::>() .join(".") ))); } } _ => (), } part_addr.pop(); } Ok(flatten_body_parts(&parts)) } fn flatten_body_parts(parts: &[Body]) -> Body { let html = parts .iter() .map(|p| match p { Body::PlainText(PlainText { text, .. }) => { format!( r#"

{}

"#, // Trim newlines to prevent excessive white space at the beginning/end of // presenation. Leave tabs and spaces incase plain text attempts to center a // header on the first line. linkify_html(&text.trim_matches('\n')) ) } Body::Html(Html { html, .. }) => html.clone(), Body::UnhandledContentType(UnhandledContentType { text, .. }) => { error!("text len {}", text.len()); format!( r#"

{}

"#, // Trim newlines to prevent excessive white space at the beginning/end of // presenation. Leave tabs and spaces incase plain text attempts to center a // header on the first line. linkify_html(&text.trim_matches('\n')) ) } }) .collect::>() .join("\n"); info!("flatten_body_parts {} {html}", parts.len()); Body::html(html) } fn extract_related(m: &ParsedMail, part_addr: &mut Vec) -> Result { // TODO(wathiede): collect related things and change return type to new Body arm. let handled_types = vec![ MULTIPART_ALTERNATIVE, TEXT_HTML, TEXT_PLAIN, IMAGE_JPEG, IMAGE_PNG, ]; let mut unhandled_types: Vec<_> = m .subparts .iter() .map(|sp| sp.ctype.mimetype.as_str()) .filter(|mt| !handled_types.contains(&mt)) .collect(); unhandled_types.sort(); if !unhandled_types.is_empty() { warn!("{MULTIPART_RELATED} contains the following unhandled mimetypes {unhandled_types:?}"); } for (i, sp) in m.subparts.iter().enumerate() { if sp.ctype.mimetype == IMAGE_PNG || sp.ctype.mimetype == IMAGE_JPEG { info!("sp.ctype {:#?}", sp.ctype); //info!("sp.headers {:#?}", sp.headers); if let Some(cid) = sp.headers.get_first_value("Content-Id") { let mut part_id = part_addr.clone(); part_id.push(i.to_string()); info!("cid: {cid} part_id {part_id:?}"); } } } for sp in &m.subparts { if sp.ctype.mimetype == MULTIPART_ALTERNATIVE { return extract_alternative(m, part_addr); } } for sp in &m.subparts { if sp.ctype.mimetype == TEXT_HTML { let body = sp.get_body()?; return Ok(Body::html(body)); } } for sp in &m.subparts { if sp.ctype.mimetype == TEXT_PLAIN { let body = sp.get_body()?; return Ok(Body::text(body)); } } Err(ServerError::StringError(format!( "extract_related failed to find suitable subpart, searched: {:?}", handled_types ))) } fn walk_attachments Option + Copy>( m: &ParsedMail, visitor: F, ) -> Option { let mut cur_addr = Vec::new(); walk_attachments_inner(m, visitor, &mut cur_addr) } fn walk_attachments_inner Option + Copy>( m: &ParsedMail, visitor: F, cur_addr: &mut Vec, ) -> Option { for 
(idx, sp) in m.subparts.iter().enumerate() { cur_addr.push(idx); let val = visitor(sp, &cur_addr); if val.is_some() { return val; } let val = walk_attachments_inner(sp, visitor, cur_addr); if val.is_some() { return val; } cur_addr.pop(); } None } // TODO(wathiede): make this walk_attachments that takes a closure. // Then implement one closure for building `Attachment` and imlement another that can be used to // get the bytes for serving attachments of HTTP fn extract_attachments(m: &ParsedMail, id: &str) -> Result, ServerError> { let mut attachments = Vec::new(); for (idx, sp) in m.subparts.iter().enumerate() { if let Some(attachment) = extract_attachment(sp, id, &[idx]) { // Filter out inline attachements, they're flattened into the body of the message. if attachment.disposition == DispositionType::Attachment { attachments.push(attachment); } } } Ok(attachments) } fn extract_attachment(m: &ParsedMail, id: &str, idx: &[usize]) -> Option { let pcd = m.get_content_disposition(); // TODO: do we need to handle empty filename attachments, or should we change the definition of // Attachment::filename? let Some(filename) = pcd.params.get("filename").map(|f| f.clone()) else { return None; }; // TODO: grab this from somewhere let content_id = None; let bytes = match m.get_body_raw() { Ok(bytes) => bytes, Err(err) => { error!("failed to get body for attachment: {err}"); return None; } }; return Some(Attachment { id: id.to_string(), idx: idx .iter() .map(|i| i.to_string()) .collect::>() .join("."), disposition: pcd.disposition.into(), filename: Some(filename), size: bytes.len(), // TODO: what is the default for ctype? // TODO: do we want to use m.ctype.params for anything? content_type: Some(m.ctype.mimetype.clone()), content_id, bytes, }); } pub fn get_attachment_filename(header_value: &str) -> &str { info!("get_attachment_filename {header_value}"); // Strip last " let v = &header_value[..header_value.len() - 1]; if let Some(idx) = v.rfind('"') { &v[idx + 1..] 
} else { "" } } pub fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option { if let Some(v) = headers.get_first_value("Content-Type") { if let Some(idx) = v.find(';') { return Some(v[..idx].to_string()); } else { return Some(v); } } None } fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option { headers.get_first_value("Content-Id") } fn render_content_type_tree(m: &ParsedMail) -> String { const WIDTH: usize = 4; const SKIP_HEADERS: [&str; 4] = [ "Authentication-Results", "DKIM-Signature", "Received", "Received-SPF", ]; fn render_ct_rec(m: &ParsedMail, depth: usize) -> String { let mut parts = Vec::new(); let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype); parts.push(msg); for sp in &m.subparts { parts.push(render_ct_rec(sp, depth + 1)) } parts.join("\n") } fn render_rec(m: &ParsedMail, depth: usize) -> String { let mut parts = Vec::new(); let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype); parts.push(msg); let indent = " ".repeat(depth * WIDTH); if !m.ctype.charset.is_empty() { parts.push(format!("{indent} Character Set: {}", m.ctype.charset)); } for (k, v) in m.ctype.params.iter() { parts.push(format!("{indent} {k}: {v}")); } if !m.headers.is_empty() { parts.push(format!("{indent} == headers ==")); for h in &m.headers { if h.get_key().starts_with('X') { continue; } if SKIP_HEADERS.contains(&h.get_key().as_str()) { continue; } parts.push(format!("{indent} {}: {}", h.get_key_ref(), h.get_value())); } } for sp in &m.subparts { parts.push(render_rec(sp, depth + 1)) } parts.join("\n") } format!( "Outline:\n{}\n\nDetailed:\n{}\n\nNot showing headers:\n {}\n X.*", render_ct_rec(m, 1), render_rec(m, 1), SKIP_HEADERS.join("\n ") ) } pub async fn set_read_status<'ctx>( nm: &Notmuch, query: &str, unread: bool, ) -> Result { if unread { nm.tag_add("unread", &format!("{query}"))?; } else { nm.tag_remove("unread", &format!("{query}"))?; } Ok(true) }