use std::{ collections::HashMap, fs::File, hash::{DefaultHasher, Hash, Hasher}, str::FromStr, }; use async_graphql::{ connection::{self, Connection, Edge}, Context, EmptyMutation, EmptySubscription, Enum, Error, FieldResult, Object, Schema, SimpleObject, Union, }; use log::{error, info, warn}; use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail}; use memmap::MmapOptions; use notmuch::Notmuch; use rocket::time::Instant; pub struct QueryRoot; /// # Number of seconds since the Epoch pub type UnixTime = isize; /// # Thread ID, sans "thread:" pub type ThreadId = String; #[derive(Debug, SimpleObject)] pub struct ThreadSummary { pub thread: ThreadId, pub timestamp: UnixTime, /// user-friendly timestamp pub date_relative: String, /// number of matched messages pub matched: isize, /// total messages in thread pub total: isize, /// comma-separated names with | between matched and unmatched pub authors: String, pub subject: String, pub tags: Vec, } #[derive(Debug, SimpleObject)] pub struct Thread { subject: String, messages: Vec, } #[derive(Debug, SimpleObject)] pub struct Message { // Message-ID for message, prepend `id:` to search in notmuch pub id: String, // First From header found in email pub from: Option, // All To headers found in email pub to: Vec, // All CC headers found in email pub cc: Vec, // First Subject header found in email pub subject: Option, // Parsed Date header, if found and valid pub timestamp: Option, // Headers pub headers: Vec
, // The body contents pub body: Body, // On disk location of message pub path: String, pub attachments: Vec, pub tags: Vec, } // Content-Type: image/jpeg; name="PXL_20231125_204826860.jpg" // Content-Disposition: attachment; filename="PXL_20231125_204826860.jpg" // Content-Transfer-Encoding: base64 // Content-ID: // X-Attachment-Id: f_lponoluo1 #[derive(Debug, SimpleObject)] pub struct Attachment { filename: String, content_type: Option, content_id: Option, } #[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)] enum DispositionType { Inline, Attachment, } impl FromStr for DispositionType { type Err = String; // Required method fn from_str(s: &str) -> Result { Ok(match s { "inline" => DispositionType::Inline, "attachment" => DispositionType::Attachment, c => return Err(format!("unknown disposition type: {c}")), }) } } #[derive(Debug, SimpleObject)] pub struct Header { key: String, value: String, } #[derive(Debug)] pub struct UnhandledContentType { text: String, } #[Object] impl UnhandledContentType { async fn contents(&self) -> &str { &self.text } } #[derive(Debug)] pub struct PlainText { text: String, content_tree: String, } #[Object] impl PlainText { async fn contents(&self) -> &str { &self.text } async fn content_tree(&self) -> &str { &self.content_tree } } #[derive(Debug)] pub struct Html { html: String, content_tree: String, } #[Object] impl Html { async fn contents(&self) -> &str { &self.html } async fn content_tree(&self) -> &str { &self.content_tree } async fn headers(&self) -> Vec
{ Vec::new() } } #[derive(Debug, Union)] pub enum Body { UnhandledContentType(UnhandledContentType), PlainText(PlainText), Html(Html), } impl Body { fn html(html: String) -> Body { Body::Html(Html { html, content_tree: "".to_string(), }) } fn text(text: String) -> Body { Body::PlainText(PlainText { text, content_tree: "".to_string(), }) } } #[derive(Debug, SimpleObject)] pub struct Email { pub name: Option, pub addr: Option, } #[derive(SimpleObject)] struct Tag { name: String, fg_color: String, bg_color: String, unread: usize, } #[Object] impl QueryRoot { async fn count<'ctx>(&self, ctx: &Context<'ctx>, query: String) -> Result { let nm = ctx.data_unchecked::(); Ok(nm.count(&query)?) } async fn search<'ctx>( &self, ctx: &Context<'ctx>, after: Option, before: Option, first: Option, last: Option, query: String, ) -> Result, Error> { let nm = ctx.data_unchecked::(); connection::query( after, before, first, last, |after, before, first, last| async move { let total = nm.count(&query)?; let (first, last) = if let (None, None) = (first, last) { info!("neither first nor last set, defaulting first to 20"); (Some(20), None) } else { (first, last) }; let mut start = after.map(|after| after + 1).unwrap_or(0); let mut end = before.unwrap_or(total); if let Some(first) = first { end = (start + first).min(end); } if let Some(last) = last { start = if last > end - start { end } else { end - last }; } let count = end - start; let slice: Vec = nm .search(&query, start, count)? .0 .into_iter() .map(|ts| ThreadSummary { thread: ts.thread, timestamp: ts.timestamp, date_relative: ts.date_relative, matched: ts.matched, total: ts.total, authors: ts.authors, subject: ts.subject, tags: ts.tags, }) .collect(); let mut connection = Connection::new(start > 0, end < total); connection.edges.extend( slice .into_iter() .enumerate() .map(|(idx, item)| Edge::new(start + idx, item)), ); Ok::<_, Error>(connection) }, ) .await } async fn tags<'ctx>(&self, ctx: &Context<'ctx>) -> FieldResult> { let nm = ctx.data_unchecked::(); let now = Instant::now(); let needs_unread = ctx.look_ahead().field("unread").exists(); let unread_msg_cnt: HashMap = if needs_unread { // 10000 is an arbitrary number, if there's more than 10k unread messages, we'll // get an inaccurate count. nm.search("is:unread", 0, 10000)? .0 .iter() .fold(HashMap::new(), |mut m, ts| { ts.tags.iter().for_each(|t| { m.entry(t.clone()).and_modify(|c| *c += 1).or_insert(1); }); m }) } else { HashMap::new() }; let tags = nm .tags()? .into_iter() .map(|tag| { let mut hasher = DefaultHasher::new(); tag.hash(&mut hasher); let hex = format!("#{:06x}", hasher.finish() % (1 << 24)); let unread = if needs_unread { *unread_msg_cnt.get(&tag).unwrap_or(&0) } else { 0 }; Tag { name: tag, fg_color: "white".to_string(), bg_color: hex, unread, } }) .collect(); info!("Fetching tags took {}", now.elapsed()); Ok(tags) } async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result { // TODO(wathiede): normalize all email addresses through an address book with preferred // display names (that default to the most commonly seen name). let nm = ctx.data_unchecked::(); let debug_content_tree = ctx .look_ahead() .field("messages") .field("body") .field("contentTree") .exists(); let mut messages = Vec::new(); for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) { info!("{id}\nfile: {path}"); let msg = nm.show(&format!("id:{id}"))?; let tags = msg.0[0].0[0] .0 .as_ref() .map(|m| m.tags.clone()) .unwrap_or_else(Vec::default); let file = File::open(&path)?; let mmap = unsafe { MmapOptions::new().map(&file)? }; let m = parse_mail(&mmap)?; let from = email_addresses(&path, &m, "from")?; let from = match from.len() { 0 => None, 1 => from.into_iter().next(), _ => { warn!( "Got {} from addresses in message, truncating: {:?}", from.len(), from ); from.into_iter().next() } }; let to = email_addresses(&path, &m, "to")?; let cc = email_addresses(&path, &m, "cc")?; let subject = m.headers.get_first_value("subject"); let timestamp = m .headers .get_first_value("date") .and_then(|d| mailparse::dateparse(&d).ok()); let body = match extract_body(&m)? { Body::PlainText(PlainText { text, content_tree }) => Body::PlainText(PlainText { text, content_tree: if debug_content_tree { render_content_type_tree(&m) } else { content_tree }, }), Body::Html(Html { html, content_tree }) => Body::Html(Html { html: ammonia::clean(&html), content_tree: if debug_content_tree { render_content_type_tree(&m) } else { content_tree }, }), b => b, }; let headers = m .headers .iter() .map(|h| Header { key: h.get_key(), value: h.get_value(), }) .collect(); // TODO(wathiede): parse message and fill out attachments let attachments = extract_attachments(&m)?; messages.push(Message { id, from, to, cc, subject, tags, timestamp, headers, body, path, attachments, }); } messages.reverse(); // Find the first subject that's set. After reversing the vec, this should be the oldest // message. let subject: String = messages .iter() .skip_while(|m| m.subject.is_none()) .next() .and_then(|m| m.subject.clone()) .unwrap_or("(NO SUBJECT)".to_string()); Ok(Thread { subject, messages }) } } fn extract_body(m: &ParsedMail) -> Result { let body = m.get_body()?; let ret = match m.ctype.mimetype.as_str() { "text/plain" => return Ok(Body::text(body)), "text/html" => return Ok(Body::html(body)), "multipart/mixed" => extract_mixed(m), "multipart/alternative" => extract_alternative(m), _ => extract_unhandled(m), }; if let Err(err) = ret { error!("Failed to extract body: {err:?}"); return Ok(extract_unhandled(m)?); } ret } fn extract_unhandled(m: &ParsedMail) -> Result { let msg = format!( "Unhandled body content type:\n{}", render_content_type_tree(m) ); warn!("{}", msg); Ok(Body::UnhandledContentType(UnhandledContentType { text: msg, })) } // multipart/alternative defines multiple representations of the same message, and clients should // show the fanciest they can display. For this program, the priority is text/html, text/plain, // then give up. fn extract_alternative(m: &ParsedMail) -> Result { for sp in &m.subparts { if sp.ctype.mimetype == "text/html" { let body = sp.get_body()?; return Ok(Body::html(body)); } } for sp in &m.subparts { if sp.ctype.mimetype == "text/plain" { let body = sp.get_body()?; return Ok(Body::text(body)); } } Err("extract_alternative".into()) } // multipart/mixed defines multiple types of context all of which should be presented to the user // 'serially'. fn extract_mixed(m: &ParsedMail) -> Result { for sp in &m.subparts { if sp.ctype.mimetype == "multipart/alternative" { return extract_alternative(sp); } } for sp in &m.subparts { if sp.ctype.mimetype == "multipart/related" { return extract_related(sp); } } for sp in &m.subparts { let body = sp.get_body()?; match sp.ctype.mimetype.as_str() { "text/plain" => return Ok(Body::text(body)), "text/html" => return Ok(Body::html(body)), _ => (), } } Err("extract_mixed".into()) } fn extract_related(m: &ParsedMail) -> Result { // TODO(wathiede): collect related things and change return type to new Body arm. for sp in &m.subparts { if sp.ctype.mimetype == "text/html" { let body = sp.get_body()?; return Ok(Body::html(body)); } } for sp in &m.subparts { if sp.ctype.mimetype == "text/plain" { let body = sp.get_body()?; return Ok(Body::text(body)); } } Err("extract_related".into()) } // TODO(wathiede): make this walk_attachments that takes a closure. // Then implement one closure for building `Attachment` and imlement another that can be used to // get the bytes for serving attachments of HTTP fn extract_attachments(m: &ParsedMail) -> Result, Error> { let mut attachements = Vec::new(); for sp in &m.subparts { for h in &sp.headers { if h.get_key() == "Content-Disposition" { let v = h.get_value(); if let Some(idx) = v.find(";") { let dt = &v[..idx]; match DispositionType::from_str(dt) { Ok(DispositionType::Attachment) => { attachements.push(Attachment { filename: get_attachment_filename(&v).to_string(), content_type: get_content_type(&sp.headers), content_id: get_content_id(&sp.headers), }); } Ok(DispositionType::Inline) => continue, Err(e) => { warn!("failed to parse Content-Disposition type '{}'", e); continue; } }; } else { warn!("header has Content-Disposition missing ';'"); continue; } } } } Ok(attachements) } fn get_attachment_filename(header_value: &str) -> &str { // Strip last " let v = &header_value[..header_value.len() - 1]; if let Some(idx) = v.rfind('"') { &v[idx + 1..] } else { "" } } fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option { for h in headers { if h.get_key() == "Content-Type" { let v = h.get_value(); if let Some(idx) = v.find(';') { return Some(v[..idx].to_string()); } else { return Some(v); } } } None } fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option { for h in headers { if h.get_key() == "Content-ID" { return Some(h.get_value()); } } None } fn render_content_type_tree(m: &ParsedMail) -> String { const WIDTH: usize = 4; fn render_rec(m: &ParsedMail, depth: usize) -> String { let mut parts = Vec::new(); let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype); parts.push(msg); let indent = " ".repeat(depth * WIDTH); if !m.ctype.charset.is_empty() { parts.push(format!("{indent} Character Set: {}", m.ctype.charset)); } for (k, v) in m.ctype.params.iter() { parts.push(format!("{indent} {k}: {v}")); } if !m.headers.is_empty() { parts.push(format!("{indent} == headers ==")); for h in &m.headers { parts.push(format!("{indent} {}: {}", h.get_key(), h.get_value())); } } for sp in &m.subparts { parts.push(render_rec(sp, depth + 1)) } parts.join("\n") } render_rec(m, 1) } pub type GraphqlSchema = Schema; fn email_addresses(path: &str, m: &ParsedMail, header_name: &str) -> Result, Error> { let mut addrs = Vec::new(); for header_value in m.headers.get_all_values(header_name) { match mailparse::addrparse(&header_value) { Ok(mal) => { for ma in mal.into_inner() { match ma { mailparse::MailAddr::Group(gi) => { if !gi.group_name.contains("ndisclosed") { println!("[{path}][{header_name}] Group: {gi}"); } } mailparse::MailAddr::Single(s) => addrs.push(Email { name: s.display_name, addr: Some(s.addr), }), //println!("Single: {s}"), } } } Err(_) => { let v = header_value; if v.matches('@').count() == 1 { if v.matches('<').count() == 1 && v.ends_with('>') { let idx = v.find('<').unwrap(); let addr = &v[idx + 1..v.len() - 1].trim(); let name = &v[..idx].trim(); addrs.push(Email { name: Some(name.to_string()), addr: Some(addr.to_string()), }); } } else { addrs.push(Email { name: Some(v), addr: None, }); } } } } Ok(addrs) }