letterbox/server/src/graphql.rs

629 lines
19 KiB
Rust

use std::{
collections::HashMap,
fs::File,
hash::{DefaultHasher, Hash, Hasher},
str::FromStr,
};
use async_graphql::{
connection::{self, Connection, Edge},
Context, EmptyMutation, EmptySubscription, Enum, Error, FieldResult, Object, Schema,
SimpleObject, Union,
};
use log::{error, info, warn};
use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail};
use memmap::MmapOptions;
use notmuch::Notmuch;
use rocket::time::Instant;
/// Root of the GraphQL query tree; its resolvers (`count`, `search`, `tags`, `thread`) live in
/// the `#[Object] impl QueryRoot` block of this file.
pub struct QueryRoot;
/// # Number of seconds since the Epoch
// NOTE(review): `Message::timestamp` in this file is `Option<i64>` rather than `UnixTime`;
// confirm whether the two are meant to agree.
pub type UnixTime = isize;
/// # Thread ID, sans "thread:"
pub type ThreadId = String;
// One thread in a search result. `QueryRoot::search` fills every field by copying the
// same-named field of the summary returned by `Notmuch::search`.
#[derive(Debug, SimpleObject)]
pub struct ThreadSummary {
// Thread identifier (see `ThreadId`).
pub thread: ThreadId,
// Timestamp of the thread (see `UnixTime`).
pub timestamp: UnixTime,
/// user-friendly timestamp
pub date_relative: String,
/// number of matched messages
pub matched: isize,
/// total messages in thread
pub total: isize,
/// comma-separated names with | between matched and unmatched
pub authors: String,
// Subject reported for the thread.
pub subject: String,
// Tags reported for the thread.
pub tags: Vec<String>,
}
// A fully loaded thread, as built by `QueryRoot::thread`.
#[derive(Debug, SimpleObject)]
pub struct Thread {
// First subject found among `messages`, or "(NO SUBJECT)" when none of them has one.
subject: String,
// Parsed messages of the thread, in the reverse of the order they were loaded.
messages: Vec<Message>,
}
// A single parsed email, assembled in `QueryRoot::thread` from the on-disk file and from the
// output of `Notmuch::show`.
#[derive(Debug, SimpleObject)]
pub struct Message {
// Message-ID of the message; search for it in notmuch as `id:<id>`
pub id: String,
// First From address found in the email (extra addresses are dropped with a warning)
pub from: Option<Email>,
// All To addresses found in the email
pub to: Vec<Email>,
// All CC addresses found in the email
pub cc: Vec<Email>,
// First Subject header found in the email
pub subject: Option<String>,
// Date header as parsed by `mailparse::dateparse`; `None` when missing or unparsable
pub timestamp: Option<i64>,
// Every header of the top-level message, in file order
pub headers: Vec<Header>,
// The body contents chosen by `extract_body`
pub body: Body,
// On-disk location of the message
pub path: String,
// Attachments found by `extract_attachments`
pub attachments: Vec<Attachment>,
// Tags taken from the `Notmuch::show` output for this message
pub tags: Vec<String>,
}
// Metadata describing one attachment part. Example of the headers it is derived from:
//
// Content-Type: image/jpeg; name="PXL_20231125_204826860.jpg"
// Content-Disposition: attachment; filename="PXL_20231125_204826860.jpg"
// Content-Transfer-Encoding: base64
// Content-ID: <f_lponoluo1>
// X-Attachment-Id: f_lponoluo1
#[derive(Debug, SimpleObject)]
pub struct Attachment {
// Filename extracted from the Content-Disposition value by `get_attachment_filename`.
filename: String,
// Content-Type value up to the first ';', if the part has that header (`get_content_type`).
content_type: Option<String>,
// Content-ID header value, if the part has one (`get_content_id`).
content_id: Option<String>,
}
// Disposition type named at the start of a Content-Disposition header value.
// `extract_attachments` only reports parts whose type is `Attachment`; `Inline` parts are skipped.
#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)]
enum DispositionType {
Inline,
Attachment,
}
impl FromStr for DispositionType {
    type Err = String;

    /// Parses the disposition type token of a Content-Disposition header.
    ///
    /// Surrounding whitespace is ignored and matching is ASCII case-insensitive, because
    /// disposition types are case-insensitive (`Attachment` and `attachment` are the same).
    ///
    /// # Errors
    /// Returns `unknown disposition type: <input>` for anything other than `inline` or
    /// `attachment`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let kind = s.trim();
        if kind.eq_ignore_ascii_case("inline") {
            Ok(DispositionType::Inline)
        } else if kind.eq_ignore_ascii_case("attachment") {
            Ok(DispositionType::Attachment)
        } else {
            Err(format!("unknown disposition type: {s}"))
        }
    }
}
// One header of a message, as produced in `QueryRoot::thread` from the parsed mail's
// `get_key()` / `get_value()`.
#[derive(Debug, SimpleObject)]
pub struct Header {
// Header name.
key: String,
// Header value.
value: String,
}
// Body variant used when no displayable part could be extracted from a message.
#[derive(Debug)]
pub struct UnhandledContentType {
    // Explanation text; `extract_unhandled` fills it with the rendered content-type tree.
    text: String,
}

#[Object]
impl UnhandledContentType {
    // Exposes the explanation text as the `contents` field.
    async fn contents(&self) -> &str {
        self.text.as_str()
    }
}
// Body variant carrying a text/plain part.
#[derive(Debug)]
pub struct PlainText {
    // Decoded text of the part.
    text: String,
    // Debug rendering of the message's MIME structure; empty unless `QueryRoot::thread` saw the
    // `contentTree` field being requested.
    content_tree: String,
}

#[Object]
impl PlainText {
    // Exposes the decoded text as the `contents` field.
    async fn contents(&self) -> &str {
        self.text.as_str()
    }

    // Exposes the debug MIME tree.
    async fn content_tree(&self) -> &str {
        self.content_tree.as_str()
    }
}
// Body variant carrying a text/html part.
#[derive(Debug)]
pub struct Html {
    // HTML markup of the part (`QueryRoot::thread` passes it through `ammonia::clean`).
    html: String,
    // Debug rendering of the message's MIME structure; empty unless `QueryRoot::thread` saw the
    // `contentTree` field being requested.
    content_tree: String,
}

#[Object]
impl Html {
    // Exposes the HTML markup as the `contents` field.
    async fn contents(&self) -> &str {
        self.html.as_str()
    }

    // Exposes the debug MIME tree.
    async fn content_tree(&self) -> &str {
        self.content_tree.as_str()
    }

    // Placeholder: currently always resolves to an empty list.
    async fn headers(&self) -> Vec<Header> {
        vec![]
    }
}
// The displayable body of a message, as chosen by `extract_body`.
#[derive(Debug, Union)]
pub enum Body {
// No text/plain or text/html part could be extracted; carries an explanation instead.
UnhandledContentType(UnhandledContentType),
// A text/plain part.
PlainText(PlainText),
// A text/html part.
Html(Html),
}
impl Body {
    /// Wraps HTML markup in [`Body::Html`] with an empty content tree.
    fn html(html: String) -> Body {
        let content_tree = String::new();
        Body::Html(Html { html, content_tree })
    }

    /// Wraps plain text in [`Body::PlainText`] with an empty content tree.
    fn text(text: String) -> Body {
        let content_tree = String::new();
        Body::PlainText(PlainText { text, content_tree })
    }
}
// One address taken from a From/To/CC header by `email_addresses`.
#[derive(Debug, SimpleObject)]
pub struct Email {
// Display name, when the header supplied one; also holds the raw header text when the value
// could not be parsed as an address.
pub name: Option<String>,
// The address itself; `None` when the header value could not be parsed as an address.
pub addr: Option<String>,
}
// A notmuch tag plus display hints, as built by `QueryRoot::tags`.
#[derive(SimpleObject)]
struct Tag {
// Tag name as returned by `Notmuch::tags`.
name: String,
// Foreground color; `QueryRoot::tags` always sets this to "white".
fg_color: String,
// Background color as `#rrggbb`, derived from a hash of the tag name.
bg_color: String,
// Number of "is:unread" search results carrying this tag; 0 when the `unread` field was not
// requested.
unread: usize,
}
#[Object]
impl QueryRoot {
    // Number of results notmuch reports for `query`.
    async fn count<'ctx>(&self, ctx: &Context<'ctx>, query: String) -> Result<usize, Error> {
        let nm = ctx.data_unchecked::<Notmuch>();
        Ok(nm.count(&query)?)
    }

    // Cursor-paginated search. Cursors are zero-based offsets into the result list of `query`.
    // When neither `first` nor `last` is supplied the page defaults to the first 20 results.
    async fn search<'ctx>(
        &self,
        ctx: &Context<'ctx>,
        after: Option<String>,
        before: Option<String>,
        first: Option<i32>,
        last: Option<i32>,
        query: String,
    ) -> Result<Connection<usize, ThreadSummary>, Error> {
        let nm = ctx.data_unchecked::<Notmuch>();
        connection::query(
            after,
            before,
            first,
            last,
            |after, before, first, last| async move {
                let total = nm.count(&query)?;
                let (first, last) = if let (None, None) = (first, last) {
                    info!("neither first nor last set, defaulting first to 20");
                    (Some(20), None)
                } else {
                    (first, last)
                };
                // `page_window` guarantees `start <= end <= total`, so the subtraction below
                // cannot underflow even for client-supplied cursors outside the result set.
                let (start, end) = page_window(after, before, first, last, total);
                let count = end - start;
                let slice: Vec<ThreadSummary> = nm
                    .search(&query, start, count)?
                    .0
                    .into_iter()
                    .map(|ts| ThreadSummary {
                        thread: ts.thread,
                        timestamp: ts.timestamp,
                        date_relative: ts.date_relative,
                        matched: ts.matched,
                        total: ts.total,
                        authors: ts.authors,
                        subject: ts.subject,
                        tags: ts.tags,
                    })
                    .collect();
                let mut connection = Connection::new(start > 0, end < total);
                connection.edges.extend(
                    slice
                        .into_iter()
                        .enumerate()
                        .map(|(idx, item)| Edge::new(start + idx, item)),
                );
                Ok::<_, Error>(connection)
            },
        )
        .await
    }

    // Lists every tag with a color derived from its name. The unread counts are only computed
    // when the client actually selected the `unread` field.
    async fn tags<'ctx>(&self, ctx: &Context<'ctx>) -> FieldResult<Vec<Tag>> {
        let nm = ctx.data_unchecked::<Notmuch>();
        let now = Instant::now();
        let needs_unread = ctx.look_ahead().field("unread").exists();
        let unread_msg_cnt: HashMap<String, usize> = if needs_unread {
            // 10000 is an arbitrary number, if there's more than 10k unread results, we'll
            // get an inaccurate count.
            nm.search("is:unread", 0, 10000)?
                .0
                .iter()
                .fold(HashMap::new(), |mut m, ts| {
                    for t in &ts.tags {
                        *m.entry(t.clone()).or_insert(0) += 1;
                    }
                    m
                })
        } else {
            HashMap::new()
        };
        let tags = nm
            .tags()?
            .into_iter()
            .map(|tag| {
                // Stable pseudo-random background color: low 24 bits of the tag name's hash.
                let mut hasher = DefaultHasher::new();
                tag.hash(&mut hasher);
                let hex = format!("#{:06x}", hasher.finish() % (1 << 24));
                // The map is empty when `unread` was not requested, so this yields 0 then.
                let unread = unread_msg_cnt.get(&tag).copied().unwrap_or(0);
                Tag {
                    name: tag,
                    fg_color: "white".to_string(),
                    bg_color: hex,
                    unread,
                }
            })
            .collect();
        info!("Fetching tags took {}", now.elapsed());
        Ok(tags)
    }

    // Loads and parses every message of `thread_id` from disk.
    async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
        // TODO(wathiede): normalize all email addresses through an address book with preferred
        // display names (that default to the most commonly seen name).
        let nm = ctx.data_unchecked::<Notmuch>();
        // Rendering the MIME tree is debug output, so only do it when it was asked for.
        let debug_content_tree = ctx
            .look_ahead()
            .field("messages")
            .field("body")
            .field("contentTree")
            .exists();
        let mut messages = Vec::new();
        // NOTE(review): this pairs the n-th file with the n-th message id, which assumes both
        // lists come back in the same order with one file per message; confirm.
        for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) {
            info!("{id}\nfile: {path}");
            let msg = nm.show(&format!("id:{id}"))?;
            // Tags of the first message in the show output. An empty result yields no tags
            // instead of panicking on an out-of-bounds index.
            let tags = msg
                .0
                .first()
                .and_then(|thread| thread.0.first())
                .and_then(|node| node.0.as_ref())
                .map(|m| m.tags.clone())
                .unwrap_or_default();
            let file = File::open(&path)?;
            // SAFETY: NOTE(review): soundness relies on the mail file not being truncated or
            // rewritten while it is mapped; nothing in this function enforces that.
            let mmap = unsafe { MmapOptions::new().map(&file)? };
            let m = parse_mail(&mmap)?;
            let from_addrs = email_addresses(&path, &m, "from")?;
            if from_addrs.len() > 1 {
                warn!(
                    "Got {} from addresses in message, truncating: {:?}",
                    from_addrs.len(),
                    from_addrs
                );
            }
            let from = from_addrs.into_iter().next();
            let to = email_addresses(&path, &m, "to")?;
            let cc = email_addresses(&path, &m, "cc")?;
            let subject = m.headers.get_first_value("subject");
            let timestamp = m
                .headers
                .get_first_value("date")
                .and_then(|d| mailparse::dateparse(&d).ok());
            let body = match extract_body(&m)? {
                Body::PlainText(PlainText { text, content_tree }) => Body::PlainText(PlainText {
                    text,
                    content_tree: if debug_content_tree {
                        render_content_type_tree(&m)
                    } else {
                        content_tree
                    },
                }),
                Body::Html(Html { html, content_tree }) => Body::Html(Html {
                    // Sanitize HTML before handing it to the client.
                    html: ammonia::clean(&html),
                    content_tree: if debug_content_tree {
                        render_content_type_tree(&m)
                    } else {
                        content_tree
                    },
                }),
                b => b,
            };
            let headers = m
                .headers
                .iter()
                .map(|h| Header {
                    key: h.get_key(),
                    value: h.get_value(),
                })
                .collect();
            let attachments = extract_attachments(&m)?;
            messages.push(Message {
                id,
                from,
                to,
                cc,
                subject,
                tags,
                timestamp,
                headers,
                body,
                path,
                attachments,
            });
        }
        messages.reverse();
        // Find the first subject that's set. After reversing the vec, this should be the oldest
        // message.
        let subject = messages
            .iter()
            .find_map(|m| m.subject.clone())
            .unwrap_or_else(|| "(NO SUBJECT)".to_string());
        Ok(Thread { subject, messages })
    }
}

/// Computes the half-open `[start, end)` range of result offsets for one page.
///
/// `after` / `before` are exclusive cursor offsets, `first` / `last` are page-size limits and
/// `total` is the size of the full result set. The returned pair always satisfies
/// `start <= end <= total`, so callers may compute `end - start` without underflow.
///
/// `first` keeps the leading items of the cursor window and `last` keeps the trailing items of
/// what remains; a limit larger than the window leaves the window unchanged.
fn page_window(
    after: Option<usize>,
    before: Option<usize>,
    first: Option<usize>,
    last: Option<usize>,
    total: usize,
) -> (usize, usize) {
    let mut end = before.map_or(total, |b| b.min(total));
    let mut start = after.map_or(0, |a| a.saturating_add(1)).min(end);
    if let Some(first) = first {
        end = start.saturating_add(first).min(end);
    }
    if let Some(last) = last {
        start = end.saturating_sub(last).max(start);
    }
    (start, end)
}
/// Picks the body to display for `m` based on its top-level content type.
///
/// `text/plain` and `text/html` messages are decoded directly and decode errors are returned
/// to the caller. `multipart/mixed` and `multipart/alternative` are delegated to their
/// extractors; if those fail, the failure is logged and the result of `extract_unhandled` is
/// returned instead. Any other content type goes straight to `extract_unhandled`.
///
/// The container's own body is only decoded for the two text types: for multipart messages
/// it is never used, so decoding it would be wasted work and one more way to fail.
fn extract_body(m: &ParsedMail) -> Result<Body, Error> {
    let extracted = match m.ctype.mimetype.as_str() {
        "text/plain" => return Ok(Body::text(m.get_body()?)),
        "text/html" => return Ok(Body::html(m.get_body()?)),
        "multipart/mixed" => extract_mixed(m),
        "multipart/alternative" => extract_alternative(m),
        _ => extract_unhandled(m),
    };
    extracted.or_else(|err| {
        error!("Failed to extract body: {err:?}");
        extract_unhandled(m)
    })
}
/// Builds the fallback body for a message whose content could not be extracted: an explanatory
/// text containing the rendered content-type tree. The text is also logged as a warning.
fn extract_unhandled(m: &ParsedMail) -> Result<Body, Error> {
    let tree = render_content_type_tree(m);
    let text = format!("Unhandled body content type:\n{tree}");
    warn!("{text}");
    Ok(Body::UnhandledContentType(UnhandledContentType { text }))
}
// multipart/alternative defines multiple representations of the same message, and clients should
// show the fanciest they can display. For this program, the priority is text/html, text/plain,
// then give up.
//
// Only the direct subparts of `m` are inspected. Fails with "extract_alternative" when neither
// type is present, or with the decode error of the chosen part.
fn extract_alternative(m: &ParsedMail) -> Result<Body, Error> {
    let html_part = m.subparts.iter().find(|sp| sp.ctype.mimetype == "text/html");
    if let Some(part) = html_part {
        return Ok(Body::html(part.get_body()?));
    }
    let text_part = m.subparts.iter().find(|sp| sp.ctype.mimetype == "text/plain");
    if let Some(part) = text_part {
        return Ok(Body::text(part.get_body()?));
    }
    Err("extract_alternative".into())
}
// multipart/mixed defines multiple types of content, all of which should be presented to the
// user 'serially'. This implementation only returns a single body, chosen from the direct
// subparts in this order of preference:
//   1. the first multipart/alternative subpart (via `extract_alternative`),
//   2. the first multipart/related subpart (via `extract_related`),
//   3. the first text/plain or text/html subpart, whichever comes first.
//
// Fails with "extract_mixed" when none of these is present. A subpart is only decoded once its
// type has matched, so attachments are never decoded here and cannot make extraction fail.
fn extract_mixed(m: &ParsedMail) -> Result<Body, Error> {
    let first_of = |mimetype: &str| m.subparts.iter().find(|sp| sp.ctype.mimetype == mimetype);
    if let Some(alternative) = first_of("multipart/alternative") {
        return extract_alternative(alternative);
    }
    if let Some(related) = first_of("multipart/related") {
        return extract_related(related);
    }
    for sp in &m.subparts {
        match sp.ctype.mimetype.as_str() {
            "text/plain" => return Ok(Body::text(sp.get_body()?)),
            "text/html" => return Ok(Body::html(sp.get_body()?)),
            _ => (),
        }
    }
    Err("extract_mixed".into())
}
// Chooses a body from the direct subparts of a multipart/related part: the first text/html
// subpart if there is one, otherwise the first text/plain subpart.
//
// Fails with "extract_related" when neither type is present, or with the decode error of the
// chosen part.
fn extract_related(m: &ParsedMail) -> Result<Body, Error> {
    // TODO(wathiede): collect related things and change return type to new Body arm.
    let html_part = m.subparts.iter().find(|sp| sp.ctype.mimetype == "text/html");
    if let Some(part) = html_part {
        return Ok(Body::html(part.get_body()?));
    }
    let text_part = m.subparts.iter().find(|sp| sp.ctype.mimetype == "text/plain");
    if let Some(part) = text_part {
        return Ok(Body::text(part.get_body()?));
    }
    Err("extract_related".into())
}
// TODO(wathiede): make this walk_attachments that takes a closure.
// Then implement one closure for building `Attachment` and implement another that can be used
// to get the bytes for serving attachments over HTTP.
//
// Lists the direct subparts of `m` whose Content-Disposition type is `attachment`; nested
// parts are not visited. The header name is matched case-insensitively. The disposition type
// is the text before the first ';', or the whole value when there are no parameters, so a bare
// `attachment` is reported (with an empty filename) and a bare `inline` is skipped quietly.
// Unknown disposition types are logged and skipped.
fn extract_attachments(m: &ParsedMail) -> Result<Vec<Attachment>, Error> {
    let mut attachments = Vec::new();
    for sp in &m.subparts {
        for h in &sp.headers {
            if !h.get_key().eq_ignore_ascii_case("Content-Disposition") {
                continue;
            }
            let v = h.get_value();
            let dt = v.split(';').next().unwrap_or_default().trim();
            match DispositionType::from_str(dt) {
                Ok(DispositionType::Attachment) => attachments.push(Attachment {
                    filename: get_attachment_filename(&v).to_string(),
                    content_type: get_content_type(&sp.headers),
                    content_id: get_content_id(&sp.headers),
                }),
                Ok(DispositionType::Inline) => {}
                Err(e) => warn!("failed to parse Content-Disposition type '{}'", e),
            }
        }
    }
    Ok(attachments)
}
/// Extracts the `filename` parameter from a Content-Disposition header value such as
/// `attachment; filename="photo.jpg"`.
///
/// Parameters are the `;`-separated `key=value` pairs after the disposition type. The key is
/// matched ASCII case-insensitively. The value may be quoted (returned without the quotes, and
/// allowed to contain `;`) or a bare token (returned trimmed). The parameter may appear
/// anywhere in the list.
///
/// Returns `""` when there is no `filename` parameter; never panics, including on empty or
/// non-ASCII input. Backslash escapes inside a quoted value are not interpreted.
fn get_attachment_filename(header_value: &str) -> &str {
    // Everything before the first ';' is the disposition type itself.
    let Some((_, mut rest)) = header_value.split_once(';') else {
        return "";
    };
    loop {
        let Some((key, after_eq)) = rest.split_once('=') else {
            return "";
        };
        // A preceding parameter without '=' may have been swallowed into `key`; keep only the
        // text after the last ';'.
        let key = key.rsplit(';').next().unwrap_or(key).trim();
        let after_eq = after_eq.trim_start();
        let (value, next) = if let Some(quoted) = after_eq.strip_prefix('"') {
            // Quoted value: runs to the closing quote, or to the end if it is unterminated.
            let (inner, tail) = quoted.split_once('"').unwrap_or((quoted, ""));
            (inner, tail.split_once(';').map_or("", |(_, t)| t))
        } else {
            // Bare token: runs to the next parameter separator.
            let (token, tail) = after_eq.split_once(';').unwrap_or((after_eq, ""));
            (token.trim_end(), tail)
        };
        if key.eq_ignore_ascii_case("filename") {
            return value;
        }
        rest = next;
    }
}
/// Returns the media type from the first Content-Type header in `headers`: the header value up
/// to the first `;` (parameters such as `name=` or `charset=` are dropped), trimmed.
///
/// The header name is matched ASCII case-insensitively. Returns `None` when no such header
/// exists.
fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
    headers
        .iter()
        .find(|h| h.get_key().eq_ignore_ascii_case("Content-Type"))
        .map(|h| {
            let value = h.get_value();
            let mime = value.split(';').next().unwrap_or_default();
            mime.trim().to_string()
        })
}
/// Returns the value of the first Content-ID header in `headers`, or `None` when there is
/// none. The header name is matched ASCII case-insensitively.
fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
    headers
        .iter()
        .find(|h| h.get_key().eq_ignore_ascii_case("Content-ID"))
        .map(|h| h.get_value())
}
/// Renders the MIME structure of `m` as indented text, for debugging.
///
/// Each part contributes a line of dashes followed by its MIME type, then its character set
/// (when non-empty), its content-type parameters and its headers; subparts follow, one level
/// deeper. Lines are joined with `\n`.
fn render_content_type_tree(m: &ParsedMail) -> String {
    const WIDTH: usize = 4;

    // Appends the lines for `part` and, recursively, all of its subparts to `lines`.
    fn collect_lines(part: &ParsedMail, depth: usize, lines: &mut Vec<String>) {
        let dashes = "-".repeat(depth * WIDTH);
        lines.push(format!("{} {}", dashes, part.ctype.mimetype));
        let pad = " ".repeat(depth * WIDTH);
        if !part.ctype.charset.is_empty() {
            lines.push(format!("{pad} Character Set: {}", part.ctype.charset));
        }
        lines.extend(
            part.ctype
                .params
                .iter()
                .map(|(k, v)| format!("{pad} {k}: {v}")),
        );
        if !part.headers.is_empty() {
            lines.push(format!("{pad} == headers =="));
            lines.extend(
                part.headers
                    .iter()
                    .map(|h| format!("{pad} {}: {}", h.get_key(), h.get_value())),
            );
        }
        for child in &part.subparts {
            collect_lines(child, depth + 1, lines);
        }
    }

    let mut lines = Vec::new();
    collect_lines(m, 1, &mut lines);
    lines.join("\n")
}
/// The schema type served by this module: `QueryRoot` for queries, with no mutations and no
/// subscriptions.
pub type GraphqlSchema = Schema<QueryRoot, EmptyMutation, EmptySubscription>;
fn email_addresses(path: &str, m: &ParsedMail, header_name: &str) -> Result<Vec<Email>, Error> {
let mut addrs = Vec::new();
for header_value in m.headers.get_all_values(header_name) {
match mailparse::addrparse(&header_value) {
Ok(mal) => {
for ma in mal.into_inner() {
match ma {
mailparse::MailAddr::Group(gi) => {
if !gi.group_name.contains("ndisclosed") {
println!("[{path}][{header_name}] Group: {gi}");
}
}
mailparse::MailAddr::Single(s) => addrs.push(Email {
name: s.display_name,
addr: Some(s.addr),
}), //println!("Single: {s}"),
}
}
}
Err(_) => {
let v = header_value;
if v.matches('@').count() == 1 {
if v.matches('<').count() == 1 && v.ends_with('>') {
let idx = v.find('<').unwrap();
let addr = &v[idx + 1..v.len() - 1].trim();
let name = &v[..idx].trim();
addrs.push(Email {
name: Some(name.to_string()),
addr: Some(addr.to_string()),
});
}
} else {
addrs.push(Email {
name: Some(v),
addr: None,
});
}
}
}
}
Ok(addrs)
}