746 lines
25 KiB
Rust

use std::{
collections::HashMap,
fs::File,
hash::{DefaultHasher, Hash, Hasher},
time::Instant,
};
use log::{error, info, warn};
use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail};
use memmap::MmapOptions;
use notmuch::Notmuch;
use crate::{
compute_offset_limit,
error::ServerError,
graphql::{
Attachment, Body, DispositionType, Email, EmailThread, Header, Html, Message, PlainText,
Tag, Thread, ThreadSummary, UnhandledContentType,
},
linkify_html, sanitize_html,
};
// MIME types this module matches on when walking a message's part tree.
const TEXT_PLAIN: &str = "text/plain";
const TEXT_HTML: &str = "text/html";
const IMAGE_JPEG: &str = "image/jpeg";
const IMAGE_PNG: &str = "image/png";
const MULTIPART_ALTERNATIVE: &str = "multipart/alternative";
const MULTIPART_MIXED: &str = "multipart/mixed";
const MULTIPART_RELATED: &str = "multipart/related";
// Plain-text bodies longer than this many bytes are truncated before being rendered.
const MAX_RAW_MESSAGE_SIZE: usize = 100_000;
// TODO(wathiede): decide good error type
/// Placeholder conversion from a notmuch thread set into [`Message`]s.
///
/// The current implementation only walks every thread and every node inside it, discarding
/// each one, and then returns an empty vector.
pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result<Vec<Message>, ServerError> {
    thread_set
        .0
        .into_iter()
        .for_each(|t| t.0.into_iter().for_each(drop));
    Ok(Vec::new())
}
/// Returns the number reported by `nm.count` for `query`.
///
/// # Errors
/// Propagates any error from the notmuch call, converted into [`ServerError`].
pub async fn count(nm: &Notmuch, query: &str) -> Result<usize, ServerError> {
    let matches = nm.count(query)?;
    Ok(matches)
}
/// Runs `query` against notmuch and returns one page of thread summaries.
///
/// The page window is derived from the cursor arguments (`after`, `before`, `first`, `last`)
/// by `compute_offset_limit`. Each summary is paired with its absolute position in the result
/// set (`offset` plus its index within the page).
///
/// # Errors
/// Propagates any error from the notmuch search.
pub async fn search(
    nm: &Notmuch,
    after: Option<i32>,
    before: Option<i32>,
    first: Option<i32>,
    last: Option<i32>,
    query: String,
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
    let (offset, limit) = compute_offset_limit(after, before, first, last);
    // When searching forward, the +1 is to see if there are more pages of data available.
    // Searching backwards implies there's more pages forward, because the value represented by
    // `before` is on the next page.
    let limit = if before.is_none() { limit + 1 } else { limit };
    let results = nm.search(&query, offset as usize, limit as usize)?;
    let mut page = Vec::new();
    for (i, ts) in results.0.into_iter().enumerate() {
        let summary = ThreadSummary {
            thread: format!("thread:{}", ts.thread),
            timestamp: ts.timestamp,
            date_relative: ts.date_relative,
            matched: ts.matched,
            total: ts.total,
            authors: ts.authors,
            subject: ts.subject,
            tags: ts.tags,
        };
        page.push((offset + i as i32, summary));
    }
    Ok(page)
}
/// Lists every tag known to notmuch, with a colour derived from the tag name.
///
/// When `needs_unread` is true, the results of an `is:unread` search are tallied per tag and
/// reported in [`Tag::unread`]; otherwise every tag reports `0`. The time taken is logged.
///
/// # Errors
/// Propagates any error from the notmuch calls.
pub fn tags(nm: &Notmuch, needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
    let started = Instant::now();
    let mut unread_by_tag: HashMap<String, usize> = HashMap::new();
    if needs_unread {
        // 10000 is an arbitrary number, if there's more than 10k unread results, we'll
        // get an inaccurate count.
        let unread = nm.search("is:unread", 0, 10000)?;
        for summary in unread.0.iter() {
            for tag in summary.tags.iter() {
                *unread_by_tag.entry(tag.clone()).or_insert(0) += 1;
            }
        }
    }
    let mut tags = Vec::new();
    for tag in nm.tags()? {
        // Hash the tag name down to 24 bits and use it as an RGB background colour.
        let mut hasher = DefaultHasher::new();
        tag.hash(&mut hasher);
        let rgb = hasher.finish() % (1 << 24);
        // The map is empty when `needs_unread` is false, so every lookup yields 0.
        let unread = unread_by_tag.get(&tag).copied().unwrap_or(0);
        tags.push(Tag {
            name: tag,
            fg_color: "white".to_string(),
            bg_color: format!("#{:06x}", rgb),
            unread,
        });
    }
    info!(
        "Fetching tags took {} seconds",
        started.elapsed().as_secs_f32()
    );
    Ok(tags)
}
/// Loads every message in the notmuch thread `thread_id` and renders each one for display.
///
/// For each (file, message id) pair reported by notmuch the file is memory-mapped, parsed,
/// and converted into a [`Message`]: addresses, subject, date, headers, attachments and a
/// sanitized HTML body. Plain-text bodies are truncated to `MAX_RAW_MESSAGE_SIZE` bytes (on a
/// character boundary), linkified and wrapped in a paragraph. When `debug_content_tree` is
/// true, the body's `content_tree` is replaced by the rendered MIME tree of the message.
///
/// The thread subject is the first subject found after the message list is reversed.
///
/// # Errors
/// Propagates failures from notmuch, file opening/mapping, mail parsing, HTML sanitizing and
/// UTF-8 decoding of unhandled bodies.
pub async fn thread(
    nm: &Notmuch,
    thread_id: String,
    debug_content_tree: bool,
) -> Result<Thread, ServerError> {
    // TODO(wathiede): normalize all email addresses through an address book with preferred
    // display names (that default to the most commonly seen name).
    let mut messages = Vec::new();
    for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) {
        let tags = nm.tags_for_query(&format!("id:{id}"))?;
        let file = File::open(&path)?;
        // NOTE(review): the mapping is only read here; this assumes the mail file is not
        // truncated or rewritten while it is mapped -- confirm against how the maildir is
        // managed.
        let mmap = unsafe { MmapOptions::new().map(&file)? };
        let m = parse_mail(&mmap)?;
        let from = email_addresses(&path, &m, "from")?;
        if from.len() > 1 {
            warn!(
                "Got {} from addresses in message, truncating: {:?}",
                from.len(),
                from
            );
        }
        // Only the first `From` address is kept.
        let from = from.into_iter().next();
        let to = email_addresses(&path, &m, "to")?;
        let cc = email_addresses(&path, &m, "cc")?;
        let subject = m.headers.get_first_value("subject");
        let timestamp = m
            .headers
            .get_first_value("date")
            .and_then(|d| mailparse::dateparse(&d).ok());
        let cid_prefix = shared::urls::cid_prefix(None, &id);
        let base_url = None;
        // In debug mode the per-body content tree is replaced by the full MIME tree dump.
        let tree_for = |content_tree: String| {
            if debug_content_tree {
                render_content_type_tree(&m)
            } else {
                content_tree
            }
        };
        let body = match extract_body(&m, &id)? {
            Body::PlainText(PlainText { text, content_tree }) => {
                let text = if text.len() > MAX_RAW_MESSAGE_SIZE {
                    // Slicing a `str` in the middle of a multi-byte character panics, so back
                    // up to the nearest character boundary at or below the limit.
                    let mut end = MAX_RAW_MESSAGE_SIZE;
                    while !text.is_char_boundary(end) {
                        end -= 1;
                    }
                    format!(
                        "{}...\n\nMESSAGE WAS TRUNCATED @ {} bytes",
                        &text[..end],
                        MAX_RAW_MESSAGE_SIZE
                    )
                } else {
                    text
                };
                Body::Html(Html {
                    html: format!(
                        r#"<p class="view-part-text-plain">{}</p>"#,
                        // Trim newlines to prevent excessive white space at the beginning/end
                        // of presentation. Leave tabs and spaces in case plain text attempts
                        // to center a header on the first line.
                        sanitize_html(
                            &linkify_html(&text.trim_matches('\n')),
                            &cid_prefix,
                            &base_url
                        )?
                    ),
                    content_tree: tree_for(content_tree),
                })
            }
            Body::Html(Html { html, content_tree }) => Body::Html(Html {
                html: sanitize_html(&html, &cid_prefix, &base_url)?,
                content_tree: tree_for(content_tree),
            }),
            Body::UnhandledContentType(UnhandledContentType { content_tree, .. }) => {
                // Skip the header block by looking for the first blank line near the start of
                // the file. If none is found, show the whole file rather than dropping its
                // first two bytes.
                let body_start = mmap
                    .windows(2)
                    .take(20_000)
                    .position(|w| w == b"\n\n")
                    .map_or(0, |pos| pos + 2);
                let body = mmap[body_start..].to_vec();
                Body::UnhandledContentType(UnhandledContentType {
                    text: String::from_utf8(body)?,
                    content_tree: tree_for(content_tree),
                })
            }
        };
        let headers = m
            .headers
            .iter()
            .map(|h| Header {
                key: h.get_key(),
                value: h.get_value(),
            })
            .collect();
        // TODO(wathiede): parse message and fill out attachments
        let attachments = extract_attachments(&m, &id)?;
        messages.push(Message {
            id: format!("id:{id}"),
            from,
            to,
            cc,
            subject,
            tags,
            timestamp,
            headers,
            body,
            path,
            attachments,
        });
    }
    messages.reverse();
    // Find the first subject that's set. After reversing the vec, this should be the oldest
    // message.
    let subject: String = messages
        .iter()
        .find_map(|m| m.subject.clone())
        .unwrap_or_else(|| "(NO SUBJECT)".to_string());
    Ok(Thread::Email(EmailThread {
        thread_id,
        subject,
        messages,
    }))
}
fn email_addresses(
path: &str,
m: &ParsedMail,
header_name: &str,
) -> Result<Vec<Email>, ServerError> {
let mut addrs = Vec::new();
for header_value in m.headers.get_all_values(header_name) {
match mailparse::addrparse(&header_value) {
Ok(mal) => {
for ma in mal.into_inner() {
match ma {
mailparse::MailAddr::Group(gi) => {
if !gi.group_name.contains("ndisclosed") {
println!("[{path}][{header_name}] Group: {gi}");
}
}
mailparse::MailAddr::Single(s) => addrs.push(Email {
name: s.display_name,
addr: Some(s.addr),
}), //println!("Single: {s}"),
}
}
}
Err(_) => {
let v = header_value;
if v.matches('@').count() == 1 {
if v.matches('<').count() == 1 && v.ends_with('>') {
let idx = v.find('<').unwrap();
let addr = &v[idx + 1..v.len() - 1].trim();
let name = &v[..idx].trim();
addrs.push(Email {
name: Some(name.to_string()),
addr: Some(addr.to_string()),
});
}
} else {
addrs.push(Email {
name: Some(v),
addr: None,
});
}
}
}
}
Ok(addrs)
}
/// Finds the part of message `id` whose `Content-Id` header matches `cid` and returns it as an
/// [`Attachment`].
///
/// Only the first file notmuch reports for `id` is examined. The header value is compared
/// after trimming whitespace and removing one surrounding `<...>` pair when present. If the
/// matching part cannot be turned into an attachment by `extract_attachment`, a default
/// [`Attachment`] is returned instead.
///
/// # Errors
/// Returns [`ServerError::PartNotFound`] when notmuch has no file for `id` or no part
/// matches, and propagates notmuch, I/O and parse errors.
pub fn cid_attachment_bytes(nm: &Notmuch, id: &str, cid: &str) -> Result<Attachment, ServerError> {
    let files = nm.files(id)?;
    let Some(path) = files.first() else {
        warn!("failed to find files for message {id}");
        return Err(ServerError::PartNotFound);
    };
    let file = File::open(path)?;
    // NOTE(review): read-only mapping; assumes the mail file is not modified while mapped.
    let mmap = unsafe { MmapOptions::new().map(&file)? };
    let m = parse_mail(&mmap)?;
    walk_attachments(&m, |sp, _cur_idx| {
        let h_cid = get_content_id(&sp.headers);
        info!("{cid} {:?}", h_cid);
        let h_cid = h_cid?;
        // Strip the angle brackets without byte-index slicing, which panicked on empty,
        // one-character or multi-byte-edged header values.
        let trimmed = h_cid.trim();
        let bare = trimmed
            .strip_prefix('<')
            .and_then(|inner| inner.strip_suffix('>'))
            .unwrap_or(trimmed);
        if bare != cid {
            return None;
        }
        Some(extract_attachment(sp, id, &[]).unwrap_or_else(Attachment::default))
    })
    .ok_or(ServerError::PartNotFound)
}
/// Returns the part of message `id` located at the subpart index path `idx`.
///
/// Only the first file notmuch reports for `id` is examined. If the addressed part cannot be
/// turned into an attachment by `extract_attachment`, a default [`Attachment`] is returned.
///
/// # Errors
/// Returns [`ServerError::PartNotFound`] when notmuch has no file for `id` or no part has the
/// address `idx`, and propagates notmuch, I/O and parse errors.
pub fn attachment_bytes(nm: &Notmuch, id: &str, idx: &[usize]) -> Result<Attachment, ServerError> {
    let files = nm.files(id)?;
    let path = match files.first() {
        Some(path) => path,
        None => {
            warn!("failed to find files for message {id}");
            return Err(ServerError::PartNotFound);
        }
    };
    let file = File::open(&path)?;
    let mmap = unsafe { MmapOptions::new().map(&file)? };
    let m = parse_mail(&mmap)?;
    let found = walk_attachments(&m, |sp, cur_idx| {
        if cur_idx != idx {
            return None;
        }
        Some(extract_attachment(&sp, id, idx).unwrap_or(Attachment::default()))
    });
    found.ok_or(ServerError::PartNotFound)
}
/// Picks the displayable body of `m` based on its top-level MIME type.
///
/// `text/plain` and `text/html` messages return their decoded body directly. The multipart
/// types are delegated to their extractors with a part address seeded with `id`. Any other
/// type, or a failing extractor (which is logged), falls back to `extract_unhandled`.
fn extract_body(m: &ParsedMail, id: &str) -> Result<Body, ServerError> {
    let mut part_addr = vec![id.to_string()];
    let top_level = m.get_body()?;
    let extracted = match m.ctype.mimetype.as_str() {
        TEXT_PLAIN => return Ok(Body::text(top_level)),
        TEXT_HTML => return Ok(Body::html(top_level)),
        MULTIPART_MIXED => extract_mixed(m, &mut part_addr),
        MULTIPART_ALTERNATIVE => extract_alternative(m, &mut part_addr),
        MULTIPART_RELATED => extract_related(m, &mut part_addr),
        _ => extract_unhandled(m),
    };
    match extracted {
        Ok(body) => Ok(body),
        Err(err) => {
            error!("Failed to extract body: {err:?}");
            extract_unhandled(m)
        }
    }
}
/// Builds the fallback body for a message whose content type is not handled.
///
/// The text contains a fixed banner, the rendered content-type tree and the decoded body of
/// `m`; the same tree is also stored in `content_tree`.
fn extract_unhandled(m: &ParsedMail) -> Result<Body, ServerError> {
    let content_tree = render_content_type_tree(m);
    let raw_body = m.get_body()?;
    let text = format!(
        "Unhandled body content type:\n{}\n{}",
        content_tree, raw_body,
    );
    Ok(Body::UnhandledContentType(UnhandledContentType {
        text,
        content_tree,
    }))
}
// multipart/alternative defines multiple representations of the same message, and clients should
// show the fanciest they can display. For this program, the priority is text/html, text/plain,
// then give up.
fn extract_alternative(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
let handled_types = vec![
MULTIPART_ALTERNATIVE,
MULTIPART_MIXED,
MULTIPART_RELATED,
TEXT_HTML,
TEXT_PLAIN,
];
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_ALTERNATIVE {
return extract_alternative(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_MIXED {
return extract_related(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_RELATED {
return extract_related(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == TEXT_HTML {
let body = sp.get_body()?;
return Ok(Body::html(body));
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == TEXT_PLAIN {
let body = sp.get_body()?;
return Ok(Body::text(body));
}
}
Err(ServerError::StringError(format!(
"extract_alternative failed to find suitable subpart, searched: {:?}",
handled_types
)))
}
// multipart/mixed defines multiple types of context all of which should be presented to the user
// 'serially'.
/// Renders every handled subpart of a `multipart/mixed` part and flattens them into one body.
///
/// Subparts with unhandled MIME types are reported in a single warning and skipped. Inline
/// JPEG/PNG images become `<img>` tags whose URL is built from `part_addr`: the first entry,
/// then the remaining entries joined with `.`, then the filename.
///
/// `part_addr` is the address of `m`; each subpart's index is pushed while that subpart is
/// processed and popped afterwards.
fn extract_mixed(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
    let handled_types = [
        MULTIPART_ALTERNATIVE,
        MULTIPART_RELATED,
        TEXT_HTML,
        TEXT_PLAIN,
        IMAGE_JPEG,
        IMAGE_PNG,
    ];
    let mut unhandled_types = Vec::new();
    for sp in &m.subparts {
        let mimetype = sp.ctype.mimetype.as_str();
        if !handled_types.contains(&mimetype) {
            unhandled_types.push(mimetype);
        }
    }
    unhandled_types.sort();
    if !unhandled_types.is_empty() {
        warn!("{MULTIPART_MIXED} contains the following unhandled mimetypes {unhandled_types:?}");
    }
    let mut parts = Vec::new();
    for (idx, sp) in m.subparts.iter().enumerate() {
        part_addr.push(idx.to_string());
        let rendered = match sp.ctype.mimetype.as_str() {
            MULTIPART_RELATED => Some(extract_related(sp, part_addr)?),
            MULTIPART_ALTERNATIVE => Some(extract_alternative(sp, part_addr)?),
            TEXT_PLAIN => Some(Body::text(sp.get_body()?)),
            TEXT_HTML => Some(Body::html(sp.get_body()?)),
            IMAGE_JPEG | IMAGE_PNG => {
                let pcd = sp.get_content_disposition();
                // Only add inline images, attachments are handled as an attribute of the top
                // level Message and rendered separately client-side.
                if pcd.disposition == mailparse::DispositionType::Inline {
                    let filename = pcd.params.get("filename").cloned().unwrap_or_default();
                    // TODO: make URL generation more programmatic based on what the frontend
                    // has mapped
                    Some(Body::html(format!(
                        r#"<img src="/api/view/attachment/{}/{}/{filename}">"#,
                        part_addr[0],
                        part_addr[1..].join(".")
                    )))
                } else {
                    None
                }
            }
            _ => None,
        };
        parts.extend(rendered);
        part_addr.pop();
    }
    Ok(flatten_body_parts(&parts))
}
/// Concatenates several bodies into a single HTML body, one fragment per line.
///
/// Plain-text and unhandled parts are linkified and wrapped in a `<p>` with a class naming
/// their kind; HTML parts are copied as-is. The combined HTML is logged before returning.
fn flatten_body_parts(parts: &[Body]) -> Body {
    let mut fragments = Vec::with_capacity(parts.len());
    for part in parts {
        // For the text-like arms, only newlines are trimmed, to prevent excessive white space
        // at the beginning/end of presentation. Tabs and spaces are left in case plain text
        // attempts to center a header on the first line.
        let fragment = match part {
            Body::PlainText(PlainText { text, .. }) => format!(
                r#"<p class="view-part-text-plain">{}</p>"#,
                linkify_html(&text.trim_matches('\n'))
            ),
            Body::Html(Html { html, .. }) => html.clone(),
            Body::UnhandledContentType(UnhandledContentType { text, .. }) => {
                error!("text len {}", text.len());
                format!(
                    r#"<p class="view-part-unhandled">{}</p>"#,
                    linkify_html(&text.trim_matches('\n'))
                )
            }
        };
        fragments.push(fragment);
    }
    let html = fragments.join("\n");
    info!("flatten_body_parts {} {html}", parts.len());
    Body::html(html)
}
/// Extracts the displayable body from a `multipart/related` part.
///
/// Preference order: the first `multipart/alternative` subpart (delegated to
/// `extract_alternative`), then the first `text/html` subpart, then the first `text/plain`
/// subpart. Unhandled subpart types are reported in a single warning, and the Content-Id of
/// each JPEG/PNG subpart is logged together with its part address.
///
/// `part_addr` is the address of `m`. While recursing into the alternative subpart, that
/// subpart's index is pushed onto it and popped afterwards.
///
/// # Errors
/// Returns [`ServerError::StringError`] when no subpart has a usable type, and propagates
/// body decoding and nested extraction errors.
fn extract_related(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
    // TODO(wathiede): collect related things and change return type to new Body arm.
    let handled_types = [
        MULTIPART_ALTERNATIVE,
        TEXT_HTML,
        TEXT_PLAIN,
        IMAGE_JPEG,
        IMAGE_PNG,
    ];
    let mut unhandled_types: Vec<_> = m
        .subparts
        .iter()
        .map(|sp| sp.ctype.mimetype.as_str())
        .filter(|mt| !handled_types.contains(mt))
        .collect();
    unhandled_types.sort();
    if !unhandled_types.is_empty() {
        warn!("{MULTIPART_RELATED} contains the following unhandled mimetypes {unhandled_types:?}");
    }
    for (i, sp) in m.subparts.iter().enumerate() {
        if sp.ctype.mimetype == IMAGE_PNG || sp.ctype.mimetype == IMAGE_JPEG {
            info!("sp.ctype {:#?}", sp.ctype);
            if let Some(cid) = sp.headers.get_first_value("Content-Id") {
                let mut part_id = part_addr.clone();
                part_id.push(i.to_string());
                info!("cid: {cid} part_id {part_id:?}");
            }
        }
    }
    let alternative = m
        .subparts
        .iter()
        .enumerate()
        .find(|(_, sp)| sp.ctype.mimetype == MULTIPART_ALTERNATIVE);
    if let Some((idx, sp)) = alternative {
        // Recurse into the matched subpart itself (not the enclosing container) and address
        // it correctly for anything resolved further down.
        part_addr.push(idx.to_string());
        let body = extract_alternative(sp, part_addr);
        part_addr.pop();
        return body;
    }
    if let Some(sp) = m
        .subparts
        .iter()
        .find(|sp| sp.ctype.mimetype == TEXT_HTML)
    {
        return Ok(Body::html(sp.get_body()?));
    }
    if let Some(sp) = m
        .subparts
        .iter()
        .find(|sp| sp.ctype.mimetype == TEXT_PLAIN)
    {
        return Ok(Body::text(sp.get_body()?));
    }
    Err(ServerError::StringError(format!(
        "extract_related failed to find suitable subpart, searched: {:?}",
        handled_types
    )))
}
/// Visits every subpart of `m` depth-first and returns the first `Some` produced by `visitor`.
///
/// `visitor` receives each subpart together with its index path from the root of `m`.
fn walk_attachments<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
    m: &ParsedMail,
    visitor: F,
) -> Option<T> {
    walk_attachments_inner(m, visitor, &mut Vec::new())
}
/// Recursive worker for `walk_attachments`.
///
/// `cur_addr` holds the index path of `m`. Each subpart's index is pushed before the subpart
/// (and then its descendants) is visited, and popped again if nothing was found there.
fn walk_attachments_inner<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
    m: &ParsedMail,
    visitor: F,
    cur_addr: &mut Vec<usize>,
) -> Option<T> {
    for (idx, sp) in m.subparts.iter().enumerate() {
        cur_addr.push(idx);
        if let Some(hit) = visitor(sp, cur_addr.as_slice()) {
            return Some(hit);
        }
        if let Some(hit) = walk_attachments_inner(sp, visitor, cur_addr) {
            return Some(hit);
        }
        cur_addr.pop();
    }
    None
}
// TODO(wathiede): make this walk_attachments that takes a closure.
// Then implement one closure for building `Attachment` and implement another that can be used
// to get the bytes for serving attachments over HTTP.
/// Collects the direct subparts of `m` that are attachments.
///
/// Only the immediate subparts are examined. Parts that `extract_attachment` rejects are
/// skipped, as are inline parts, which are flattened into the body of the message instead.
fn extract_attachments(m: &ParsedMail, id: &str) -> Result<Vec<Attachment>, ServerError> {
    let attachments = m
        .subparts
        .iter()
        .enumerate()
        .filter_map(|(idx, sp)| extract_attachment(sp, id, &[idx]))
        .filter(|attachment| attachment.disposition == DispositionType::Attachment)
        .collect();
    Ok(attachments)
}
fn extract_attachment(m: &ParsedMail, id: &str, idx: &[usize]) -> Option<Attachment> {
let pcd = m.get_content_disposition();
// TODO: do we need to handle empty filename attachments, or should we change the definition of
// Attachment::filename?
let Some(filename) = pcd.params.get("filename").map(|f| f.clone()) else {
return None;
};
// TODO: grab this from somewhere
let content_id = None;
let bytes = match m.get_body_raw() {
Ok(bytes) => bytes,
Err(err) => {
error!("failed to get body for attachment: {err}");
return None;
}
};
return Some(Attachment {
id: id.to_string(),
idx: idx
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join("."),
disposition: pcd.disposition.into(),
filename: Some(filename),
size: bytes.len(),
// TODO: what is the default for ctype?
// TODO: do we want to use m.ctype.params for anything?
content_type: Some(m.ctype.mimetype.clone()),
content_id,
bytes,
});
}
/// Extracts the text between the last pair of double quotes in `header_value`.
///
/// The final character of `header_value` (expected to be the closing `"`) is dropped, and
/// whatever follows the last remaining `"` is returned. Returns `""` when no quote remains,
/// including for an empty `header_value`.
pub fn get_attachment_filename(header_value: &str) -> &str {
    info!("get_attachment_filename {header_value}");
    // Strip last "
    // This is done per character rather than per byte so that an empty value or a trailing
    // multi-byte character cannot cause a slicing panic.
    let mut chars = header_value.chars();
    chars.next_back();
    let v = chars.as_str();
    match v.rfind('"') {
        Some(idx) => &v[idx + 1..],
        None => "",
    }
}
/// Returns the first `Content-Type` header value with everything from the first `;` removed.
///
/// Returns `None` when `headers` has no `Content-Type` header.
pub fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
    let value = headers.get_first_value("Content-Type")?;
    Some(match value.find(';') {
        Some(idx) => value[..idx].to_string(),
        None => value,
    })
}
fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
headers.get_first_value("Content-Id")
}
/// Renders a human-readable dump of the MIME structure of `m`.
///
/// The output has an "Outline" section (one line per part with its MIME type, prefixed by
/// dashes proportional to depth), a "Detailed" section (the same tree plus character set,
/// content-type parameters and headers) and a note listing the headers that are not shown:
/// those in `SKIP_HEADERS` and any whose key starts with `X`.
fn render_content_type_tree(m: &ParsedMail) -> String {
    const WIDTH: usize = 4;
    const SKIP_HEADERS: [&str; 4] = [
        "Authentication-Results",
        "DKIM-Signature",
        "Received",
        "Received-SPF",
    ];
    // Outline: MIME type of this part followed by the outlines of its subparts.
    fn render_ct_rec(m: &ParsedMail, depth: usize) -> String {
        let heading = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype);
        std::iter::once(heading)
            .chain(m.subparts.iter().map(|sp| render_ct_rec(sp, depth + 1)))
            .collect::<Vec<_>>()
            .join("\n")
    }
    // Detailed: like the outline, plus charset, content-type params and visible headers.
    fn render_rec(m: &ParsedMail, depth: usize) -> String {
        let indent = " ".repeat(depth * WIDTH);
        let mut lines = vec![format!(
            "{} {}",
            "-".repeat(depth * WIDTH),
            m.ctype.mimetype
        )];
        if !m.ctype.charset.is_empty() {
            lines.push(format!("{indent} Character Set: {}", m.ctype.charset));
        }
        lines.extend(
            m.ctype
                .params
                .iter()
                .map(|(k, v)| format!("{indent} {k}: {v}")),
        );
        if !m.headers.is_empty() {
            lines.push(format!("{indent} == headers =="));
            for h in &m.headers {
                let key = h.get_key();
                if key.starts_with('X') || SKIP_HEADERS.contains(&key.as_str()) {
                    continue;
                }
                lines.push(format!("{indent} {}: {}", h.get_key_ref(), h.get_value()));
            }
        }
        lines.extend(m.subparts.iter().map(|sp| render_rec(sp, depth + 1)));
        lines.join("\n")
    }
    let outline = render_ct_rec(m, 1);
    let detailed = render_rec(m, 1);
    let skipped = SKIP_HEADERS.join("\n ");
    format!(
        "Outline:\n{}\n\nDetailed:\n{}\n\nNot showing headers:\n {}\n X.*",
        outline, detailed, skipped
    )
}
/// Adds or removes the `unread` tag on everything matching `query`.
///
/// When `unread` is true the tag is added, otherwise it is removed. Always returns
/// `Ok(true)` when the notmuch call succeeds.
///
/// # Errors
/// Propagates any error from the notmuch tag call.
pub async fn set_read_status<'ctx>(
    nm: &Notmuch,
    query: &str,
    unread: bool,
) -> Result<bool, ServerError> {
    let search_term = query.to_string();
    match unread {
        true => {
            nm.tag_add("unread", &search_term)?;
        }
        false => {
            nm.tag_remove("unread", &search_term)?;
        }
    }
    Ok(true)
}