WIP move thread loading for notmuch into nm mod

This commit is contained in:
Bill Thiede 2024-07-21 09:31:37 -07:00
parent dd09bc3168
commit 65fcbd4b77
5 changed files with 688 additions and 659 deletions

View File

@ -18,9 +18,8 @@ use rocket_cors::{AllowedHeaders, AllowedOrigins};
use serde::Deserialize;
use server::{
error::ServerError,
graphql::{
attachment_bytes, cid_attachment_bytes, Attachment, GraphqlSchema, Mutation, QueryRoot,
},
graphql::{Attachment, GraphqlSchema, Mutation, QueryRoot},
nm::{attachment_bytes, cid_attachment_bytes},
};
use sqlx::postgres::PgPool;

View File

@ -1,6 +1,10 @@
use std::{str::Utf8Error, string::FromUtf8Error};
use mailparse::MailParseError;
use thiserror::Error;
use crate::SanitizeError;
#[derive(Error, Debug)]
pub enum ServerError {
#[error("notmuch")]
@ -15,4 +19,12 @@ pub enum ServerError {
PartNotFound,
#[error("sqlx error")]
SQLXError(#[from] sqlx::Error),
#[error("html sanitize error")]
SanitizeError(#[from] SanitizeError),
#[error("UTF8 error")]
Utf8Error(#[from] Utf8Error),
#[error("FromUTF8 error")]
FromUtf8Error(#[from] FromUtf8Error),
#[error("error")]
StringError(String),
}

View File

@ -1,4 +1,3 @@
const MAX_RAW_MESSAGE_SIZE: usize = 100_000;
use std::fs::File;
use async_graphql::{
@ -19,14 +18,6 @@ pub type UnixTime = isize;
/// # Thread ID, sans "thread:"
pub type ThreadId = String;
const TEXT_PLAIN: &'static str = "text/plain";
const TEXT_HTML: &'static str = "text/html";
const IMAGE_JPEG: &'static str = "image/jpeg";
const IMAGE_PNG: &'static str = "image/png";
const MULTIPART_ALTERNATIVE: &'static str = "multipart/alternative";
const MULTIPART_MIXED: &'static str = "multipart/mixed";
const MULTIPART_RELATED: &'static str = "multipart/related";
#[derive(Debug, SimpleObject)]
pub struct ThreadSummary {
pub thread: ThreadId,
@ -45,9 +36,9 @@ pub struct ThreadSummary {
#[derive(Debug, SimpleObject)]
pub struct Thread {
thread_id: String,
subject: String,
messages: Vec<Message>,
pub thread_id: String,
pub subject: String,
pub messages: Vec<Message>,
}
#[derive(Debug, SimpleObject)]
@ -91,16 +82,45 @@ pub struct Attachment {
pub bytes: Vec<u8>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Disposition {
pub r#type: DispositionType,
pub filename: Option<String>,
pub size: Option<usize>,
}
#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)]
pub enum DispositionType {
Inline,
Attachment,
}
impl From<mailparse::DispositionType> for DispositionType {
fn from(value: mailparse::DispositionType) -> Self {
match value {
mailparse::DispositionType::Inline => DispositionType::Inline,
mailparse::DispositionType::Attachment => DispositionType::Attachment,
dt => panic!("unhandled DispositionType {dt:?}"),
}
}
}
impl Default for DispositionType {
fn default() -> Self {
DispositionType::Attachment
}
}
#[derive(Debug, SimpleObject)]
pub struct Header {
key: String,
value: String,
pub key: String,
pub value: String,
}
#[derive(Debug)]
pub struct UnhandledContentType {
text: String,
content_tree: String,
pub text: String,
pub content_tree: String,
}
#[Object]
@ -115,8 +135,8 @@ impl UnhandledContentType {
#[derive(Debug)]
pub struct PlainText {
text: String,
content_tree: String,
pub text: String,
pub content_tree: String,
}
#[Object]
@ -131,8 +151,8 @@ impl PlainText {
#[derive(Debug)]
pub struct Html {
html: String,
content_tree: String,
pub html: String,
pub content_tree: String,
}
#[Object]
@ -156,13 +176,13 @@ pub enum Body {
}
impl Body {
fn html(html: String) -> Body {
pub fn html(html: String) -> Body {
Body::Html(Html {
html,
content_tree: "".to_string(),
})
}
fn text(text: String) -> Body {
pub fn text(text: String) -> Body {
Body::PlainText(PlainText {
text,
content_tree: "".to_string(),
@ -222,8 +242,6 @@ impl QueryRoot {
Ok(tags)
}
async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
// TODO(wathiede): normalize all email addresses through an address book with preferred
// display names (that default to the most commonly seen name).
let nm = ctx.data_unchecked::<Notmuch>();
let debug_content_tree = ctx
.look_ahead()
@ -231,124 +249,7 @@ impl QueryRoot {
.field("body")
.field("contentTree")
.exists();
let mut messages = Vec::new();
for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) {
let tags = nm.tags_for_query(&format!("id:{id}"))?;
let file = File::open(&path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let m = parse_mail(&mmap)?;
let from = email_addresses(&path, &m, "from")?;
let from = match from.len() {
0 => None,
1 => from.into_iter().next(),
_ => {
warn!(
"Got {} from addresses in message, truncating: {:?}",
from.len(),
from
);
from.into_iter().next()
}
};
let to = email_addresses(&path, &m, "to")?;
let cc = email_addresses(&path, &m, "cc")?;
let subject = m.headers.get_first_value("subject");
let timestamp = m
.headers
.get_first_value("date")
.and_then(|d| mailparse::dateparse(&d).ok());
let cid_prefix = shared::urls::cid_prefix(None, &id);
let body = match extract_body(&m, &id)? {
Body::PlainText(PlainText { text, content_tree }) => {
let text = if text.len() > MAX_RAW_MESSAGE_SIZE {
format!(
"{}...\n\nMESSAGE WAS TRUNCATED @ {} bytes",
&text[..MAX_RAW_MESSAGE_SIZE],
MAX_RAW_MESSAGE_SIZE
)
} else {
text
};
Body::Html(Html {
html: format!(
r#"<p class="view-part-text-plain">{}</p>"#,
// Trim newlines to prevent excessive white space at the beginning/end of
// presenation. Leave tabs and spaces incase plain text attempts to center a
// header on the first line.
sanitize_html(&linkify_html(&text.trim_matches('\n')), &cid_prefix)?
),
content_tree: if debug_content_tree {
render_content_type_tree(&m)
} else {
content_tree
},
})
}
Body::Html(Html { html, content_tree }) => Body::Html(Html {
html: sanitize_html(&html, &cid_prefix)?,
content_tree: if debug_content_tree {
render_content_type_tree(&m)
} else {
content_tree
},
}),
Body::UnhandledContentType(UnhandledContentType { content_tree, .. }) => {
let body_start = mmap
.windows(2)
.take(20_000)
.position(|w| w == b"\n\n")
.unwrap_or(0);
let body = mmap[body_start + 2..].to_vec();
Body::UnhandledContentType(UnhandledContentType {
text: String::from_utf8(body)?,
content_tree: if debug_content_tree {
render_content_type_tree(&m)
} else {
content_tree
},
})
}
};
let headers = m
.headers
.iter()
.map(|h| Header {
key: h.get_key(),
value: h.get_value(),
})
.collect();
// TODO(wathiede): parse message and fill out attachments
let attachments = extract_attachments(&m, &id)?;
messages.push(Message {
id,
from,
to,
cc,
subject,
tags,
timestamp,
headers,
body,
path,
attachments,
});
}
messages.reverse();
// Find the first subject that's set. After reversing the vec, this should be the oldest
// message.
let subject: String = messages
.iter()
.skip_while(|m| m.subject.is_none())
.next()
.and_then(|m| m.subject.clone())
.unwrap_or("(NO SUBJECT)".to_string());
Ok(Thread {
thread_id,
subject,
messages,
})
Ok(nm::thread(nm, thread_id, debug_content_tree).await?)
}
}
@ -395,506 +296,3 @@ impl Mutation {
}
pub type GraphqlSchema = Schema<QueryRoot, Mutation, EmptySubscription>;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Disposition {
pub r#type: DispositionType,
pub filename: Option<String>,
pub size: Option<usize>,
}
#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)]
pub enum DispositionType {
Inline,
Attachment,
}
impl From<mailparse::DispositionType> for DispositionType {
fn from(value: mailparse::DispositionType) -> Self {
match value {
mailparse::DispositionType::Inline => DispositionType::Inline,
mailparse::DispositionType::Attachment => DispositionType::Attachment,
dt => panic!("unhandled DispositionType {dt:?}"),
}
}
}
impl Default for DispositionType {
fn default() -> Self {
DispositionType::Attachment
}
}
fn extract_body(m: &ParsedMail, id: &str) -> Result<Body, Error> {
let mut part_addr = Vec::new();
part_addr.push(id.to_string());
let body = m.get_body()?;
let ret = match m.ctype.mimetype.as_str() {
TEXT_PLAIN => return Ok(Body::text(body)),
TEXT_HTML => return Ok(Body::html(body)),
MULTIPART_MIXED => extract_mixed(m, &mut part_addr),
MULTIPART_ALTERNATIVE => extract_alternative(m, &mut part_addr),
MULTIPART_RELATED => extract_related(m, &mut part_addr),
_ => extract_unhandled(m),
};
if let Err(err) = ret {
error!("Failed to extract body: {err:?}");
return Ok(extract_unhandled(m)?);
}
ret
}
fn extract_unhandled(m: &ParsedMail) -> Result<Body, Error> {
let msg = format!(
"Unhandled body content type:\n{}\n{}",
render_content_type_tree(m),
m.get_body()?,
);
Ok(Body::UnhandledContentType(UnhandledContentType {
text: msg,
content_tree: render_content_type_tree(m),
}))
}
// multipart/alternative defines multiple representations of the same message, and clients should
// show the fanciest they can display. For this program, the priority is text/html, text/plain,
// then give up.
fn extract_alternative(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, Error> {
let handled_types = vec![
MULTIPART_ALTERNATIVE,
MULTIPART_MIXED,
MULTIPART_RELATED,
TEXT_HTML,
TEXT_PLAIN,
];
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_ALTERNATIVE {
return extract_alternative(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_MIXED {
return extract_related(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_RELATED {
return extract_related(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == TEXT_HTML {
let body = sp.get_body()?;
return Ok(Body::html(body));
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == TEXT_PLAIN {
let body = sp.get_body()?;
return Ok(Body::text(body));
}
}
Err(format!(
"extract_alternative failed to find suitable subpart, searched: {:?}",
handled_types
)
.into())
}
// multipart/mixed defines multiple types of context all of which should be presented to the user
// 'serially'.
fn extract_mixed(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, Error> {
let handled_types = vec![
MULTIPART_ALTERNATIVE,
MULTIPART_RELATED,
TEXT_HTML,
TEXT_PLAIN,
IMAGE_JPEG,
IMAGE_PNG,
];
let mut unhandled_types: Vec<_> = m
.subparts
.iter()
.map(|sp| sp.ctype.mimetype.as_str())
.filter(|mt| !handled_types.contains(&mt))
.collect();
unhandled_types.sort();
if !unhandled_types.is_empty() {
warn!("{MULTIPART_MIXED} contains the following unhandled mimetypes {unhandled_types:?}");
}
let mut parts = Vec::new();
for (idx, sp) in m.subparts.iter().enumerate() {
part_addr.push(idx.to_string());
match sp.ctype.mimetype.as_str() {
MULTIPART_RELATED => parts.push(extract_related(sp, part_addr)?),
MULTIPART_ALTERNATIVE => parts.push(extract_alternative(sp, part_addr)?),
TEXT_PLAIN => parts.push(Body::text(sp.get_body()?)),
TEXT_HTML => parts.push(Body::html(sp.get_body()?)),
IMAGE_JPEG | IMAGE_PNG => {
let pcd = sp.get_content_disposition();
let filename = pcd
.params
.get("filename")
.map(|s| s.clone())
.unwrap_or("".to_string());
// Only add inline images, attachments are handled as an attribute of the top level Message and rendered separate client-side.
if pcd.disposition == mailparse::DispositionType::Inline {
parts.push(Body::html(format!(
r#"<img src="/view/attachment/{}/{}/{filename}">"#,
part_addr[0],
part_addr
.iter()
.skip(1)
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join(".")
)));
}
}
_ => (),
}
part_addr.pop();
}
Ok(flatten_body_parts(&parts))
}
fn flatten_body_parts(parts: &[Body]) -> Body {
let html = parts
.iter()
.map(|p| match p {
Body::PlainText(PlainText { text, .. }) => {
format!(
r#"<p class="view-part-text-plain">{}</p>"#,
// Trim newlines to prevent excessive white space at the beginning/end of
// presenation. Leave tabs and spaces incase plain text attempts to center a
// header on the first line.
linkify_html(&text.trim_matches('\n'))
)
}
Body::Html(Html { html, .. }) => html.clone(),
Body::UnhandledContentType(UnhandledContentType { text, .. }) => {
error!("text len {}", text.len());
format!(
r#"<p class="view-part-unhandled">{}</p>"#,
// Trim newlines to prevent excessive white space at the beginning/end of
// presenation. Leave tabs and spaces incase plain text attempts to center a
// header on the first line.
linkify_html(&text.trim_matches('\n'))
)
}
})
.collect::<Vec<_>>()
.join("\n");
info!("flatten_body_parts {} {html}", parts.len());
Body::html(html)
}
fn extract_related(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, Error> {
// TODO(wathiede): collect related things and change return type to new Body arm.
let handled_types = vec![
MULTIPART_ALTERNATIVE,
TEXT_HTML,
TEXT_PLAIN,
IMAGE_JPEG,
IMAGE_PNG,
];
let mut unhandled_types: Vec<_> = m
.subparts
.iter()
.map(|sp| sp.ctype.mimetype.as_str())
.filter(|mt| !handled_types.contains(&mt))
.collect();
unhandled_types.sort();
if !unhandled_types.is_empty() {
warn!("{MULTIPART_RELATED} contains the following unhandled mimetypes {unhandled_types:?}");
}
for (i, sp) in m.subparts.iter().enumerate() {
if sp.ctype.mimetype == IMAGE_PNG || sp.ctype.mimetype == IMAGE_JPEG {
info!("sp.ctype {:#?}", sp.ctype);
//info!("sp.headers {:#?}", sp.headers);
if let Some(cid) = sp.headers.get_first_value("Content-Id") {
let mut part_id = part_addr.clone();
part_id.push(i.to_string());
info!("cid: {cid} part_id {part_id:?}");
}
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == MULTIPART_ALTERNATIVE {
return extract_alternative(m, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == TEXT_HTML {
let body = sp.get_body()?;
return Ok(Body::html(body));
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == TEXT_PLAIN {
let body = sp.get_body()?;
return Ok(Body::text(body));
}
}
Err(format!(
"extract_related failed to find suitable subpart, searched: {:?}",
handled_types
)
.into())
}
fn walk_attachments<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
m: &ParsedMail,
visitor: F,
) -> Option<T> {
let mut cur_addr = Vec::new();
walk_attachments_inner(m, visitor, &mut cur_addr)
}
fn walk_attachments_inner<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
m: &ParsedMail,
visitor: F,
cur_addr: &mut Vec<usize>,
) -> Option<T> {
for (idx, sp) in m.subparts.iter().enumerate() {
cur_addr.push(idx);
let val = visitor(sp, &cur_addr);
if val.is_some() {
return val;
}
let val = walk_attachments_inner(sp, visitor, cur_addr);
if val.is_some() {
return val;
}
cur_addr.pop();
}
None
}
// TODO(wathiede): make this walk_attachments that takes a closure.
// Then implement one closure for building `Attachment` and imlement another that can be used to
// get the bytes for serving attachments of HTTP
fn extract_attachments(m: &ParsedMail, id: &str) -> Result<Vec<Attachment>, Error> {
let mut attachments = Vec::new();
for (idx, sp) in m.subparts.iter().enumerate() {
if let Some(attachment) = extract_attachment(sp, id, &[idx]) {
// Filter out inline attachements, they're flattened into the body of the message.
if attachment.disposition == DispositionType::Attachment {
attachments.push(attachment);
}
}
}
Ok(attachments)
}
fn extract_attachment(m: &ParsedMail, id: &str, idx: &[usize]) -> Option<Attachment> {
let pcd = m.get_content_disposition();
// TODO: do we need to handle empty filename attachments, or should we change the definition of
// Attachment::filename?
let Some(filename) = pcd.params.get("filename").map(|f| f.clone()) else {
return None;
};
// TODO: grab this from somewhere
let content_id = None;
let bytes = match m.get_body_raw() {
Ok(bytes) => bytes,
Err(err) => {
error!("failed to get body for attachment: {err}");
return None;
}
};
return Some(Attachment {
id: id.to_string(),
idx: idx
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join("."),
disposition: pcd.disposition.into(),
filename: Some(filename),
size: bytes.len(),
// TODO: what is the default for ctype?
// TODO: do we want to use m.ctype.params for anything?
content_type: Some(m.ctype.mimetype.clone()),
content_id,
bytes,
});
}
pub fn get_attachment_filename(header_value: &str) -> &str {
info!("get_attachment_filename {header_value}");
// Strip last "
let v = &header_value[..header_value.len() - 1];
if let Some(idx) = v.rfind('"') {
&v[idx + 1..]
} else {
""
}
}
pub fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
if let Some(v) = headers.get_first_value("Content-Type") {
if let Some(idx) = v.find(';') {
return Some(v[..idx].to_string());
} else {
return Some(v);
}
}
None
}
fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
headers.get_first_value("Content-Id")
}
fn render_content_type_tree(m: &ParsedMail) -> String {
const WIDTH: usize = 4;
const SKIP_HEADERS: [&str; 4] = [
"Authentication-Results",
"DKIM-Signature",
"Received",
"Received-SPF",
];
fn render_ct_rec(m: &ParsedMail, depth: usize) -> String {
let mut parts = Vec::new();
let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype);
parts.push(msg);
for sp in &m.subparts {
parts.push(render_ct_rec(sp, depth + 1))
}
parts.join("\n")
}
fn render_rec(m: &ParsedMail, depth: usize) -> String {
let mut parts = Vec::new();
let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype);
parts.push(msg);
let indent = " ".repeat(depth * WIDTH);
if !m.ctype.charset.is_empty() {
parts.push(format!("{indent} Character Set: {}", m.ctype.charset));
}
for (k, v) in m.ctype.params.iter() {
parts.push(format!("{indent} {k}: {v}"));
}
if !m.headers.is_empty() {
parts.push(format!("{indent} == headers =="));
for h in &m.headers {
if h.get_key().starts_with('X') {
continue;
}
if SKIP_HEADERS.contains(&h.get_key().as_str()) {
continue;
}
parts.push(format!("{indent} {}: {}", h.get_key_ref(), h.get_value()));
}
}
for sp in &m.subparts {
parts.push(render_rec(sp, depth + 1))
}
parts.join("\n")
}
format!(
"Outline:\n{}\n\nDetailed:\n{}\n\nNot showing headers:\n {}\n X.*",
render_ct_rec(m, 1),
render_rec(m, 1),
SKIP_HEADERS.join("\n ")
)
}
fn email_addresses(path: &str, m: &ParsedMail, header_name: &str) -> Result<Vec<Email>, Error> {
let mut addrs = Vec::new();
for header_value in m.headers.get_all_values(header_name) {
match mailparse::addrparse(&header_value) {
Ok(mal) => {
for ma in mal.into_inner() {
match ma {
mailparse::MailAddr::Group(gi) => {
if !gi.group_name.contains("ndisclosed") {
println!("[{path}][{header_name}] Group: {gi}");
}
}
mailparse::MailAddr::Single(s) => addrs.push(Email {
name: s.display_name,
addr: Some(s.addr),
}), //println!("Single: {s}"),
}
}
}
Err(_) => {
let v = header_value;
if v.matches('@').count() == 1 {
if v.matches('<').count() == 1 && v.ends_with('>') {
let idx = v.find('<').unwrap();
let addr = &v[idx + 1..v.len() - 1].trim();
let name = &v[..idx].trim();
addrs.push(Email {
name: Some(name.to_string()),
addr: Some(addr.to_string()),
});
}
} else {
addrs.push(Email {
name: Some(v),
addr: None,
});
}
}
}
}
Ok(addrs)
}
pub fn cid_attachment_bytes(nm: &Notmuch, id: &str, cid: &str) -> Result<Attachment, ServerError> {
let files = nm.files(id)?;
let Some(path) = files.first() else {
warn!("failed to find files for message {id}");
return Err(ServerError::PartNotFound);
};
let file = File::open(&path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let m = parse_mail(&mmap)?;
if let Some(attachment) = walk_attachments(&m, |sp, _cur_idx| {
info!("{cid} {:?}", get_content_id(&sp.headers));
if let Some(h_cid) = get_content_id(&sp.headers) {
let h_cid = &h_cid[1..h_cid.len() - 1];
if h_cid == cid {
let attachment = extract_attachment(&sp, id, &[]).unwrap_or(Attachment {
..Attachment::default()
});
return Some(attachment);
}
}
None
}) {
return Ok(attachment);
}
Err(ServerError::PartNotFound)
}
pub fn attachment_bytes(nm: &Notmuch, id: &str, idx: &[usize]) -> Result<Attachment, ServerError> {
let files = nm.files(id)?;
let Some(path) = files.first() else {
warn!("failed to find files for message {id}");
return Err(ServerError::PartNotFound);
};
let file = File::open(&path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let m = parse_mail(&mmap)?;
if let Some(attachment) = walk_attachments(&m, |sp, cur_idx| {
if cur_idx == idx {
let attachment = extract_attachment(&sp, id, idx).unwrap_or(Attachment {
..Attachment::default()
});
return Some(attachment);
}
None
}) {
return Ok(attachment);
}
Err(ServerError::PartNotFound)
}

View File

@ -1,7 +1,7 @@
pub mod error;
pub mod graphql;
mod newsreader;
mod nm;
pub mod newsreader;
pub mod nm;
use css_inline::{CSSInliner, InlineError, InlineOptions};
use linkify::{LinkFinder, LinkKind};

View File

@ -1,23 +1,40 @@
use std::{
collections::HashMap,
fs::File,
hash::{DefaultHasher, Hash, Hasher},
time::Instant,
};
use async_graphql::connection::{self, Connection, Edge};
use log::info;
use async_graphql::{
connection::{self, Connection, Edge},
Context, EmptySubscription, Enum, FieldResult, Object, Schema, SimpleObject, Union,
};
use log::{error, info, warn};
use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail};
use memmap::MmapOptions;
use notmuch::Notmuch;
use shared::Message;
use crate::{
error,
graphql::{Tag, ThreadSummary},
error::ServerError,
graphql::{
Attachment, Body, DispositionType, Email, Header, Html, Message, PlainText, Tag, Thread,
ThreadSummary, UnhandledContentType,
},
linkify_html, newsreader, nm, sanitize_html,
};
const TEXT_PLAIN: &'static str = "text/plain";
const TEXT_HTML: &'static str = "text/html";
const IMAGE_JPEG: &'static str = "image/jpeg";
const IMAGE_PNG: &'static str = "image/png";
const MULTIPART_ALTERNATIVE: &'static str = "multipart/alternative";
const MULTIPART_MIXED: &'static str = "multipart/mixed";
const MULTIPART_RELATED: &'static str = "multipart/related";
const MAX_RAW_MESSAGE_SIZE: usize = 100_000;
// TODO(wathiede): decide good error type
pub fn threadset_to_messages(
thread_set: notmuch::ThreadSet,
) -> Result<Vec<Message>, error::ServerError> {
pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result<Vec<Message>, ServerError> {
for t in thread_set.0 {
for _tn in t.0 {}
}
@ -85,7 +102,7 @@ pub async fn search(
.await
}
pub fn tags(nm: &Notmuch, needs_unread: bool) -> Result<Vec<Tag>, error::ServerError> {
pub fn tags(nm: &Notmuch, needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
let now = Instant::now();
let unread_msg_cnt: HashMap<String, usize> = if needs_unread {
// 10000 is an arbitrary number, if there's more than 10k unread messages, we'll
@ -125,3 +142,606 @@ pub fn tags(nm: &Notmuch, needs_unread: bool) -> Result<Vec<Tag>, error::ServerE
info!("Fetching tags took {} seconds", now.elapsed().as_secs_f32());
Ok(tags)
}
pub async fn thread(
nm: &Notmuch,
thread_id: String,
debug_content_tree: bool,
) -> Result<Thread, ServerError> {
// TODO(wathiede): normalize all email addresses through an address book with preferred
// display names (that default to the most commonly seen name).
let mut messages = Vec::new();
for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) {
let tags = nm.tags_for_query(&format!("id:{id}"))?;
let file = File::open(&path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let m = parse_mail(&mmap)?;
let from = email_addresses(&path, &m, "from")?;
let from = match from.len() {
0 => None,
1 => from.into_iter().next(),
_ => {
warn!(
"Got {} from addresses in message, truncating: {:?}",
from.len(),
from
);
from.into_iter().next()
}
};
let to = email_addresses(&path, &m, "to")?;
let cc = email_addresses(&path, &m, "cc")?;
let subject = m.headers.get_first_value("subject");
let timestamp = m
.headers
.get_first_value("date")
.and_then(|d| mailparse::dateparse(&d).ok());
let cid_prefix = shared::urls::cid_prefix(None, &id);
let body = match extract_body(&m, &id)? {
Body::PlainText(PlainText { text, content_tree }) => {
let text = if text.len() > MAX_RAW_MESSAGE_SIZE {
format!(
"{}...\n\nMESSAGE WAS TRUNCATED @ {} bytes",
&text[..MAX_RAW_MESSAGE_SIZE],
MAX_RAW_MESSAGE_SIZE
)
} else {
text
};
Body::Html(Html {
html: format!(
r#"<p class="view-part-text-plain">{}</p>"#,
// Trim newlines to prevent excessive white space at the beginning/end of
// presenation. Leave tabs and spaces incase plain text attempts to center a
// header on the first line.
sanitize_html(&linkify_html(&text.trim_matches('\n')), &cid_prefix)?
),
content_tree: if debug_content_tree {
render_content_type_tree(&m)
} else {
content_tree
},
})
}
Body::Html(Html { html, content_tree }) => Body::Html(Html {
html: sanitize_html(&html, &cid_prefix)?,
content_tree: if debug_content_tree {
render_content_type_tree(&m)
} else {
content_tree
},
}),
Body::UnhandledContentType(UnhandledContentType { content_tree, .. }) => {
let body_start = mmap
.windows(2)
.take(20_000)
.position(|w| w == b"\n\n")
.unwrap_or(0);
let body = mmap[body_start + 2..].to_vec();
Body::UnhandledContentType(UnhandledContentType {
text: String::from_utf8(body)?,
content_tree: if debug_content_tree {
render_content_type_tree(&m)
} else {
content_tree
},
})
}
};
let headers = m
.headers
.iter()
.map(|h| Header {
key: h.get_key(),
value: h.get_value(),
})
.collect();
// TODO(wathiede): parse message and fill out attachments
let attachments = extract_attachments(&m, &id)?;
messages.push(Message {
id,
from,
to,
cc,
subject,
tags,
timestamp,
headers,
body,
path,
attachments,
});
}
messages.reverse();
// Find the first subject that's set. After reversing the vec, this should be the oldest
// message.
let subject: String = messages
.iter()
.skip_while(|m| m.subject.is_none())
.next()
.and_then(|m| m.subject.clone())
.unwrap_or("(NO SUBJECT)".to_string());
Ok(Thread {
thread_id,
subject,
messages,
})
}
fn email_addresses(
path: &str,
m: &ParsedMail,
header_name: &str,
) -> Result<Vec<Email>, ServerError> {
let mut addrs = Vec::new();
for header_value in m.headers.get_all_values(header_name) {
match mailparse::addrparse(&header_value) {
Ok(mal) => {
for ma in mal.into_inner() {
match ma {
mailparse::MailAddr::Group(gi) => {
if !gi.group_name.contains("ndisclosed") {
println!("[{path}][{header_name}] Group: {gi}");
}
}
mailparse::MailAddr::Single(s) => addrs.push(Email {
name: s.display_name,
addr: Some(s.addr),
}), //println!("Single: {s}"),
}
}
}
Err(_) => {
let v = header_value;
if v.matches('@').count() == 1 {
if v.matches('<').count() == 1 && v.ends_with('>') {
let idx = v.find('<').unwrap();
let addr = &v[idx + 1..v.len() - 1].trim();
let name = &v[..idx].trim();
addrs.push(Email {
name: Some(name.to_string()),
addr: Some(addr.to_string()),
});
}
} else {
addrs.push(Email {
name: Some(v),
addr: None,
});
}
}
}
}
Ok(addrs)
}
pub fn cid_attachment_bytes(nm: &Notmuch, id: &str, cid: &str) -> Result<Attachment, ServerError> {
let files = nm.files(id)?;
let Some(path) = files.first() else {
warn!("failed to find files for message {id}");
return Err(ServerError::PartNotFound);
};
let file = File::open(&path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let m = parse_mail(&mmap)?;
if let Some(attachment) = walk_attachments(&m, |sp, _cur_idx| {
info!("{cid} {:?}", get_content_id(&sp.headers));
if let Some(h_cid) = get_content_id(&sp.headers) {
let h_cid = &h_cid[1..h_cid.len() - 1];
if h_cid == cid {
let attachment = extract_attachment(&sp, id, &[]).unwrap_or(Attachment {
..Attachment::default()
});
return Some(attachment);
}
}
None
}) {
return Ok(attachment);
}
Err(ServerError::PartNotFound)
}
pub fn attachment_bytes(nm: &Notmuch, id: &str, idx: &[usize]) -> Result<Attachment, ServerError> {
let files = nm.files(id)?;
let Some(path) = files.first() else {
warn!("failed to find files for message {id}");
return Err(ServerError::PartNotFound);
};
let file = File::open(&path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let m = parse_mail(&mmap)?;
if let Some(attachment) = walk_attachments(&m, |sp, cur_idx| {
if cur_idx == idx {
let attachment = extract_attachment(&sp, id, idx).unwrap_or(Attachment {
..Attachment::default()
});
return Some(attachment);
}
None
}) {
return Ok(attachment);
}
Err(ServerError::PartNotFound)
}
fn extract_body(m: &ParsedMail, id: &str) -> Result<Body, ServerError> {
let mut part_addr = Vec::new();
part_addr.push(id.to_string());
let body = m.get_body()?;
let ret = match m.ctype.mimetype.as_str() {
TEXT_PLAIN => return Ok(Body::text(body)),
TEXT_HTML => return Ok(Body::html(body)),
MULTIPART_MIXED => extract_mixed(m, &mut part_addr),
MULTIPART_ALTERNATIVE => extract_alternative(m, &mut part_addr),
MULTIPART_RELATED => extract_related(m, &mut part_addr),
_ => extract_unhandled(m),
};
if let Err(err) = ret {
error!("Failed to extract body: {err:?}");
return Ok(extract_unhandled(m)?);
}
ret
}
fn extract_unhandled(m: &ParsedMail) -> Result<Body, ServerError> {
let msg = format!(
"Unhandled body content type:\n{}\n{}",
render_content_type_tree(m),
m.get_body()?,
);
Ok(Body::UnhandledContentType(UnhandledContentType {
text: msg,
content_tree: render_content_type_tree(m),
}))
}
// multipart/alternative defines multiple representations of the same message, and clients should
// show the fanciest they can display. For this program, the priority is text/html, text/plain,
// then give up.
fn extract_alternative(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
let handled_types = vec![
MULTIPART_ALTERNATIVE,
MULTIPART_MIXED,
MULTIPART_RELATED,
TEXT_HTML,
TEXT_PLAIN,
];
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_ALTERNATIVE {
return extract_alternative(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_MIXED {
return extract_related(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == MULTIPART_RELATED {
return extract_related(sp, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == TEXT_HTML {
let body = sp.get_body()?;
return Ok(Body::html(body));
}
}
for sp in &m.subparts {
if sp.ctype.mimetype.as_str() == TEXT_PLAIN {
let body = sp.get_body()?;
return Ok(Body::text(body));
}
}
Err(ServerError::StringError(format!(
"extract_alternative failed to find suitable subpart, searched: {:?}",
handled_types
)))
}
// multipart/mixed defines multiple types of context all of which should be presented to the user
// 'serially'.
fn extract_mixed(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
let handled_types = vec![
MULTIPART_ALTERNATIVE,
MULTIPART_RELATED,
TEXT_HTML,
TEXT_PLAIN,
IMAGE_JPEG,
IMAGE_PNG,
];
let mut unhandled_types: Vec<_> = m
.subparts
.iter()
.map(|sp| sp.ctype.mimetype.as_str())
.filter(|mt| !handled_types.contains(&mt))
.collect();
unhandled_types.sort();
if !unhandled_types.is_empty() {
warn!("{MULTIPART_MIXED} contains the following unhandled mimetypes {unhandled_types:?}");
}
let mut parts = Vec::new();
for (idx, sp) in m.subparts.iter().enumerate() {
part_addr.push(idx.to_string());
match sp.ctype.mimetype.as_str() {
MULTIPART_RELATED => parts.push(extract_related(sp, part_addr)?),
MULTIPART_ALTERNATIVE => parts.push(extract_alternative(sp, part_addr)?),
TEXT_PLAIN => parts.push(Body::text(sp.get_body()?)),
TEXT_HTML => parts.push(Body::html(sp.get_body()?)),
IMAGE_JPEG | IMAGE_PNG => {
let pcd = sp.get_content_disposition();
let filename = pcd
.params
.get("filename")
.map(|s| s.clone())
.unwrap_or("".to_string());
// Only add inline images, attachments are handled as an attribute of the top level Message and rendered separate client-side.
if pcd.disposition == mailparse::DispositionType::Inline {
parts.push(Body::html(format!(
r#"<img src="/view/attachment/{}/{}/{filename}">"#,
part_addr[0],
part_addr
.iter()
.skip(1)
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join(".")
)));
}
}
_ => (),
}
part_addr.pop();
}
Ok(flatten_body_parts(&parts))
}
fn flatten_body_parts(parts: &[Body]) -> Body {
let html = parts
.iter()
.map(|p| match p {
Body::PlainText(PlainText { text, .. }) => {
format!(
r#"<p class="view-part-text-plain">{}</p>"#,
// Trim newlines to prevent excessive white space at the beginning/end of
// presenation. Leave tabs and spaces incase plain text attempts to center a
// header on the first line.
linkify_html(&text.trim_matches('\n'))
)
}
Body::Html(Html { html, .. }) => html.clone(),
Body::UnhandledContentType(UnhandledContentType { text, .. }) => {
error!("text len {}", text.len());
format!(
r#"<p class="view-part-unhandled">{}</p>"#,
// Trim newlines to prevent excessive white space at the beginning/end of
// presenation. Leave tabs and spaces incase plain text attempts to center a
// header on the first line.
linkify_html(&text.trim_matches('\n'))
)
}
})
.collect::<Vec<_>>()
.join("\n");
info!("flatten_body_parts {} {html}", parts.len());
Body::html(html)
}
fn extract_related(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
// TODO(wathiede): collect related things and change return type to new Body arm.
let handled_types = vec![
MULTIPART_ALTERNATIVE,
TEXT_HTML,
TEXT_PLAIN,
IMAGE_JPEG,
IMAGE_PNG,
];
let mut unhandled_types: Vec<_> = m
.subparts
.iter()
.map(|sp| sp.ctype.mimetype.as_str())
.filter(|mt| !handled_types.contains(&mt))
.collect();
unhandled_types.sort();
if !unhandled_types.is_empty() {
warn!("{MULTIPART_RELATED} contains the following unhandled mimetypes {unhandled_types:?}");
}
for (i, sp) in m.subparts.iter().enumerate() {
if sp.ctype.mimetype == IMAGE_PNG || sp.ctype.mimetype == IMAGE_JPEG {
info!("sp.ctype {:#?}", sp.ctype);
//info!("sp.headers {:#?}", sp.headers);
if let Some(cid) = sp.headers.get_first_value("Content-Id") {
let mut part_id = part_addr.clone();
part_id.push(i.to_string());
info!("cid: {cid} part_id {part_id:?}");
}
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == MULTIPART_ALTERNATIVE {
return extract_alternative(m, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == TEXT_HTML {
let body = sp.get_body()?;
return Ok(Body::html(body));
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == TEXT_PLAIN {
let body = sp.get_body()?;
return Ok(Body::text(body));
}
}
Err(ServerError::StringError(format!(
"extract_related failed to find suitable subpart, searched: {:?}",
handled_types
)))
}
fn walk_attachments<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
m: &ParsedMail,
visitor: F,
) -> Option<T> {
let mut cur_addr = Vec::new();
walk_attachments_inner(m, visitor, &mut cur_addr)
}
fn walk_attachments_inner<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
m: &ParsedMail,
visitor: F,
cur_addr: &mut Vec<usize>,
) -> Option<T> {
for (idx, sp) in m.subparts.iter().enumerate() {
cur_addr.push(idx);
let val = visitor(sp, &cur_addr);
if val.is_some() {
return val;
}
let val = walk_attachments_inner(sp, visitor, cur_addr);
if val.is_some() {
return val;
}
cur_addr.pop();
}
None
}
// TODO(wathiede): make this walk_attachments that takes a closure.
// Then implement one closure for building `Attachment` and imlement another that can be used to
// get the bytes for serving attachments of HTTP
fn extract_attachments(m: &ParsedMail, id: &str) -> Result<Vec<Attachment>, ServerError> {
let mut attachments = Vec::new();
for (idx, sp) in m.subparts.iter().enumerate() {
if let Some(attachment) = extract_attachment(sp, id, &[idx]) {
// Filter out inline attachements, they're flattened into the body of the message.
if attachment.disposition == DispositionType::Attachment {
attachments.push(attachment);
}
}
}
Ok(attachments)
}
fn extract_attachment(m: &ParsedMail, id: &str, idx: &[usize]) -> Option<Attachment> {
let pcd = m.get_content_disposition();
// TODO: do we need to handle empty filename attachments, or should we change the definition of
// Attachment::filename?
let Some(filename) = pcd.params.get("filename").map(|f| f.clone()) else {
return None;
};
// TODO: grab this from somewhere
let content_id = None;
let bytes = match m.get_body_raw() {
Ok(bytes) => bytes,
Err(err) => {
error!("failed to get body for attachment: {err}");
return None;
}
};
return Some(Attachment {
id: id.to_string(),
idx: idx
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join("."),
disposition: pcd.disposition.into(),
filename: Some(filename),
size: bytes.len(),
// TODO: what is the default for ctype?
// TODO: do we want to use m.ctype.params for anything?
content_type: Some(m.ctype.mimetype.clone()),
content_id,
bytes,
});
}
pub fn get_attachment_filename(header_value: &str) -> &str {
info!("get_attachment_filename {header_value}");
// Strip last "
let v = &header_value[..header_value.len() - 1];
if let Some(idx) = v.rfind('"') {
&v[idx + 1..]
} else {
""
}
}
pub fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
if let Some(v) = headers.get_first_value("Content-Type") {
if let Some(idx) = v.find(';') {
return Some(v[..idx].to_string());
} else {
return Some(v);
}
}
None
}
fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
headers.get_first_value("Content-Id")
}
fn render_content_type_tree(m: &ParsedMail) -> String {
const WIDTH: usize = 4;
const SKIP_HEADERS: [&str; 4] = [
"Authentication-Results",
"DKIM-Signature",
"Received",
"Received-SPF",
];
fn render_ct_rec(m: &ParsedMail, depth: usize) -> String {
let mut parts = Vec::new();
let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype);
parts.push(msg);
for sp in &m.subparts {
parts.push(render_ct_rec(sp, depth + 1))
}
parts.join("\n")
}
fn render_rec(m: &ParsedMail, depth: usize) -> String {
let mut parts = Vec::new();
let msg = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype);
parts.push(msg);
let indent = " ".repeat(depth * WIDTH);
if !m.ctype.charset.is_empty() {
parts.push(format!("{indent} Character Set: {}", m.ctype.charset));
}
for (k, v) in m.ctype.params.iter() {
parts.push(format!("{indent} {k}: {v}"));
}
if !m.headers.is_empty() {
parts.push(format!("{indent} == headers =="));
for h in &m.headers {
if h.get_key().starts_with('X') {
continue;
}
if SKIP_HEADERS.contains(&h.get_key().as_str()) {
continue;
}
parts.push(format!("{indent} {}: {}", h.get_key_ref(), h.get_value()));
}
}
for sp in &m.subparts {
parts.push(render_rec(sp, depth + 1))
}
parts.join("\n")
}
format!(
"Outline:\n{}\n\nDetailed:\n{}\n\nNot showing headers:\n {}\n X.*",
render_ct_rec(m, 1),
render_rec(m, 1),
SKIP_HEADERS.join("\n ")
)
}