// letterbox/server/src/graphql.rs

use std::{
    collections::HashMap,
    fs::File,
    hash::{DefaultHasher, Hash, Hasher},
    str::FromStr,
};

use async_graphql::{
    connection::{self, Connection, Edge},
    Context, EmptyMutation, EmptySubscription, Enum, Error, FieldResult, Object, Schema,
    SimpleObject, Union,
};
use log::{error, info, warn};
use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail};
use memmap::MmapOptions;
use notmuch::Notmuch;
use rocket::time::Instant;

/// Root of the GraphQL query schema; its resolvers (`count`, `search`, `tags`, `thread`) live in
/// the `#[Object] impl QueryRoot` block of this file.
pub struct QueryRoot;

/// Number of seconds since the Epoch.
///
/// NOTE: `isize` is pointer-sized, so the representable range depends on the target platform.
pub type UnixTime = isize;

/// Thread ID, without the leading `thread:` prefix.
pub type ThreadId = String;

/// One row of a search result: a summary of a single thread.
///
/// Every field is copied verbatim from the thread summary returned by `Notmuch::search`.
#[derive(Debug, SimpleObject)]
pub struct ThreadSummary {
    /// thread identifier (see `ThreadId`)
    pub thread: ThreadId,
    /// thread timestamp (see `UnixTime`)
    pub timestamp: UnixTime,
    /// user-friendly timestamp
    pub date_relative: String,
    /// number of matched messages
    pub matched: isize,
    /// total messages in thread
    pub total: isize,
    /// comma-separated names with | between matched and unmatched
    pub authors: String,
    /// subject reported for the thread
    pub subject: String,
    /// tags reported for the thread
    pub tags: Vec<String>,
}

/// A fully loaded thread, as returned by the `thread` query.
#[derive(Debug, SimpleObject)]
pub struct Thread {
    /// First subject found among `messages`, or `(NO SUBJECT)` when no message has one.
    subject: String,
    /// Parsed messages belonging to the thread.
    messages: Vec<Message>,
}

/// A single parsed email message within a thread.
#[derive(Debug, SimpleObject)]
pub struct Message {
    /// Message-ID for message, prepend `id:<id>` to search in notmuch
    pub id: String,
    /// First From header found in email
    pub from: Option<Email>,
    /// All To headers found in email
    pub to: Vec<Email>,
    /// All CC headers found in email
    pub cc: Vec<Email>,
    /// First Subject header found in email
    pub subject: Option<String>,
    /// Parsed Date header, if found and valid
    pub timestamp: Option<i64>,
    /// Headers
    pub headers: Vec<Header>,
    /// The body contents
    pub body: Body,
    /// On disk location of message
    pub path: String,
    /// Attachments found in the message's MIME parts
    pub attachments: Vec<Attachment>,
    /// Tags notmuch reports for this message
    pub tags: Vec<String>,
}

// Example of the MIME headers carried by an attachment part:
//
// Content-Type: image/jpeg; name="PXL_20231125_204826860.jpg"
// Content-Disposition: attachment; filename="PXL_20231125_204826860.jpg"
// Content-Transfer-Encoding: base64
// Content-ID: <f_lponoluo1>
// X-Attachment-Id: f_lponoluo1
/// Metadata for a message part whose `Content-Disposition` type is `attachment`.
#[derive(Debug, SimpleObject)]
pub struct Attachment {
    /// File name taken from the part's `Content-Disposition` header value.
    filename: String,
    /// The part's `Content-Type` header value with any `;` parameters removed.
    content_type: Option<String>,
    /// The part's `Content-ID` header value, if present.
    content_id: Option<String>,
}

/// The disposition types of a `Content-Disposition` header that this module understands.
#[derive(Debug, Enum, Copy, Clone, Eq, PartialEq)]
enum DispositionType {
    /// `inline`
    Inline,
    /// `attachment`
    Attachment,
}

impl FromStr for DispositionType {
    type Err = String;

    /// Parses the disposition-type token of a `Content-Disposition` header.
    ///
    /// Surrounding whitespace is ignored and matching is ASCII case-insensitive, because
    /// disposition types are case-insensitive (RFC 2183), so `Attachment` and `INLINE` are
    /// accepted as well as the lowercase spellings.
    ///
    /// # Errors
    /// Returns `unknown disposition type: <input>` for any other token.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let token = s.trim();
        if token.eq_ignore_ascii_case("inline") {
            Ok(DispositionType::Inline)
        } else if token.eq_ignore_ascii_case("attachment") {
            Ok(DispositionType::Attachment)
        } else {
            Err(format!("unknown disposition type: {s}"))
        }
    }
}

/// A single raw message header as a key/value pair.
#[derive(Debug, SimpleObject)]
pub struct Header {
    /// Header name, as returned by `MailHeader::get_key`.
    key: String,
    /// Header value, as returned by `MailHeader::get_value`.
    value: String,
}

/// Body placeholder used when no displayable part could be extracted from a message.
#[derive(Debug)]
pub struct UnhandledContentType {
    // Human-readable explanation, including a rendering of the message's MIME tree.
    text: String,
}

#[Object]
impl UnhandledContentType {
    /// Explanation of why the body could not be rendered.
    async fn contents(&self) -> &str {
        self.text.as_str()
    }
}

/// Message body taken from a `text/plain` part.
#[derive(Debug)]
pub struct PlainText {
    // The plain-text body.
    text: String,
    // Debug rendering of the message's MIME structure; left empty unless the query asks for it.
    content_tree: String,
}

#[Object]
impl PlainText {
    /// The plain-text body.
    async fn contents(&self) -> &str {
        self.text.as_str()
    }
    /// Debug rendering of the message's MIME structure.
    async fn content_tree(&self) -> &str {
        self.content_tree.as_str()
    }
}

/// Message body taken from a `text/html` part.
#[derive(Debug)]
pub struct Html {
    // The HTML body.
    html: String,
    // Debug rendering of the message's MIME structure; left empty unless the query asks for it.
    content_tree: String,
}

#[Object]
impl Html {
    /// The HTML body.
    async fn contents(&self) -> &str {
        self.html.as_str()
    }
    /// Debug rendering of the message's MIME structure.
    async fn content_tree(&self) -> &str {
        self.content_tree.as_str()
    }
    /// Currently always an empty list.
    async fn headers(&self) -> Vec<Header> {
        vec![]
    }
}

/// The displayable body of a message: one variant per supported representation.
#[derive(Debug, Union)]
pub enum Body {
    UnhandledContentType(UnhandledContentType),
    PlainText(PlainText),
    Html(Html),
}

impl Body {
    /// Wraps `html` as an HTML body with an empty content tree.
    fn html(html: String) -> Body {
        let content_tree = String::new();
        Body::Html(Html { html, content_tree })
    }
    /// Wraps `text` as a plain-text body with an empty content tree.
    fn text(text: String) -> Body {
        let content_tree = String::new();
        Body::PlainText(PlainText { text, content_tree })
    }
}

/// One participant taken from an address header (`From`, `To`, `Cc`).
#[derive(Debug, SimpleObject)]
pub struct Email {
    /// Display name, when the header carried one. For headers that could not be parsed this may
    /// hold the raw header value instead.
    pub name: Option<String>,
    /// Email address; `None` when no address could be recovered from the header.
    pub addr: Option<String>,
}

/// A notmuch tag together with presentation hints for the UI.
#[derive(SimpleObject)]
struct Tag {
    /// Tag name as reported by notmuch.
    name: String,
    /// Foreground colour; the `tags` resolver always sets this to `white`.
    fg_color: String,
    /// Background colour as `#rrggbb`, derived from a hash of the tag name.
    bg_color: String,
    /// Number of `is:unread` search results carrying this tag; 0 when the field is not requested.
    unread: usize,
}

#[Object]
impl QueryRoot {
    /// Returns what `Notmuch::count` reports for `query`.
    async fn count<'ctx>(&self, ctx: &Context<'ctx>, query: String) -> Result<usize, Error> {
        let total = ctx.data_unchecked::<Notmuch>().count(&query)?;
        Ok(total)
    }

    /// Paginated search over the notmuch results for `query`.
    ///
    /// Cursors are zero-based offsets into the result list. `after` and `before` bound the window
    /// (both exclusive); `first` keeps the leading entries of that window and `last` the trailing
    /// ones. When neither `first` nor `last` is given, `first` defaults to 20.
    ///
    /// Out-of-range cursors are clamped to the result count, so a cursor at or past the end
    /// yields an empty page instead of an arithmetic underflow. A `last` larger than the window
    /// returns the whole window, as the Relay connection spec requires.
    ///
    /// # Errors
    /// Propagates errors from `Notmuch::count` / `Notmuch::search` and whatever
    /// `connection::query` itself reports.
    async fn search<'ctx>(
        &self,
        ctx: &Context<'ctx>,
        after: Option<String>,
        before: Option<String>,
        first: Option<i32>,
        last: Option<i32>,
        query: String,
    ) -> Result<Connection<usize, ThreadSummary>, Error> {
        let nm = ctx.data_unchecked::<Notmuch>();
        connection::query(
            after,
            before,
            first,
            last,
            |after: Option<usize>, before: Option<usize>, first, last| async move {
                let total = nm.count(&query)?;
                let (first, last) = if let (None, None) = (first, last) {
                    info!("neither first nor last set, defaulting first to 20");
                    (Some(20), None)
                } else {
                    (first, last)
                };

                // Invariant maintained below: start <= end <= total, so the subtractions can
                // never underflow.
                let mut end = before.unwrap_or(total).min(total);
                let mut start = after.map_or(0, |after| after.saturating_add(1)).min(end);
                if let Some(first) = first {
                    end = start.saturating_add(first).min(end);
                }
                if let Some(last) = last {
                    // Keep at most `last` trailing entries of the window.
                    start = end - last.min(end - start);
                }

                let count = end - start;
                let slice: Vec<ThreadSummary> = nm
                    .search(&query, start, count)?
                    .0
                    .into_iter()
                    .map(|ts| ThreadSummary {
                        thread: ts.thread,
                        timestamp: ts.timestamp,
                        date_relative: ts.date_relative,
                        matched: ts.matched,
                        total: ts.total,
                        authors: ts.authors,
                        subject: ts.subject,
                        tags: ts.tags,
                    })
                    .collect();

                // has_previous_page / has_next_page
                let mut connection = Connection::new(start > 0, end < total);
                connection.edges.extend(
                    slice
                        .into_iter()
                        .enumerate()
                        .map(|(idx, item)| Edge::new(start + idx, item)),
                );
                Ok::<_, Error>(connection)
            },
        )
        .await
    }

    /// Lists every notmuch tag with a colour derived from its name and, when the `unread` field
    /// is selected, the number of `is:unread` search results carrying it.
    async fn tags<'ctx>(&self, ctx: &Context<'ctx>) -> FieldResult<Vec<Tag>> {
        let nm = ctx.data_unchecked::<Notmuch>();
        let now = Instant::now();
        // Only pay for the unread search when the client actually asked for the field.
        let needs_unread = ctx.look_ahead().field("unread").exists();
        let mut unread_msg_cnt: HashMap<String, usize> = HashMap::new();
        if needs_unread {
            // 10000 is an arbitrary number, if there's more than 10k unread messages, we'll
            //  get an inaccurate count.
            for ts in nm.search("is:unread", 0, 10000)?.0.iter() {
                for t in ts.tags.iter() {
                    *unread_msg_cnt.entry(t.clone()).or_insert(0) += 1;
                }
            }
        }
        let mut tags = Vec::new();
        for name in nm.tags()? {
            // Background colour: low 24 bits of the tag name's hash, rendered as #rrggbb.
            let mut hasher = DefaultHasher::new();
            name.hash(&mut hasher);
            let bg_color = format!("#{:06x}", hasher.finish() % (1 << 24));
            // The map is empty when `unread` was not requested, so this yields 0 in that case.
            let unread = unread_msg_cnt.get(&name).copied().unwrap_or(0);
            tags.push(Tag {
                name,
                fg_color: "white".to_string(),
                bg_color,
                unread,
            });
        }
        info!("Fetching tags took {}", now.elapsed());
        Ok(tags)
    }
    /// Loads every message of `thread_id`, parsing each message file from disk.
    ///
    /// For each message this collects notmuch tags, addresses, subject, date, headers, a
    /// displayable body (HTML is sanitized with `ammonia`) and attachment metadata. The thread
    /// subject is the first subject found after the message list has been reversed.
    ///
    /// # Errors
    /// Propagates notmuch errors, I/O errors from opening or mapping a message file, and mail
    /// parsing errors.
    async fn thread<'ctx>(&self, ctx: &Context<'ctx>, thread_id: String) -> Result<Thread, Error> {
        // TODO(wathiede): normalize all email addresses through an address book with preferred
        // display names (that default to the most commonly seen name).
        let nm = ctx.data_unchecked::<Notmuch>();
        // Rendering the MIME tree is only done when the client selects `contentTree`.
        let debug_content_tree = ctx
            .look_ahead()
            .field("messages")
            .field("body")
            .field("contentTree")
            .exists();
        let mut messages = Vec::new();
        // NOTE(review): assumes `files` and `message_ids` return entries in matching order; `zip`
        // silently stops at the shorter list — TODO confirm.
        for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) {
            info!("{id}\nfile: {path}");
            let msg = nm.show(&format!("id:{id}"))?;
            // Checked access: an empty `show` result yields no tags instead of panicking.
            let tags = msg
                .0
                .first()
                .and_then(|thread| thread.0.first())
                .and_then(|node| node.0.as_ref())
                .map(|m| m.tags.clone())
                .unwrap_or_default();
            let file = File::open(&path)?;
            // SAFETY: the mapping is only read while `file` is alive in this iteration. This
            // relies on the message file not being truncated or rewritten concurrently, which
            // the code cannot enforce.
            let mmap = unsafe { MmapOptions::new().map(&file)? };
            let m = parse_mail(&mmap)?;
            let from = email_addresses(&path, &m, "from")?;
            let from = match from.len() {
                0 => None,
                1 => from.into_iter().next(),
                _ => {
                    warn!(
                        "Got {} from addresses in message, truncating: {:?}",
                        from.len(),
                        from
                    );
                    from.into_iter().next()
                }
            };
            let to = email_addresses(&path, &m, "to")?;
            let cc = email_addresses(&path, &m, "cc")?;
            let subject = m.headers.get_first_value("subject");
            let timestamp = m
                .headers
                .get_first_value("date")
                .and_then(|d| mailparse::dateparse(&d).ok());
            let body = match extract_body(&m)? {
                Body::PlainText(PlainText { text, content_tree }) => Body::PlainText(PlainText {
                    text,
                    content_tree: if debug_content_tree {
                        render_content_type_tree(&m)
                    } else {
                        content_tree
                    },
                }),
                Body::Html(Html { html, content_tree }) => Body::Html(Html {
                    // Sanitize untrusted HTML before handing it to clients.
                    html: ammonia::clean(&html),
                    content_tree: if debug_content_tree {
                        render_content_type_tree(&m)
                    } else {
                        content_tree
                    },
                }),
                b => b,
            };
            let headers = m
                .headers
                .iter()
                .map(|h| Header {
                    key: h.get_key(),
                    value: h.get_value(),
                })
                .collect();
            let attachments = extract_attachments(&m)?;
            messages.push(Message {
                id,
                from,
                to,
                cc,
                subject,
                tags,
                timestamp,
                headers,
                body,
                path,
                attachments,
            });
        }
        messages.reverse();
        // Find the first subject that's set. After reversing the vec, this should be the oldest
        // message.
        let subject: String = messages
            .iter()
            .find_map(|m| m.subject.clone())
            .unwrap_or_else(|| "(NO SUBJECT)".to_string());
        Ok(Thread { subject, messages })
    }
}

fn extract_body(m: &ParsedMail) -> Result<Body, Error> {
    let body = m.get_body()?;
    let ret = match m.ctype.mimetype.as_str() {
        "text/plain" => return Ok(Body::text(body)),
        "text/html" => return Ok(Body::html(body)),
        "multipart/mixed" => extract_mixed(m),
        "multipart/alternative" => extract_alternative(m),
        _ => extract_unhandled(m),
    };
    if let Err(err) = ret {
        error!("Failed to extract body: {err:?}");
        return Ok(extract_unhandled(m)?);
    }
    ret
}

/// Builds the fallback body for messages whose content cannot be displayed: a warning is logged
/// and the same text, including the rendered MIME tree, is returned as the body.
fn extract_unhandled(m: &ParsedMail) -> Result<Body, Error> {
    let tree = render_content_type_tree(m);
    let text = format!("Unhandled body content type:\n{}", tree);
    warn!("{}", text);
    Ok(Body::UnhandledContentType(UnhandledContentType { text }))
}

// multipart/alternative carries several representations of the same message; a client should
// show the richest one it can render. Here the preference order is text/html, then text/plain;
// if neither is present among the direct subparts an error is returned.
fn extract_alternative(m: &ParsedMail) -> Result<Body, Error> {
    let parts = &m.subparts;
    if let Some(html_part) = parts.iter().find(|sp| sp.ctype.mimetype == "text/html") {
        return Ok(Body::html(html_part.get_body()?));
    }
    if let Some(text_part) = parts.iter().find(|sp| sp.ctype.mimetype == "text/plain") {
        return Ok(Body::text(text_part.get_body()?));
    }
    Err("extract_alternative".into())
}

// multipart/mixed defines multiple types of content all of which should be presented to the user
// 'serially'.
//
// Only one body is returned. Preference order among the direct subparts: the first
// multipart/alternative, then the first multipart/related, then the first text/plain or text/html
// part. A part is decoded only once it has been selected, so attachments are never decoded just
// to be discarded and an undecodable attachment cannot hide a later text part.
fn extract_mixed(m: &ParsedMail) -> Result<Body, Error> {
    let first_of = |mimetype: &str| m.subparts.iter().find(|sp| sp.ctype.mimetype == mimetype);
    if let Some(sp) = first_of("multipart/alternative") {
        return extract_alternative(sp);
    }
    if let Some(sp) = first_of("multipart/related") {
        return extract_related(sp);
    }
    for sp in &m.subparts {
        match sp.ctype.mimetype.as_str() {
            "text/plain" => return Ok(Body::text(sp.get_body()?)),
            "text/html" => return Ok(Body::html(sp.get_body()?)),
            _ => (),
        }
    }
    Err("extract_mixed".into())
}

/// Extracts a body from a multipart/related part: the first text/html subpart wins, otherwise
/// the first text/plain subpart; an error is returned when neither exists.
fn extract_related(m: &ParsedMail) -> Result<Body, Error> {
    // TODO(wathiede): collect related things and change return type to new Body arm.
    let preferences: [(&str, fn(String) -> Body); 2] =
        [("text/html", Body::html), ("text/plain", Body::text)];
    for (mimetype, wrap) in preferences {
        let candidate = m.subparts.iter().find(|sp| sp.ctype.mimetype == mimetype);
        if let Some(part) = candidate {
            return Ok(wrap(part.get_body()?));
        }
    }
    Err("extract_related".into())
}

// TODO(wathiede): make this walk_attachments that takes a closure.
// Then implement one closure for building `Attachment` and implement another that can be used to
// get the bytes for serving attachments over HTTP.

/// Collects metadata for every direct subpart of `m` whose `Content-Disposition` type is
/// `attachment`.
///
/// The header name is matched case-insensitively, and a disposition without any `;` parameters
/// (e.g. a bare `attachment`) is accepted; the file name is then whatever
/// `get_attachment_filename` can recover. `inline` parts are skipped, and unknown disposition
/// types are logged and skipped. Nested subparts are not searched.
fn extract_attachments(m: &ParsedMail) -> Result<Vec<Attachment>, Error> {
    let mut attachments = Vec::new();
    for sp in &m.subparts {
        for h in &sp.headers {
            if !h.get_key().eq_ignore_ascii_case("Content-Disposition") {
                continue;
            }
            let v = h.get_value();
            // The disposition type is everything before the first ';' (or the whole value).
            let dt = v.split(';').next().unwrap_or("").trim();
            match DispositionType::from_str(dt) {
                Ok(DispositionType::Attachment) => attachments.push(Attachment {
                    filename: get_attachment_filename(&v).to_string(),
                    content_type: get_content_type(&sp.headers),
                    content_id: get_content_id(&sp.headers),
                }),
                Ok(DispositionType::Inline) => {}
                Err(e) => warn!("failed to parse Content-Disposition type '{}'", e),
            }
        }
    }
    Ok(attachments)
}

/// Extracts the `filename` parameter from a `Content-Disposition` header value.
///
/// The value is split into `;`-separated parameters (semicolons inside double quotes do not
/// split), the parameter named `filename` is located case-insensitively, and one pair of
/// surrounding double quotes is removed from its value. Quoted and unquoted file names are both
/// supported, in any parameter position.
///
/// Returns `""` when there is no `filename` parameter, including for an empty input. Backslash
/// escapes inside a quoted value are returned as written, and RFC 2231 `filename*=` parameters
/// are not decoded.
fn get_attachment_filename(header_value: &str) -> &str {
    // Splits off the text before the first ';' that is not inside a quoted string.
    fn split_param(s: &str) -> (&str, &str) {
        let mut in_quotes = false;
        let mut escaped = false;
        for (i, c) in s.char_indices() {
            match c {
                _ if escaped => escaped = false,
                '\\' if in_quotes => escaped = true,
                '"' => in_quotes = !in_quotes,
                ';' if !in_quotes => return (&s[..i], &s[i + 1..]),
                _ => {}
            }
        }
        (s, "")
    }

    // The first segment is the disposition type itself; parameters follow it.
    let (_, mut rest) = split_param(header_value);
    while !rest.is_empty() {
        let (param, tail) = split_param(rest);
        rest = tail;
        if let Some((name, value)) = param.split_once('=') {
            if name.trim().eq_ignore_ascii_case("filename") {
                let value = value.trim();
                return value
                    .strip_prefix('"')
                    .and_then(|v| v.strip_suffix('"'))
                    .unwrap_or(value);
            }
        }
    }
    ""
}

/// Returns the media type from the first `Content-Type` header in `headers`.
///
/// The header name is matched ASCII case-insensitively, since header field names are not
/// case-sensitive. Anything from the first `;` onwards (parameters such as `name=` or
/// `charset=`) is dropped and surrounding whitespace is trimmed. Returns `None` when no such
/// header exists.
fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
    headers
        .iter()
        .find(|h| h.get_key().eq_ignore_ascii_case("Content-Type"))
        .map(|h| {
            let v = h.get_value();
            let mimetype = v.split(';').next().unwrap_or("");
            mimetype.trim().to_string()
        })
}

/// Returns the value of the first `Content-ID` header in `headers`, or `None` if there is none.
///
/// The header name is matched ASCII case-insensitively, since header field names are not
/// case-sensitive. The value is returned as provided by the header, angle brackets included.
fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
    headers
        .iter()
        .find(|h| h.get_key().eq_ignore_ascii_case("Content-ID"))
        .map(|h| h.get_value())
}

/// Renders the MIME structure of `m` as indented text for debugging.
///
/// Each part contributes a dashed line with its mimetype, followed by its character set (if
/// any), content-type parameters and headers; subparts follow, indented one level deeper.
fn render_content_type_tree(m: &ParsedMail) -> String {
    const WIDTH: usize = 4;
    // Appends the lines for `part` and, recursively, for all of its subparts.
    fn collect_lines(part: &ParsedMail, depth: usize, lines: &mut Vec<String>) {
        let pad = " ".repeat(depth * WIDTH);
        let rule = "-".repeat(depth * WIDTH);
        lines.push(format!("{} {}", rule, part.ctype.mimetype));
        if !part.ctype.charset.is_empty() {
            lines.push(format!("{pad} Character Set: {}", part.ctype.charset));
        }
        lines.extend(
            part.ctype
                .params
                .iter()
                .map(|(k, v)| format!("{pad} {k}: {v}")),
        );
        if !part.headers.is_empty() {
            lines.push(format!("{pad} == headers =="));
            lines.extend(
                part.headers
                    .iter()
                    .map(|h| format!("{pad} {}: {}", h.get_key(), h.get_value())),
            );
        }
        for child in &part.subparts {
            collect_lines(child, depth + 1, lines);
        }
    }
    let mut lines = Vec::new();
    collect_lines(m, 1, &mut lines);
    lines.join("\n")
}

/// The concrete schema type served by this module: queries only, with no mutations or
/// subscriptions.
pub type GraphqlSchema = Schema<QueryRoot, EmptyMutation, EmptySubscription>;

fn email_addresses(path: &str, m: &ParsedMail, header_name: &str) -> Result<Vec<Email>, Error> {
    let mut addrs = Vec::new();
    for header_value in m.headers.get_all_values(header_name) {
        match mailparse::addrparse(&header_value) {
            Ok(mal) => {
                for ma in mal.into_inner() {
                    match ma {
                        mailparse::MailAddr::Group(gi) => {
                            if !gi.group_name.contains("ndisclosed") {
                                println!("[{path}][{header_name}] Group: {gi}");
                            }
                        }
                        mailparse::MailAddr::Single(s) => addrs.push(Email {
                            name: s.display_name,
                            addr: Some(s.addr),
                        }), //println!("Single: {s}"),
                    }
                }
            }
            Err(_) => {
                let v = header_value;
                if v.matches('@').count() == 1 {
                    if v.matches('<').count() == 1 && v.ends_with('>') {
                        let idx = v.find('<').unwrap();
                        let addr = &v[idx + 1..v.len() - 1].trim();
                        let name = &v[..idx].trim();
                        addrs.push(Email {
                            name: Some(name.to_string()),
                            addr: Some(addr.to_string()),
                        });
                    }
                } else {
                    addrs.push(Email {
                        name: Some(v),
                        addr: None,
                    });
                }
            }
        }
    }
    Ok(addrs)
}