letterbox/server/src/nm.rs

use std::{
    collections::HashMap,
    fs::File,
    hash::{DefaultHasher, Hash, Hasher},
    time::Instant,
};

use log::{error, info, warn};
use mailparse::{parse_mail, MailHeader, MailHeaderMap, ParsedMail};
use memmap::MmapOptions;
use notmuch::Notmuch;

use crate::{
    compute_offset_limit,
    error::ServerError,
    graphql::{
        Attachment, Body, DispositionType, Email, EmailThread, Header, Html, Message, PlainText,
        Tag, Thread, ThreadSummary, UnhandledContentType,
    },
    linkify_html, sanitize_html,
};

// MIME types this module matches on when walking a parsed message.
const TEXT_PLAIN: &str = "text/plain";
const TEXT_HTML: &str = "text/html";
const IMAGE_JPEG: &str = "image/jpeg";
const IMAGE_PNG: &str = "image/png";
const MULTIPART_ALTERNATIVE: &str = "multipart/alternative";
const MULTIPART_MIXED: &str = "multipart/mixed";
const MULTIPART_RELATED: &str = "multipart/related";

// Plain-text bodies longer than this many bytes are truncated before being rendered.
const MAX_RAW_MESSAGE_SIZE: usize = 100_000;

// TODO(wathiede): decide good error type
/// Placeholder conversion from a notmuch thread set to GraphQL messages.
///
/// Every thread and thread node is consumed without being inspected, and the result is always
/// an empty vector.
pub fn threadset_to_messages(thread_set: notmuch::ThreadSet) -> Result<Vec<Message>, ServerError> {
    thread_set
        .0
        .into_iter()
        .for_each(|thread| thread.0.into_iter().for_each(drop));
    Ok(Vec::new())
}

/// Returns the count notmuch reports for `query`.
///
/// # Errors
/// Propagates any error returned by `Notmuch::count`, converted into `ServerError`.
pub async fn count(nm: &Notmuch, query: &str) -> Result<usize, ServerError> {
    let matches = nm.count(query)?;
    Ok(matches)
}

/// Runs `query` against notmuch and returns one page of thread summaries.
///
/// The paging arguments (`after`, `before`, `first`, `last`) are turned into an offset/limit
/// pair by `compute_offset_limit`. Each returned tuple pairs a summary with its absolute
/// position in the result set (`offset + index within the page`).
///
/// # Errors
/// Propagates any error from `Notmuch::search`.
pub async fn search(
    nm: &Notmuch,
    after: Option<i32>,
    before: Option<i32>,
    first: Option<i32>,
    last: Option<i32>,
    query: String,
) -> Result<Vec<(i32, ThreadSummary)>, async_graphql::Error> {
    let (offset, limit) = compute_offset_limit(after, before, first, last);
    // When paging forward, fetch one extra row so the caller can tell whether another page
    // exists. Paging backwards already implies a following page, because the item identified by
    // `before` lives on it.
    let limit = if before.is_none() { limit + 1 } else { limit };

    let summaries = nm.search(&query, offset as usize, limit as usize)?.0;
    let mut page = Vec::new();
    for (i, ts) in summaries.into_iter().enumerate() {
        let summary = ThreadSummary {
            thread: format!("thread:{}", ts.thread),
            timestamp: ts.timestamp,
            date_relative: ts.date_relative,
            matched: ts.matched,
            total: ts.total,
            authors: ts.authors,
            subject: ts.subject,
            tags: ts.tags,
        };
        page.push((offset + i as i32, summary));
    }
    Ok(page)
}

/// Lists every tag known to notmuch, each with a display colour and an unread count.
///
/// The background colour is derived from the low 24 bits of the tag name's hash, so a given
/// tag keeps the same colour for as long as the hasher's output is stable. When `needs_unread`
/// is false the unread search is skipped and every count is reported as 0.
///
/// # Errors
/// Propagates errors from `Notmuch::search` and `Notmuch::tags`.
pub fn tags(nm: &Notmuch, needs_unread: bool) -> Result<Vec<Tag>, ServerError> {
    let started = Instant::now();

    let mut unread_by_tag: HashMap<String, usize> = HashMap::new();
    if needs_unread {
        // 10000 is an arbitrary cap; with more than 10k unread results the counts will be
        // inaccurate.
        for summary in nm.search("is:unread", 0, 10000)?.0.iter() {
            for tag in summary.tags.iter() {
                *unread_by_tag.entry(tag.clone()).or_insert(0) += 1;
            }
        }
    }

    let mut tags = Vec::new();
    for name in nm.tags()? {
        let mut hasher = DefaultHasher::new();
        name.hash(&mut hasher);
        let bg_color = format!("#{:06x}", hasher.finish() % (1 << 24));
        // The map is empty when unread counts were not requested, so this yields 0 in that case.
        let unread = unread_by_tag.get(&name).copied().unwrap_or(0);
        tags.push(Tag {
            name,
            fg_color: "white".to_string(),
            bg_color,
            unread,
        });
    }

    info!(
        "Fetching tags took {} seconds",
        started.elapsed().as_secs_f32()
    );
    Ok(tags)
}

pub async fn thread(
    nm: &Notmuch,
    thread_id: String,
    debug_content_tree: bool,
) -> Result<Thread, ServerError> {
    // TODO(wathiede): normalize all email addresses through an address book with preferred
    // display names (that default to the most commonly seen name).
    let mut messages = Vec::new();
    for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) {
        let tags = nm.tags_for_query(&format!("id:{id}"))?;
        let file = File::open(&path)?;
        let mmap = unsafe { MmapOptions::new().map(&file)? };
        let m = parse_mail(&mmap)?;
        let from = email_addresses(&path, &m, "from")?;
        let from = match from.len() {
            0 => None,
            1 => from.into_iter().next(),
            _ => {
                warn!(
                    "Got {} from addresses in message, truncating: {:?}",
                    from.len(),
                    from
                );
                from.into_iter().next()
            }
        };
        let to = email_addresses(&path, &m, "to")?;
        let cc = email_addresses(&path, &m, "cc")?;
        let subject = m.headers.get_first_value("subject");
        let timestamp = m
            .headers
            .get_first_value("date")
            .and_then(|d| mailparse::dateparse(&d).ok());
        let cid_prefix = shared::urls::cid_prefix(None, &id);
        let base_url = None;
        let body = match extract_body(&m, &id)? {
            Body::PlainText(PlainText { text, content_tree }) => {
                let text = if text.len() > MAX_RAW_MESSAGE_SIZE {
                    format!(
                        "{}...\n\nMESSAGE WAS TRUNCATED @ {} bytes",
                        &text[..MAX_RAW_MESSAGE_SIZE],
                        MAX_RAW_MESSAGE_SIZE
                    )
                } else {
                    text
                };

                Body::Html(Html {
                    html: format!(
                        r#"<p class="view-part-text-plain">{}</p>"#,
                        // Trim newlines to prevent excessive white space at the beginning/end of
                        // presenation. Leave tabs and spaces incase plain text attempts to center a
                        // header on the first line.
                        sanitize_html(
                            &linkify_html(&text.trim_matches('\n')),
                            &cid_prefix,
                            &base_url
                        )?
                    ),
                    content_tree: if debug_content_tree {
                        render_content_type_tree(&m)
                    } else {
                        content_tree
                    },
                })
            }
            Body::Html(Html { html, content_tree }) => Body::Html(Html {
                html: sanitize_html(&html, &cid_prefix, &base_url)?,
                content_tree: if debug_content_tree {
                    render_content_type_tree(&m)
                } else {
                    content_tree
                },
            }),

            Body::UnhandledContentType(UnhandledContentType { content_tree, .. }) => {
                let body_start = mmap
                    .windows(2)
                    .take(20_000)
                    .position(|w| w == b"\n\n")
                    .unwrap_or(0);
                let body = mmap[body_start + 2..].to_vec();
                Body::UnhandledContentType(UnhandledContentType {
                    text: String::from_utf8(body)?,
                    content_tree: if debug_content_tree {
                        render_content_type_tree(&m)
                    } else {
                        content_tree
                    },
                })
            }
        };
        let headers = m
            .headers
            .iter()
            .map(|h| Header {
                key: h.get_key(),
                value: h.get_value(),
            })
            .collect();
        // TODO(wathiede): parse message and fill out attachments
        let attachments = extract_attachments(&m, &id)?;
        messages.push(Message {
            id: format!("id:{id}"),
            from,
            to,
            cc,
            subject,
            tags,
            timestamp,
            headers,
            body,
            path,
            attachments,
        });
    }
    messages.reverse();
    // Find the first subject that's set. After reversing the vec, this should be the oldest
    // message.
    let subject: String = messages
        .iter()
        .skip_while(|m| m.subject.is_none())
        .next()
        .and_then(|m| m.subject.clone())
        .unwrap_or("(NO SUBJECT)".to_string());
    Ok(Thread::Email(EmailThread {
        thread_id,
        subject,
        messages,
    }))
}

fn email_addresses(
    path: &str,
    m: &ParsedMail,
    header_name: &str,
) -> Result<Vec<Email>, ServerError> {
    let mut addrs = Vec::new();
    for header_value in m.headers.get_all_values(header_name) {
        match mailparse::addrparse(&header_value) {
            Ok(mal) => {
                for ma in mal.into_inner() {
                    match ma {
                        mailparse::MailAddr::Group(gi) => {
                            if !gi.group_name.contains("ndisclosed") {
                                println!("[{path}][{header_name}] Group: {gi}");
                            }
                        }
                        mailparse::MailAddr::Single(s) => addrs.push(Email {
                            name: s.display_name,
                            addr: Some(s.addr),
                        }), //println!("Single: {s}"),
                    }
                }
            }
            Err(_) => {
                let v = header_value;
                if v.matches('@').count() == 1 {
                    if v.matches('<').count() == 1 && v.ends_with('>') {
                        let idx = v.find('<').unwrap();
                        let addr = &v[idx + 1..v.len() - 1].trim();
                        let name = &v[..idx].trim();
                        addrs.push(Email {
                            name: Some(name.to_string()),
                            addr: Some(addr.to_string()),
                        });
                    }
                } else {
                    addrs.push(Email {
                        name: Some(v),
                        addr: None,
                    });
                }
            }
        }
    }
    Ok(addrs)
}

/// Finds the MIME part of message `id` whose `Content-Id` header matches `cid` and returns it
/// as an `Attachment`.
///
/// The header value is compared after trimming surrounding whitespace and, when present, the
/// enclosing angle brackets (`<...>`). Only the first file notmuch reports for `id` is
/// searched.
///
/// # Errors
/// Returns `ServerError::PartNotFound` when notmuch knows no file for `id` or no part carries a
/// matching Content-Id; propagates notmuch, I/O and mail parsing errors.
pub fn cid_attachment_bytes(nm: &Notmuch, id: &str, cid: &str) -> Result<Attachment, ServerError> {
    let files = nm.files(id)?;
    let Some(path) = files.first() else {
        warn!("failed to find files for message {id}");
        return Err(ServerError::PartNotFound);
    };
    let file = File::open(path)?;
    // NOTE(review): the mapping is only sound while the mail file is not modified underneath
    // us — confirm that message files are immutable once delivered.
    let mmap = unsafe { MmapOptions::new().map(&file)? };
    let m = parse_mail(&mmap)?;
    walk_attachments(&m, |sp, _cur_idx| {
        let header_cid = get_content_id(&sp.headers);
        info!("{cid} {:?}", header_cid);
        let header_cid = header_cid?;
        // Strip the angle brackets only when they are really there. Slicing off the first and
        // last byte unconditionally panics on empty or one-character values and corrupts ids
        // that were written without brackets.
        let bare = header_cid.trim();
        let bare = bare.strip_prefix('<').unwrap_or(bare);
        let bare = bare.strip_suffix('>').unwrap_or(bare);
        if bare != cid {
            return None;
        }
        // NOTE(review): `extract_attachment` returns `None` for parts without a filename, in
        // which case an empty default attachment is served — confirm this is intended for
        // inline images.
        Some(extract_attachment(sp, id, &[]).unwrap_or_else(Attachment::default))
    })
    .ok_or(ServerError::PartNotFound)
}

/// Returns the MIME part of message `id` located at the subpart index path `idx`.
///
/// `idx` is compared against the path `walk_attachments` builds while descending through the
/// message's subparts. Only the first file notmuch reports for `id` is searched. If the part
/// is found but `extract_attachment` yields nothing, a default `Attachment` is returned.
///
/// # Errors
/// Returns `ServerError::PartNotFound` when notmuch knows no file for `id` or no part sits at
/// `idx`; propagates notmuch, I/O and mail parsing errors.
pub fn attachment_bytes(nm: &Notmuch, id: &str, idx: &[usize]) -> Result<Attachment, ServerError> {
    let files = nm.files(id)?;
    let path = match files.first() {
        Some(first) => first,
        None => {
            warn!("failed to find files for message {id}");
            return Err(ServerError::PartNotFound);
        }
    };
    let file = File::open(&path)?;
    let mmap = unsafe { MmapOptions::new().map(&file)? };
    let m = parse_mail(&mmap)?;

    let located = walk_attachments(&m, |sp, cur_idx| {
        if cur_idx != idx {
            return None;
        }
        Some(extract_attachment(&sp, id, idx).unwrap_or(Attachment::default()))
    });
    located.ok_or(ServerError::PartNotFound)
}

/// Extracts a displayable body from the parsed message `m`.
///
/// Dispatches on the top-level content type: `text/plain` and `text/html` are returned
/// directly, the supported multipart types are handed to their dedicated extractors, and
/// everything else goes to `extract_unhandled`. If an extractor fails, the error is logged and
/// the message is rendered through `extract_unhandled` instead.
///
/// `id` seeds the part address that the multipart extractors use when building attachment
/// URLs.
///
/// # Errors
/// Returns an error when decoding a text body fails or when the `extract_unhandled` fallback
/// itself fails.
fn extract_body(m: &ParsedMail, id: &str) -> Result<Body, ServerError> {
    let mut part_addr = vec![id.to_string()];
    let extracted = match m.ctype.mimetype.as_str() {
        // Decode the top-level body only when it is actually the body. Doing so up front for
        // multipart messages is wasted work and lets a decode failure on the container abort
        // extraction before any subpart is examined.
        TEXT_PLAIN => return Ok(Body::text(m.get_body()?)),
        TEXT_HTML => return Ok(Body::html(m.get_body()?)),
        MULTIPART_MIXED => extract_mixed(m, &mut part_addr),
        MULTIPART_ALTERNATIVE => extract_alternative(m, &mut part_addr),
        MULTIPART_RELATED => extract_related(m, &mut part_addr),
        _ => extract_unhandled(m),
    };
    extracted.or_else(|err| {
        error!("Failed to extract body: {err:?}");
        extract_unhandled(m)
    })
}

/// Builds the fallback body for messages whose content type has no dedicated extractor.
///
/// The text consists of a fixed banner, the rendered content-type tree and the decoded
/// top-level body; the same tree is also stored in `content_tree`.
///
/// # Errors
/// Propagates the error from decoding the top-level body.
fn extract_unhandled(m: &ParsedMail) -> Result<Body, ServerError> {
    let content_tree = render_content_type_tree(m);
    let decoded = m.get_body()?;
    let text = format!(
        "Unhandled body content type:\n{}\n{}",
        content_tree, decoded,
    );
    Ok(Body::UnhandledContentType(UnhandledContentType {
        text,
        content_tree,
    }))
}

// multipart/alternative defines multiple representations of the same message, and clients should
// show the fanciest they can display. For this program, the priority is text/html, text/plain,
// then give up.
fn extract_alternative(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
    let handled_types = vec![
        MULTIPART_ALTERNATIVE,
        MULTIPART_MIXED,
        MULTIPART_RELATED,
        TEXT_HTML,
        TEXT_PLAIN,
    ];
    for sp in &m.subparts {
        if sp.ctype.mimetype.as_str() == MULTIPART_ALTERNATIVE {
            return extract_alternative(sp, part_addr);
        }
    }
    for sp in &m.subparts {
        if sp.ctype.mimetype.as_str() == MULTIPART_MIXED {
            return extract_related(sp, part_addr);
        }
    }
    for sp in &m.subparts {
        if sp.ctype.mimetype.as_str() == MULTIPART_RELATED {
            return extract_related(sp, part_addr);
        }
    }
    for sp in &m.subparts {
        if sp.ctype.mimetype.as_str() == TEXT_HTML {
            let body = sp.get_body()?;
            return Ok(Body::html(body));
        }
    }
    for sp in &m.subparts {
        if sp.ctype.mimetype.as_str() == TEXT_PLAIN {
            let body = sp.get_body()?;
            return Ok(Body::text(body));
        }
    }
    Err(ServerError::StringError(format!(
        "extract_alternative failed to find suitable subpart, searched: {:?}",
        handled_types
    )))
}

// multipart/mixed defines multiple types of content, all of which should be presented to the
// user 'serially'.
//
// Each supported subpart is rendered in order and the results are merged by
// `flatten_body_parts`. Subparts with other content types are logged and skipped. While a
// subpart is being processed its index is the last element of `part_addr`.
fn extract_mixed(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
    const HANDLED: [&str; 6] = [
        MULTIPART_ALTERNATIVE,
        MULTIPART_RELATED,
        TEXT_HTML,
        TEXT_PLAIN,
        IMAGE_JPEG,
        IMAGE_PNG,
    ];
    let mut unhandled_types = Vec::new();
    for sp in &m.subparts {
        let mimetype = sp.ctype.mimetype.as_str();
        if !HANDLED.contains(&mimetype) {
            unhandled_types.push(mimetype);
        }
    }
    unhandled_types.sort();
    if !unhandled_types.is_empty() {
        warn!("{MULTIPART_MIXED} contains the following unhandled mimetypes {unhandled_types:?}");
    }

    let mut parts = Vec::new();
    for (idx, sp) in m.subparts.iter().enumerate() {
        part_addr.push(idx.to_string());
        let rendered = match sp.ctype.mimetype.as_str() {
            MULTIPART_RELATED => Some(extract_related(sp, part_addr)?),
            MULTIPART_ALTERNATIVE => Some(extract_alternative(sp, part_addr)?),
            TEXT_PLAIN => Some(Body::text(sp.get_body()?)),
            TEXT_HTML => Some(Body::html(sp.get_body()?)),
            IMAGE_JPEG | IMAGE_PNG => {
                let pcd = sp.get_content_disposition();
                // Only add inline images; attachments are handled as an attribute of the top
                // level Message and rendered separately client-side.
                if pcd.disposition == mailparse::DispositionType::Inline {
                    let filename = pcd.params.get("filename").cloned().unwrap_or_default();
                    // Everything after the leading message id is the dotted subpart path.
                    let part_path = part_addr[1..].join(".");
                    // TODO: make URL generation more programmatic based on what the frontend
                    // has mapped
                    Some(Body::html(format!(
                        r#"<img src="/api/view/attachment/{}/{}/{filename}">"#,
                        part_addr[0], part_path
                    )))
                } else {
                    None
                }
            }
            _ => None,
        };
        parts.extend(rendered);
        part_addr.pop();
    }
    Ok(flatten_body_parts(&parts))
}

/// Merges several body parts into a single HTML body.
///
/// Plain-text and unhandled parts are linkified and wrapped in a `<p>` element with a class
/// naming their kind; HTML parts are copied as-is. The fragments are joined with newlines.
fn flatten_body_parts(parts: &[Body]) -> Body {
    let mut fragments = Vec::with_capacity(parts.len());
    for part in parts {
        // For the text-like parts, newlines are trimmed to prevent excessive white space at the
        // beginning/end of presentation. Tabs and spaces are left in case plain text attempts
        // to center a header on the first line.
        let fragment = match part {
            Body::PlainText(PlainText { text, .. }) => format!(
                r#"<p class="view-part-text-plain">{}</p>"#,
                linkify_html(&text.trim_matches('\n'))
            ),
            Body::Html(Html { html, .. }) => html.clone(),
            Body::UnhandledContentType(UnhandledContentType { text, .. }) => {
                error!("text len {}", text.len());
                format!(
                    r#"<p class="view-part-unhandled">{}</p>"#,
                    linkify_html(&text.trim_matches('\n'))
                )
            }
        };
        fragments.push(fragment);
    }
    let html = fragments.join("\n");

    info!("flatten_body_parts {} {html}", parts.len());
    Body::html(html)
}

/// Extracts a body from a `multipart/related` part.
///
/// Preference order: the first `multipart/alternative` subpart, then the first `text/html`
/// subpart, then the first `text/plain` subpart. Image subparts are currently only logged
/// together with their Content-Id and part address. Subparts with other content types are
/// reported in a warning.
///
/// # Errors
/// Returns `ServerError::StringError` when no suitable subpart exists, and propagates body
/// decoding errors.
fn extract_related(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
    // TODO(wathiede): collect related things and change return type to new Body arm.
    const HANDLED_TYPES: [&str; 5] = [
        MULTIPART_ALTERNATIVE,
        TEXT_HTML,
        TEXT_PLAIN,
        IMAGE_JPEG,
        IMAGE_PNG,
    ];
    let mut unhandled_types: Vec<_> = m
        .subparts
        .iter()
        .map(|sp| sp.ctype.mimetype.as_str())
        .filter(|mt| !HANDLED_TYPES.contains(mt))
        .collect();
    unhandled_types.sort();
    if !unhandled_types.is_empty() {
        warn!("{MULTIPART_RELATED} contains the following unhandled mimetypes {unhandled_types:?}");
    }

    // Diagnostic output only: record which images carry a Content-Id and where they live.
    for (i, sp) in m.subparts.iter().enumerate() {
        if sp.ctype.mimetype == IMAGE_PNG || sp.ctype.mimetype == IMAGE_JPEG {
            info!("sp.ctype {:#?}", sp.ctype);
            if let Some(cid) = sp.headers.get_first_value("Content-Id") {
                let mut part_id = part_addr.clone();
                part_id.push(i.to_string());
                info!("cid: {cid} part_id {part_id:?}");
            }
        }
    }

    let first_of = |mimetype: &str| {
        m.subparts
            .iter()
            .enumerate()
            .find(|(_, sp)| sp.ctype.mimetype == mimetype)
    };

    if let Some((idx, sp)) = first_of(MULTIPART_ALTERNATIVE) {
        // Descend into the matched subpart itself (not its parent) and record its index in the
        // part address for the duration of the call, as `extract_mixed` does for its children.
        part_addr.push(idx.to_string());
        let body = extract_alternative(sp, part_addr);
        part_addr.pop();
        return body;
    }
    if let Some((_, sp)) = first_of(TEXT_HTML) {
        return Ok(Body::html(sp.get_body()?));
    }
    if let Some((_, sp)) = first_of(TEXT_PLAIN) {
        return Ok(Body::text(sp.get_body()?));
    }
    Err(ServerError::StringError(format!(
        "extract_related failed to find suitable subpart, searched: {:?}",
        HANDLED_TYPES
    )))
}

/// Visits the subparts of `m` depth-first and returns the first `Some` produced by `visitor`.
///
/// `visitor` receives each subpart together with its index path relative to `m`; the walk
/// starts with an empty path.
fn walk_attachments<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
    m: &ParsedMail,
    visitor: F,
) -> Option<T> {
    walk_attachments_inner(m, visitor, &mut Vec::new())
}

/// Recursive worker for `walk_attachments`.
///
/// `cur_addr` holds the index path of `m`; each subpart's index is pushed while that subpart
/// (and then its descendants) is visited, and popped again afterwards. When a visitor call
/// yields a value the search stops immediately and `cur_addr` is left pointing at the match.
fn walk_attachments_inner<T, F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy>(
    m: &ParsedMail,
    visitor: F,
    cur_addr: &mut Vec<usize>,
) -> Option<T> {
    for (idx, sp) in m.subparts.iter().enumerate() {
        cur_addr.push(idx);
        // Offer the part itself first, then search below it.
        let found = visitor(sp, cur_addr.as_slice())
            .or_else(|| walk_attachments_inner(sp, visitor, cur_addr));
        if found.is_some() {
            return found;
        }
        cur_addr.pop();
    }
    None
}

// TODO(wathiede): make this walk_attachments that takes a closure.
// Then implement one closure for building `Attachment` and implement another that can be used
// to get the bytes for serving attachments over HTTP.
/// Lists the direct subparts of `m` whose disposition is `Attachment`.
///
/// Only the top-level subparts are examined. Inline parts are filtered out because they are
/// flattened into the body of the message.
fn extract_attachments(m: &ParsedMail, id: &str) -> Result<Vec<Attachment>, ServerError> {
    let attachments = m
        .subparts
        .iter()
        .enumerate()
        .filter_map(|(idx, sp)| extract_attachment(sp, id, &[idx]))
        .filter(|attachment| attachment.disposition == DispositionType::Attachment)
        .collect();
    Ok(attachments)
}

/// Builds an `Attachment` from the MIME part `m`.
///
/// Returns `None` when the part's content disposition has no `filename` parameter, or when its
/// raw body cannot be read (the latter is logged). `id` is the message id, and `idx` is the
/// part's index path, stored as a dot-separated string.
fn extract_attachment(m: &ParsedMail, id: &str, idx: &[usize]) -> Option<Attachment> {
    let pcd = m.get_content_disposition();
    // TODO: do we need to handle empty filename attachments, or should we change the definition
    // of Attachment::filename?
    let filename = pcd.params.get("filename").cloned()?;

    let bytes = match m.get_body_raw() {
        Ok(raw) => raw,
        Err(err) => {
            error!("failed to get body for attachment: {err}");
            return None;
        }
    };
    let dotted_idx = idx
        .iter()
        .map(usize::to_string)
        .collect::<Vec<_>>()
        .join(".");

    Some(Attachment {
        id: id.to_string(),
        idx: dotted_idx,
        disposition: pcd.disposition.into(),
        filename: Some(filename),
        size: bytes.len(),
        // TODO: what is the default for ctype?
        // TODO: do we want to use m.ctype.params for anything?
        content_type: Some(m.ctype.mimetype.clone()),
        // TODO: grab this from somewhere
        content_id: None,
        bytes,
    })
}

/// Returns the contents of the double-quoted string that ends `header_value`.
///
/// For `attachment; filename="report.pdf"` this yields `report.pdf`. An empty string is
/// returned when the value does not end with a `"` or has no matching opening quote. Unlike
/// blindly slicing off the last byte, this never panics on empty input or on values whose last
/// character is multi-byte.
pub fn get_attachment_filename(header_value: &str) -> &str {
    info!("get_attachment_filename {header_value}");
    // Drop the closing quote; without one there is no quoted filename to extract.
    let Some(unterminated) = header_value.strip_suffix('"') else {
        return "";
    };
    unterminated
        .rfind('"')
        .map_or("", |open| &unterminated[open + 1..])
}

/// Returns the value of the first `Content-Type` header, cut off before the first `;`.
///
/// Returns `None` when no such header is present.
pub fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
    let mut value = headers.get_first_value("Content-Type")?;
    if let Some(params_start) = value.find(';') {
        value.truncate(params_start);
    }
    Some(value)
}

/// Returns the value of the first `Content-Id` header in `headers`, if any.
///
/// The value is returned as stored in the header, without any further processing here.
fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
    headers.get_first_value("Content-Id")
}

/// Renders a human-readable description of the MIME structure of `m`.
///
/// The output has three sections: an outline listing only the content type of every part, a
/// detailed listing that adds each part's character set, content-type parameters and headers,
/// and a note naming the headers that were left out (those in `SKIP_HEADERS` and any whose
/// name starts with `X`). Nesting depth is shown with `WIDTH` dashes per level.
fn render_content_type_tree(m: &ParsedMail) -> String {
    const WIDTH: usize = 4;
    const SKIP_HEADERS: [&str; 4] = [
        "Authentication-Results",
        "DKIM-Signature",
        "Received",
        "Received-SPF",
    ];
    // Outline: one line per part, content type only.
    fn render_ct_rec(m: &ParsedMail, depth: usize) -> String {
        let heading = format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype);
        std::iter::once(heading)
            .chain(m.subparts.iter().map(|sp| render_ct_rec(sp, depth + 1)))
            .collect::<Vec<_>>()
            .join("\n")
    }
    // Detailed view: content type followed by charset, parameters and the retained headers.
    fn render_rec(m: &ParsedMail, depth: usize) -> String {
        let indent = " ".repeat(depth * WIDTH);
        let mut lines = vec![format!(
            "{} {}",
            "-".repeat(depth * WIDTH),
            m.ctype.mimetype
        )];
        if !m.ctype.charset.is_empty() {
            lines.push(format!("{indent} Character Set: {}", m.ctype.charset));
        }
        lines.extend(
            m.ctype
                .params
                .iter()
                .map(|(k, v)| format!("{indent} {k}: {v}")),
        );
        if !m.headers.is_empty() {
            lines.push(format!("{indent} == headers =="));
            for h in &m.headers {
                let key = h.get_key();
                let hidden = key.starts_with('X') || SKIP_HEADERS.contains(&key.as_str());
                if !hidden {
                    lines.push(format!("{indent} {}: {}", h.get_key_ref(), h.get_value()));
                }
            }
        }
        lines.extend(m.subparts.iter().map(|sp| render_rec(sp, depth + 1)));
        lines.join("\n")
    }
    let outline = render_ct_rec(m, 1);
    let detailed = render_rec(m, 1);
    format!(
        "Outline:\n{}\n\nDetailed:\n{}\n\nNot showing headers:\n  {}\n  X.*",
        outline,
        detailed,
        SKIP_HEADERS.join("\n  ")
    )
}

/// Adds or removes the `unread` tag on everything matching `query`.
///
/// The tag is added when `unread` is true and removed otherwise. Always returns `Ok(true)` on
/// success.
///
/// # Errors
/// Propagates errors from `Notmuch::tag_add` / `Notmuch::tag_remove`.
pub async fn set_read_status<'ctx>(
    nm: &Notmuch,
    query: &str,
    unread: bool,
) -> Result<bool, ServerError> {
    match unread {
        true => {
            nm.tag_add("unread", query)?;
        }
        false => {
            nm.tag_remove("unread", query)?;
        }
    }
    Ok(true)
}