letterbox/procmail2notmuch/src/main.rs

use std::{collections::HashMap, convert::Infallible, io::Write, str::FromStr};

use clap::{Parser, Subcommand};
use serde::{Deserialize, Serialize};
use sqlx::{types::Json, PgPool};

/// The part of a message that a procmail condition is tested against.
///
/// Most variants name a mail header. The derived `Ord` follows declaration
/// order and `print_rules` sorts on it to break ties between equal counts, so
/// reordering the variants changes that output.
#[derive(
    Copy, Clone, Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize,
)]
enum MatchType {
    /// `From` header.
    From,
    /// `Sender` header.
    Sender,
    /// Destination headers, as selected by procmail's `^TO` macro or a
    /// `(TO|Cc)` alternation.
    To,
    /// `Cc` header.
    Cc,
    /// `Subject` header.
    Subject,
    /// `List-ID` header.
    ListId,
    /// `Delivered-To` header.
    DeliveredTo,
    /// `X-Forwarded-To` header.
    XForwardedTo,
    /// `Reply-To` header.
    ReplyTo,
    /// `X-Original-To` header.
    XOriginalTo,
    /// Any header whose name starts with `X-Spam` (including `X-Spam-Flag`).
    XSpam,
    /// A condition that does not start with `^`.
    Body,
    /// Default value. `notmuch_from_rules` reports a rule containing such a
    /// match on stderr and emits nothing for that match.
    #[default]
    Unknown,
}
/// A single procmail condition: where to look in the message and what to look
/// for. Built from a `*` line by the `FromStr` impl.
#[derive(Debug, Default, Serialize, Deserialize)]
struct Match {
    /// Part of the message the needle is searched in.
    match_type: MatchType,
    /// Text to search for, with backslashes and `.*` sequences removed by
    /// `cleanup_match`. For `MatchType::XSpam` it is always the literal `*`.
    needle: String,
}

/// One procmail recipe: its conditions plus the tag its delivery maps to.
#[derive(Debug, Default, Serialize, Deserialize)]
struct Rule {
    // NOTE(review): nothing in the visible code assigns this field, so parsed
    // rules always carry the `Default` value `false` — confirm whether the
    // consumer of the serialized rules relies on it.
    stop_on_match: bool,
    /// Conditions collected from the recipe's `*` lines.
    matches: Vec<Match>,
    /// Tag to apply. `main` fills this in before storing a rule;
    /// `notmuch_from_rules` produces no output for rules where it is `None`.
    tag: Option<String>,
}

/// Returns `s` with every backslash removed.
///
/// No escape sequences are interpreted: `\\` yields nothing and `\.` yields
/// `.`.
fn unescape(s: &str) -> String {
    s.chars().filter(|&c| c != '\\').collect()
}

/// Turns the text of a procmail condition into a plain search needle.
///
/// The first `prefix.len()` bytes of `s` are dropped (the caller has already
/// checked that `s` starts with `prefix`), then all backslashes and every
/// `.*` sequence are removed from the remainder.
///
/// # Panics
///
/// Panics if `prefix.len()` is past the end of `s` or does not fall on a
/// character boundary of `s`.
fn cleanup_match(prefix: &str, s: &str) -> String {
    let (_, rest) = s.split_at(prefix.len());
    let without_escapes = unescape(rest);
    without_escapes.replace(".*", "")
}

/// Header prefixes recognised at the start of an anchored (`^`) procmail
/// condition. Each constant is compared against the condition text that
/// follows the leading `^`.
mod matches {
    // `TO` is procmail's `^TO` macro rather than a literal header name.
    // From https://linux.die.net/man/5/procmailrc
    // If the regular expression contains '^TO_' it will be substituted by '(^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To):(.*[^-a-zA-Z0-9_.])?)'
    // If the regular expression contains '^TO' it will be substituted by '(^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To):(.*[^a-zA-Z])?)', which should catch all destination specifications containing a specific word.

    // `const` items are implicitly `'static`, so the lifetime is not spelled out.
    pub const TO: &str = "TO";
    pub const CC: &str = "Cc";
    pub const TOCC: &str = "(TO|Cc)";
    pub const FROM: &str = "From";
    pub const SENDER: &str = "Sender";
    pub const SUBJECT: &str = "Subject";
    pub const DELIVERED_TO: &str = "Delivered-To";
    pub const X_FORWARDED_TO: &str = "X-Forwarded-To";
    pub const REPLY_TO: &str = "Reply-To";
    pub const X_ORIGINAL_TO: &str = "X-Original-To";
    pub const LIST_ID: &str = "List-ID";
    // `X_SPAM` is a prefix of `X_SPAM_FLAG`; both map to the same match type.
    pub const X_SPAM: &str = "X-Spam";
    pub const X_SPAM_FLAG: &str = "X-Spam-Flag";
}

impl FromStr for Match {
    type Err = Infallible;

    /// Parses one procmail condition line.
    ///
    /// Examples of accepted input:
    ///
    /// ```text
    /// * 1^0 ^TOsonyrewards.com@xinu.tv
    /// * ^TOsonyrewards.com@xinu.tv
    /// *^TOsonyrewards.com@xinu.tv
    /// ```
    ///
    /// The leading `*` and an optional `1^0` weight are discarded. If the
    /// remaining text starts with `^` followed by one of the supported header
    /// prefixes, the match is typed after that header and the prefix is
    /// removed from the needle; `X-Spam*` conditions get the fixed needle
    /// `*`. Text that does not start with `^` becomes a `MatchType::Body`
    /// match. Runs of whitespace inside the condition collapse to single
    /// spaces.
    ///
    /// # Panics
    ///
    /// The error type is `Infallible`, so malformed input cannot be reported
    /// as an `Err`. This function panics when the line has no condition text,
    /// or when a `^`-anchored condition names a header that is not supported.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use matches::*;

        // procmail expands `^TO_` as well as `^TO`. It has to be tried before
        // `TO`, otherwise the underscore would be left at the front of the
        // needle.
        const TO_UNDERSCORE: &str = "TO_";

        // Supported header prefixes, in the order they are tried.
        const HEADERS: &[(&str, MatchType)] = &[
            (TO_UNDERSCORE, MatchType::To),
            (TO, MatchType::To),
            (FROM, MatchType::From),
            (CC, MatchType::Cc),
            (TOCC, MatchType::To),
            (SENDER, MatchType::Sender),
            (SUBJECT, MatchType::Subject),
            (X_ORIGINAL_TO, MatchType::XOriginalTo),
            (LIST_ID, MatchType::ListId),
            (REPLY_TO, MatchType::ReplyTo),
            (X_SPAM_FLAG, MatchType::XSpam),
            (X_SPAM, MatchType::XSpam),
            (DELIVERED_TO, MatchType::DeliveredTo),
            (X_FORWARDED_TO, MatchType::XForwardedTo),
        ];

        // procmail does not require whitespace after the leading `*`, so strip
        // the character itself. Input without a leading `*` keeps the previous
        // behaviour of dropping its first whitespace-separated token.
        let line = s.trim_start();
        let (text, leading_tokens) = match line.strip_prefix('*') {
            Some(rest) => (rest, 0),
            None => (line, 1),
        };
        let mut words = text.split_whitespace().skip(leading_tokens);
        let mut head = words
            .next()
            .expect("procmail condition has no pattern after the leading '*'");
        if head == "1^0" {
            // Scoring weight; only the pattern after it matters here.
            head = words
                .next()
                .expect("procmail condition has no pattern after the '1^0' weight");
        }
        let condition = std::iter::once(head)
            .chain(words)
            .collect::<Vec<_>>()
            .join(" ");

        let needle = match condition.strip_prefix('^') {
            Some(anchored) => anchored,
            None => {
                return Ok(Match {
                    match_type: MatchType::Body,
                    needle: cleanup_match("", &condition),
                })
            }
        };

        for &(prefix, match_type) in HEADERS {
            if needle.starts_with(prefix) {
                let needle = if match_type == MatchType::XSpam {
                    // The header's presence is what matters, not its value.
                    '*'.to_string()
                } else {
                    cleanup_match(prefix, needle)
                };
                return Ok(Match { match_type, needle });
            }
        }
        // Reachable with real input: the condition names a header that has no
        // entry in `HEADERS` yet.
        panic!("unsupported header in procmail condition: '{}'", needle)
    }
}

// What to do with the parsed rules; selected as a CLI subcommand.
//
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the program's output.
#[derive(Debug, Subcommand)]
enum Mode {
    // Print how many conditions of each `MatchType` were parsed.
    Debug,
    // Write the lines produced by `notmuch_from_rules` to stdout.
    Notmuchrc,
    // Replace the contents of the `email_rule` table with the parsed rules.
    LoadSql {
        // Database connection string. `env!` reads `DATABASE_URL` when the
        // program is compiled, so the default is fixed at build time.
        #[arg(short, long, default_value = env!("DATABASE_URL"))]
        dsn: String,
    },
}

/// Converts procmail recipes into notmuch tagging rules or `email_rule` database rows.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
    // Path of the procmailrc to read. (`//` rather than `///` so that no help
    // text is added for the option.)
    #[arg(short, long, default_value = "/home/wathiede/dotfiles/procmailrc")]
    input: String,

    // What to do with the rules parsed from `input`.
    #[command(subcommand)]
    mode: Mode,
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    let mut rules = Vec::new();
    let mut cur_rule = Rule::default();
    for l in std::fs::read_to_string(args.input)?.lines() {
        let l = if let Some(idx) = l.find('#') {
            &l[..idx]
        } else {
            l
        }
        .trim();
        if l.is_empty() {
            continue;
        }
        if l.find('=').is_some() {
            // Probably a variable assignment, skip line
            continue;
        }
        let first = l.chars().nth(0).unwrap_or(' ');
        match first {
            ':' => {
                // start of rule
            }
            '*' => {
                // add to current rule
                let m: Match = l.parse()?;
                cur_rule.matches.push(m);
            }
            '.' => {
                // delivery to folder
                cur_rule.tag = Some(cleanup_match(
                    "",
                    &l.replace('.', "/")
                        .replace(' ', "")
                        .trim_matches('/')
                        .to_string(),
                ));
                rules.push(cur_rule);
                cur_rule = Rule::default();
            }
            '/' => cur_rule = Rule::default(), // Ex. /dev/null
            '|' => cur_rule = Rule::default(), // external command
            '$' => {
                // TODO(wathiede): tag messages with no other tag as 'inbox'
                cur_rule.tag = Some(cleanup_match("", "inbox"));
                rules.push(cur_rule);
                cur_rule = Rule::default();
            } // variable, should only be $DEFAULT in my config
            _ => panic!("Unhandled first character '{}'\nLine: {}", first, l),
        }
    }
    match args.mode {
        Mode::Debug => print_rules(&rules),
        Mode::Notmuchrc => notmuch_from_rules(std::io::stdout(), &rules)?,
        Mode::LoadSql { dsn } => load_sql(&dsn, &rules).await?,
    }
    Ok(())
}

/// Prints, for every match type that occurs in `rules`, how many conditions
/// use it.
///
/// Output goes to stdout as one `Type: count` line per match type, highest
/// count first; equal counts are ordered by descending `MatchType`.
fn print_rules(rules: &[Rule]) {
    let mut counts = HashMap::new();
    let match_types = rules
        .iter()
        .flat_map(|rule| &rule.matches)
        .map(|m| m.match_type);
    for match_type in match_types {
        *counts.entry(match_type).or_insert(0) += 1;
    }

    // Count first so that the tuple ordering sorts by count, then by type.
    let mut by_count: Vec<_> = counts
        .into_iter()
        .map(|(match_type, count)| (count, match_type))
        .collect();
    by_count.sort_by(|a, b| b.cmp(a));

    for (v, k) in by_count {
        println!("{k:?}: {v}");
    }
}

/// Writes two tagging lines per usable condition in `rules` to `w`, sorted
/// lexicographically.
///
/// For each condition of a tagged rule, one line handles unread messages
/// (keeping `+unread`) and one handles the rest; both remove `unprocessed`
/// and add the rule's tag. Rules without a tag produce nothing. Conditions on
/// `Delivered-To`, `X-Forwarded-To`, `Reply-To`, `X-Original-To` and `X-Spam`
/// are skipped silently, and `Unknown` conditions are skipped with a message
/// on stderr.
///
/// # Errors
///
/// Returns an error if writing to `w` fails.
fn notmuch_from_rules<W: Write>(mut w: W, rules: &[Rule]) -> anyhow::Result<()> {
    // TODO(wathiede): if reindexing this many tags is too slow, see if combining rules per tag is
    // faster.
    let mut lines = Vec::new();
    for r in rules {
        // Without a tag there is nothing to emit for any of the rule's matches.
        let t = match &r.tag {
            Some(t) => t,
            None => continue,
        };
        for m in &r.matches {
            let rule = match m.match_type {
                MatchType::Unknown => {
                    eprintln!("rule has unknown match {:?}", r);
                    continue;
                }
                // TODO(wathiede): something more specific for Sender?
                MatchType::From | MatchType::Sender => "from:",
                MatchType::To | MatchType::Cc => "to:",
                MatchType::Subject => "subject:",
                MatchType::ListId => "List-ID:",
                MatchType::Body => "",
                // TODO(wathiede): these will probably require adding fields to notmuch
                // index. Handle them later.
                MatchType::DeliveredTo
                | MatchType::XForwardedTo
                | MatchType::ReplyTo
                | MatchType::XOriginalTo
                | MatchType::XSpam => continue,
            };
            // Preserve unread status if run with --remove-all
            let unread_line = format!(
                r#"-unprocessed +{} +unread -- is:unread tag:unprocessed {}"{}""#,
                t, rule, m.needle
            );
            // TODO(wathiede): this assumes `notmuch new` is configured to add
            // `tag:unprocessed` to all new mail.
            let any_line = format!(
                r#"-unprocessed +{} -- tag:unprocessed {}"{}""#,
                t, rule, m.needle
            );
            lines.push(unread_line);
            lines.push(any_line);
        }
    }
    lines.sort();
    lines.iter().try_for_each(|l| writeln!(w, "{l}"))?;
    Ok(())
}

/// Replaces the contents of the `email_rule` table with `rules`.
///
/// Connects to the Postgres database at `dsn`, runs the migrations found in
/// `../server/migrations`, deletes every row of `email_rule`, then inserts
/// one row per rule: `sort_order` is the rule's index in `rules` and `rule`
/// is the rule serialized as JSON. Progress is printed to stdout.
///
/// # Errors
///
/// Returns the first error from connecting, migrating, or executing a
/// statement.
async fn load_sql(dsn: &str, rules: &[Rule]) -> anyhow::Result<()> {
    let pool = PgPool::connect(dsn).await?;
    sqlx::migrate!("../server/migrations").run(&pool).await?;
    println!("clearing email_rule table");
    // NOTE(review): the DELETE and the INSERTs below are executed as separate
    // statements on the pool, not inside one transaction, so a failure partway
    // through leaves the table empty or partially loaded.
    sqlx::query!("DELETE FROM email_rule")
        .execute(&pool)
        .await?;

    // The position in `rules` is stored so the original order can be restored.
    // NOTE(review): `order as i32` wraps silently past i32::MAX rules —
    // presumably never reached in practice.
    for (order, rule) in rules.iter().enumerate() {
        println!("inserting {order}: {rule:?}");
        sqlx::query!(
            r#"
        INSERT INTO email_rule (sort_order, rule)
        VALUES ($1, $2)
            "#,
            order as i32,
            Json(rule) as _
        )
        .execute(&pool)
        .await?;
    }
    Ok(())
}