Add mail tagging support

This commit is contained in:
Bill Thiede 2025-04-21 21:15:55 -07:00
parent 38e75ec251
commit 9178badfd0
9 changed files with 408 additions and 202 deletions

5
Cargo.lock generated
View File

@ -3014,6 +3014,8 @@ version = "0.15.11"
dependencies = [
"anyhow",
"clap",
"letterbox-notmuch",
"letterbox-shared",
"serde",
"sqlx",
"tokio 1.44.2",
@ -3069,8 +3071,11 @@ version = "0.15.11"
dependencies = [
"build-info",
"letterbox-notmuch",
"regex",
"serde",
"sqlx",
"strum_macros 0.27.1",
"tracing",
]
[[package]]

View File

@ -13,6 +13,8 @@ version.workspace = true
[dependencies]
anyhow = "1.0.69"
clap = { version = "4.5.37", features = ["derive", "env"] }
letterbox-notmuch = { version = "0.15.11", path = "../notmuch" }
letterbox-shared = { version = "0.15.11", path = "../shared" }
serde = { version = "1.0.219", features = ["derive"] }
sqlx = { version = "0.8.5", features = ["postgres", "runtime-tokio"] }
tokio = { version = "1.44.2", features = ["rt", "macros", "rt-multi-thread"] }

View File

@ -1,165 +1,9 @@
use std::{collections::HashMap, convert::Infallible, io::Write, str::FromStr};
use std::{collections::HashMap, io::Write};
use clap::{Parser, Subcommand};
use serde::{Deserialize, Serialize};
use letterbox_shared::{cleanup_match, Match, MatchType, Rule};
use sqlx::{types::Json, PgPool};
#[derive(
Copy, Clone, Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize,
)]
enum MatchType {
From,
Sender,
To,
Cc,
Subject,
ListId,
DeliveredTo,
XForwardedTo,
ReplyTo,
XOriginalTo,
XSpam,
Body,
#[default]
Unknown,
}
#[derive(Debug, Default, Serialize, Deserialize)]
struct Match {
match_type: MatchType,
needle: String,
}
#[derive(Debug, Default, Serialize, Deserialize)]
struct Rule {
stop_on_match: bool,
matches: Vec<Match>,
tag: Option<String>,
}
fn unescape(s: &str) -> String {
s.replace('\\', "")
}
fn cleanup_match(prefix: &str, s: &str) -> String {
unescape(&s[prefix.len()..]).replace(".*", "")
}
mod matches {
// From https://linux.die.net/man/5/procmailrc
// If the regular expression contains '^TO_' it will be substituted by '(^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To):(.*[^-a-zA-Z0-9_.])?)'
// If the regular expression contains '^TO' it will be substituted by '(^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To):(.*[^a-zA-Z])?)', which should catch all destination specifications containing a specific word.
pub const TO: &'static str = "TO";
pub const CC: &'static str = "Cc";
pub const TOCC: &'static str = "(TO|Cc)";
pub const FROM: &'static str = "From";
pub const SENDER: &'static str = "Sender";
pub const SUBJECT: &'static str = "Subject";
pub const DELIVERED_TO: &'static str = "Delivered-To";
pub const X_FORWARDED_TO: &'static str = "X-Forwarded-To";
pub const REPLY_TO: &'static str = "Reply-To";
pub const X_ORIGINAL_TO: &'static str = "X-Original-To";
pub const LIST_ID: &'static str = "List-ID";
pub const X_SPAM: &'static str = "X-Spam";
pub const X_SPAM_FLAG: &'static str = "X-Spam-Flag";
}
impl FromStr for Match {
type Err = Infallible;
fn from_str(s: &str) -> Result<Self, Self::Err> {
// Examples:
// "* 1^0 ^TOsonyrewards.com@xinu.tv"
// "* ^TOsonyrewards.com@xinu.tv"
let mut it = s.split_whitespace().skip(1);
let mut needle = it.next().unwrap();
if needle == "1^0" {
needle = it.next().unwrap();
}
let mut needle = vec![needle];
needle.extend(it);
let needle = needle.join(" ");
let first = needle.chars().nth(0).unwrap_or(' ');
use matches::*;
if first == '^' {
let needle = &needle[1..];
if needle.starts_with(TO) {
return Ok(Match {
match_type: MatchType::To,
needle: cleanup_match(TO, needle),
});
} else if needle.starts_with(FROM) {
return Ok(Match {
match_type: MatchType::From,
needle: cleanup_match(FROM, needle),
});
} else if needle.starts_with(CC) {
return Ok(Match {
match_type: MatchType::Cc,
needle: cleanup_match(CC, needle),
});
} else if needle.starts_with(TOCC) {
return Ok(Match {
match_type: MatchType::To,
needle: cleanup_match(TOCC, needle),
});
} else if needle.starts_with(SENDER) {
return Ok(Match {
match_type: MatchType::Sender,
needle: cleanup_match(SENDER, needle),
});
} else if needle.starts_with(SUBJECT) {
return Ok(Match {
match_type: MatchType::Subject,
needle: cleanup_match(SUBJECT, needle),
});
} else if needle.starts_with(X_ORIGINAL_TO) {
return Ok(Match {
match_type: MatchType::XOriginalTo,
needle: cleanup_match(X_ORIGINAL_TO, needle),
});
} else if needle.starts_with(LIST_ID) {
return Ok(Match {
match_type: MatchType::ListId,
needle: cleanup_match(LIST_ID, needle),
});
} else if needle.starts_with(REPLY_TO) {
return Ok(Match {
match_type: MatchType::ReplyTo,
needle: cleanup_match(REPLY_TO, needle),
});
} else if needle.starts_with(X_SPAM_FLAG) {
return Ok(Match {
match_type: MatchType::XSpam,
needle: '*'.to_string(),
});
} else if needle.starts_with(X_SPAM) {
return Ok(Match {
match_type: MatchType::XSpam,
needle: '*'.to_string(),
});
} else if needle.starts_with(DELIVERED_TO) {
return Ok(Match {
match_type: MatchType::DeliveredTo,
needle: cleanup_match(DELIVERED_TO, needle),
});
} else if needle.starts_with(X_FORWARDED_TO) {
return Ok(Match {
match_type: MatchType::XForwardedTo,
needle: cleanup_match(X_FORWARDED_TO, needle),
});
} else {
unreachable!("needle: '{needle}'")
}
} else {
return Ok(Match {
match_type: MatchType::Body,
needle: cleanup_match("", &needle),
});
}
}
}
#[derive(Debug, Subcommand)]
enum Mode {
Debug,
@ -204,6 +48,9 @@ async fn main() -> anyhow::Result<()> {
match first {
':' => {
// start of rule
// If carbon-copy flag present, don't stop on match
cur_rule.stop_on_match = !l.contains('c');
}
'*' => {
// add to current rule
@ -212,13 +59,13 @@ async fn main() -> anyhow::Result<()> {
}
'.' => {
// delivery to folder
cur_rule.tag = Some(cleanup_match(
cur_rule.tag = cleanup_match(
"",
&l.replace('.', "/")
.replace(' ', "")
.trim_matches('/')
.to_string(),
));
);
rules.push(cur_rule);
cur_rule = Rule::default();
}
@ -226,7 +73,7 @@ async fn main() -> anyhow::Result<()> {
'|' => cur_rule = Rule::default(), // external command
'$' => {
// TODO(wathiede): tag messages with no other tag as 'inbox'
cur_rule.tag = Some(cleanup_match("", "inbox"));
cur_rule.tag = cleanup_match("", "inbox");
rules.push(cur_rule);
cur_rule = Rule::default();
} // variable, should only be $DEFAULT in my config
@ -262,42 +109,41 @@ fn notmuch_from_rules<W: Write>(mut w: W, rules: &[Rule]) -> anyhow::Result<()>
let mut lines = Vec::new();
for r in rules {
for m in &r.matches {
if let Some(t) = &r.tag {
if let MatchType::Unknown = m.match_type {
eprintln!("rule has unknown match {:?}", r);
continue;
}
let rule = match m.match_type {
MatchType::From => "from:",
// TODO(wathiede): something more specific?
MatchType::Sender => "from:",
MatchType::To => "to:",
MatchType::Cc => "to:",
MatchType::Subject => "subject:",
MatchType::ListId => "List-ID:",
MatchType::Body => "",
// TODO(wathiede): these will probably require adding fields to notmuch
// index. Handle them later.
MatchType::DeliveredTo
| MatchType::XForwardedTo
| MatchType::ReplyTo
| MatchType::XOriginalTo
| MatchType::XSpam => continue,
MatchType::Unknown => unreachable!(),
};
// Preserve unread status if run with --remove-all
lines.push(format!(
r#"-unprocessed +{} +unread -- is:unread tag:unprocessed {}"{}""#,
t, rule, m.needle
));
lines.push(format!(
// TODO(wathiede): this assumes `notmuch new` is configured to add
// `tag:unprocessed` to all new mail.
r#"-unprocessed +{} -- tag:unprocessed {}"{}""#,
t, rule, m.needle
));
let t = &r.tag;
if let MatchType::Unknown = m.match_type {
eprintln!("rule has unknown match {:?}", r);
continue;
}
let rule = match m.match_type {
MatchType::From => "from:",
// TODO(wathiede): something more specific?
MatchType::Sender => "from:",
MatchType::To => "to:",
MatchType::Cc => "to:",
MatchType::Subject => "subject:",
MatchType::ListId => "List-ID:",
MatchType::Body => "",
// TODO(wathiede): these will probably require adding fields to notmuch
// index. Handle them later.
MatchType::DeliveredTo
| MatchType::XForwardedTo
| MatchType::ReplyTo
| MatchType::XOriginalTo
| MatchType::XSpam => continue,
MatchType::Unknown => unreachable!(),
};
// Preserve unread status if run with --remove-all
lines.push(format!(
r#"-unprocessed +{} +unread -- is:unread tag:unprocessed {}"{}""#,
t, rule, m.needle
));
lines.push(format!(
// TODO(wathiede): this assumes `notmuch new` is configured to add
// `tag:unprocessed` to all new mail.
r#"-unprocessed +{} -- tag:unprocessed {}"{}""#,
t, rule, m.needle
));
}
}
lines.sort();

View File

@ -27,7 +27,7 @@ css-inline = "0.14.0"
futures = "0.3.31"
headers = "0.4.0"
html-escape = "0.2.13"
letterbox-notmuch = { version = "0.15.11", path = "../notmuch", registry = "xinu" }
letterbox-notmuch = { version = "0.15.11", path = "../notmuch" }
letterbox-shared = { version = "0.15.11", path = "../shared", registry = "xinu" }
linkify = "0.10.0"
log = "0.4.17"

View File

@ -0,0 +1,39 @@
use std::error::Error;
use clap::Parser;
use letterbox_notmuch::Notmuch;
use letterbox_server::nm::label_unprocessed;
use sqlx::postgres::PgPool;
use tracing::info;
#[derive(Parser)]
#[command(version, about, long_about = None)]
struct Cli {
#[arg(short, long, default_value = env!("DATABASE_URL"))]
newsreader_database_url: String,
#[arg(short, long, default_value = "10")]
/// Set to 0 to process all matches
messages_to_process: usize,
#[arg(short, long, default_value = "false")]
execute: bool,
/// Process messages matching this notmuch query
#[arg(short, long, default_value = "tag:unprocessed")]
query: String,
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let cli = Cli::parse();
let _guard = xtracing::init(env!("CARGO_BIN_NAME"))?;
build_info::build_info!(fn bi);
info!("Build Info: {}", letterbox_shared::build_version(bi));
let pool = PgPool::connect(&cli.newsreader_database_url).await?;
let nm = Notmuch::default();
let limit = if cli.messages_to_process > 0 {
Some(cli.messages_to_process)
} else {
None
};
label_unprocessed(&nm, &pool, !cli.execute, limit, &cli.query).await?;
Ok(())
}

View File

@ -17,7 +17,7 @@ use tracing::instrument;
#[cfg(feature = "tantivy")]
use crate::tantivy::TantivyConnection;
use crate::{newsreader, nm, Query};
use crate::{newsreader, nm, nm::label_unprocessed, Query};
/// # Number of seconds since the Epoch
pub type UnixTime = isize;
@ -629,6 +629,10 @@ impl MutationRoot {
let pool = ctx.data_unchecked::<PgPool>();
info!("{}", String::from_utf8_lossy(&nm.new()?));
newsreader::refresh(pool, cacher).await?;
// Process email labels
label_unprocessed(&nm, &pool, false, Some(10), "tag:unprocessed").await?;
#[cfg(feature = "tantivy")]
{
let tantivy = ctx.data_unchecked::<TantivyConnection>();

View File

@ -1,11 +1,14 @@
use std::{collections::HashMap, fs::File};
use std::{
collections::{HashMap, HashSet},
fs::File,
};
use letterbox_notmuch::Notmuch;
use letterbox_shared::compute_color;
use letterbox_shared::{compute_color, Rule};
use log::{error, info, warn};
use mailparse::{parse_content_type, parse_mail, MailHeader, MailHeaderMap, ParsedMail};
use memmap::MmapOptions;
use sqlx::PgPool;
use sqlx::{types::Json, PgPool};
use tracing::instrument;
use crate::{
@ -925,3 +928,106 @@ WHERE
.await?;
Ok(row.map(|r| r.url))
}
/*
* grab email_rules table from sql
* For each message with `unprocessed` label
* parse the message
* pass headers for each message through a matcher using email rules
* for each match, add label to message
* if any matches were found, remove unprocessed
* TODO: how to handle inbox label
*/
#[instrument(name="nm::label_unprocessed", skip_all, fields(dryrun=dryrun, limit=?limit, query=%query))]
pub async fn label_unprocessed(
nm: &Notmuch,
pool: &PgPool,
dryrun: bool,
limit: Option<usize>,
query: &str,
) -> Result<(), ServerError> {
use futures::StreamExt;
let ids = nm.message_ids(query)?;
info!(
"Processing {limit:?} of {} messages with '{query}'",
ids.len()
);
let rules: Vec<_> = sqlx::query!(
r#"
SELECT rule as "rule: Json<Rule>"
FROM email_rule
ORDER BY sort_order
"#,
)
.fetch(pool)
.map(|r| r.unwrap().rule.0)
.collect()
.await;
/*
use letterbox_shared::{Match, MatchType};
let rules = vec![Rule {
stop_on_match: false,
matches: vec![Match {
match_type: MatchType::From,
needle: "eftours".to_string(),
}],
tag: "EFTours".to_string(),
}];
*/
info!("Loaded {} rules", rules.len());
let ids = if let Some(limit) = limit {
&ids[..limit]
} else {
&ids[..]
};
for id in ids {
let id = format!("id:{id}");
let files = nm.files(&id)?;
// Only process the first file path is multiple files have the same id
let path = files.iter().next().unwrap();
let file = File::open(&path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let m = parse_mail(&mmap)?;
let (matched_rule, add_tags) = find_tags(&rules, &m.headers);
if matched_rule {
if dryrun {
info!(
"\nAdd tags: {add_tags:?}\nTo: {} From: {} Subject: {}\n",
m.headers.get_first_value("to").expect("no from header"),
m.headers.get_first_value("from").expect("no from header"),
m.headers
.get_first_value("subject")
.expect("no subject header")
);
} else {
for t in &add_tags {
nm.tag_add(t, &id)?;
}
if !add_tags.contains("inbox") {
nm.tag_remove("inbox", &id)?;
}
nm.tag_remove("unprocessed", &id)?;
}
}
}
Ok(())
}
fn find_tags<'a, 'b>(rules: &'a [Rule], headers: &'b [MailHeader]) -> (bool, HashSet<&'a str>) {
let mut matched_rule = false;
let mut add_tags = HashSet::new();
for rule in rules {
for hdr in headers {
if rule.is_match(&hdr.get_key(), &hdr.get_value()) {
//info!("Matched {rule:?}");
matched_rule = true;
add_tags.insert(rule.tag.as_str());
if rule.stop_on_match {
return (true, add_tags);
}
}
}
}
return (matched_rule, add_tags);
}

View File

@ -13,5 +13,8 @@ version.workspace = true
[dependencies]
build-info = "0.0.40"
letterbox-notmuch = { version = "0.15.11", path = "../notmuch", registry = "xinu" }
regex = "1.11.1"
serde = { version = "1.0.147", features = ["derive"] }
sqlx = "0.8.5"
strum_macros = "0.27.1"
tracing = "0.1.41"

View File

@ -1,8 +1,14 @@
use std::hash::{DefaultHasher, Hash, Hasher};
use std::{
convert::Infallible,
hash::{DefaultHasher, Hash, Hasher},
str::FromStr,
};
use build_info::{BuildInfo, VersionControl};
use letterbox_notmuch::SearchSummary;
use regex::{RegexBuilder, RegexSetBuilder};
use serde::{Deserialize, Serialize};
use tracing::debug;
#[derive(Serialize, Deserialize, Debug)]
pub struct SearchResult {
@ -65,3 +71,198 @@ pub fn compute_color(data: &str) -> String {
data.hash(&mut hasher);
format!("#{:06x}", hasher.finish() % (1 << 24))
}
#[derive(
Copy, Clone, Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize,
)]
pub enum MatchType {
From,
Sender,
To,
Cc,
Subject,
ListId,
DeliveredTo,
XForwardedTo,
ReplyTo,
XOriginalTo,
XSpam,
Body,
#[default]
Unknown,
}
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Match {
pub match_type: MatchType,
pub needle: String,
}
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Rule {
pub stop_on_match: bool,
pub matches: Vec<Match>,
pub tag: String,
}
impl Rule {
pub fn is_match(&self, header_key: &str, header_value: &str) -> bool {
let pats: Vec<_> = self
.matches
.iter()
.filter_map(|m| match m.match_type {
MatchType::To => Some("^(to|cc|bcc|x-original-to)$"),
MatchType::From => Some("^from$"),
MatchType::Sender => Some("^sender$"),
MatchType::Subject => Some("^subject$"),
MatchType::ListId => Some("^list-id$"),
MatchType::XOriginalTo => Some("^x-original-to$"),
MatchType::ReplyTo => Some("^reply-to$"),
MatchType::XSpam => Some("^x-spam$"),
MatchType::Body => None,
c => panic!("TODO handle '{c:?}' match type"),
})
.collect();
let set = RegexSetBuilder::new(&pats)
.case_insensitive(true)
.build()
.expect("failed to compile regex for matches");
let matches: Vec<_> = set.matches(header_key).into_iter().collect();
if !matches.is_empty() {
//info!("matched key '{header_key}' '{header_value}'");
for m_idx in matches {
let needle = regex::escape(&self.matches[m_idx].needle);
let pat = RegexBuilder::new(&needle)
.case_insensitive(true)
.build()
.expect("failed to compile regex for needle");
if pat.is_match(header_value) {
debug!("{header_key} matched {header_value} against {needle}");
return true;
}
}
}
false
}
}
mod matches {
// From https://linux.die.net/man/5/procmailrc
// If the regular expression contains '^TO_' it will be substituted by '(^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To):(.*[^-a-zA-Z0-9_.])?)'
// If the regular expression contains '^TO' it will be substituted by '(^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To):(.*[^a-zA-Z])?)', which should catch all destination specifications containing a specific word.
pub const TO: &'static str = "TO";
pub const CC: &'static str = "Cc";
pub const TOCC: &'static str = "(TO|Cc)";
pub const FROM: &'static str = "From";
pub const SENDER: &'static str = "Sender";
pub const SUBJECT: &'static str = "Subject";
pub const DELIVERED_TO: &'static str = "Delivered-To";
pub const X_FORWARDED_TO: &'static str = "X-Forwarded-To";
pub const REPLY_TO: &'static str = "Reply-To";
pub const X_ORIGINAL_TO: &'static str = "X-Original-To";
pub const LIST_ID: &'static str = "List-ID";
pub const X_SPAM: &'static str = "X-Spam";
pub const X_SPAM_FLAG: &'static str = "X-Spam-Flag";
}
impl FromStr for Match {
type Err = Infallible;
fn from_str(s: &str) -> Result<Self, Self::Err> {
// Examples:
// "* 1^0 ^TOsonyrewards.com@xinu.tv"
// "* ^TOsonyrewards.com@xinu.tv"
let mut it = s.split_whitespace().skip(1);
let mut needle = it.next().unwrap();
if needle == "1^0" {
needle = it.next().unwrap();
}
let mut needle = vec![needle];
needle.extend(it);
let needle = needle.join(" ");
let first = needle.chars().nth(0).unwrap_or(' ');
use matches::*;
if first == '^' {
let needle = &needle[1..];
if needle.starts_with(TO) {
return Ok(Match {
match_type: MatchType::To,
needle: cleanup_match(TO, needle),
});
} else if needle.starts_with(FROM) {
return Ok(Match {
match_type: MatchType::From,
needle: cleanup_match(FROM, needle),
});
} else if needle.starts_with(CC) {
return Ok(Match {
match_type: MatchType::Cc,
needle: cleanup_match(CC, needle),
});
} else if needle.starts_with(TOCC) {
return Ok(Match {
match_type: MatchType::To,
needle: cleanup_match(TOCC, needle),
});
} else if needle.starts_with(SENDER) {
return Ok(Match {
match_type: MatchType::Sender,
needle: cleanup_match(SENDER, needle),
});
} else if needle.starts_with(SUBJECT) {
return Ok(Match {
match_type: MatchType::Subject,
needle: cleanup_match(SUBJECT, needle),
});
} else if needle.starts_with(X_ORIGINAL_TO) {
return Ok(Match {
match_type: MatchType::XOriginalTo,
needle: cleanup_match(X_ORIGINAL_TO, needle),
});
} else if needle.starts_with(LIST_ID) {
return Ok(Match {
match_type: MatchType::ListId,
needle: cleanup_match(LIST_ID, needle),
});
} else if needle.starts_with(REPLY_TO) {
return Ok(Match {
match_type: MatchType::ReplyTo,
needle: cleanup_match(REPLY_TO, needle),
});
} else if needle.starts_with(X_SPAM_FLAG) {
return Ok(Match {
match_type: MatchType::XSpam,
needle: '*'.to_string(),
});
} else if needle.starts_with(X_SPAM) {
return Ok(Match {
match_type: MatchType::XSpam,
needle: '*'.to_string(),
});
} else if needle.starts_with(DELIVERED_TO) {
return Ok(Match {
match_type: MatchType::DeliveredTo,
needle: cleanup_match(DELIVERED_TO, needle),
});
} else if needle.starts_with(X_FORWARDED_TO) {
return Ok(Match {
match_type: MatchType::XForwardedTo,
needle: cleanup_match(X_FORWARDED_TO, needle),
});
} else {
unreachable!("needle: '{needle}'")
}
} else {
return Ok(Match {
match_type: MatchType::Body,
needle: cleanup_match("", &needle),
});
}
}
}
fn unescape(s: &str) -> String {
s.replace('\\', "")
}
pub fn cleanup_match(prefix: &str, s: &str) -> String {
unescape(&s[prefix.len()..]).replace(".*", "")
}