server: add another test and tweak fallback extraction logic

This commit is contained in:
Bill Thiede 2025-08-31 13:19:40 -07:00
parent 6ff9b2cd54
commit 4526d99de9

View File

@ -90,63 +90,76 @@ pub fn extract_calendar_metadata_from_mail(
} }
} }
// Try to extract start/end dates from subject // Try to extract start/end dates from subject
if start_date.is_none() || end_date.is_none() { if start_date.is_none() || end_date.is_none() {
if let Some(subject) = m.headers.get_first_value("Subject") { if let Some(subject) = m.headers.get_first_value("Subject") {
// Pattern: from Thu Sep 11 to Fri Jan 30, 2026 // Pattern: New event: Dentist appt @ Tue Sep 23, 2025 3pm - 4pm (PDT) (tconvertino@gmail.com)
if let Some(caps) = regex::Regex::new(r"from [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}) to [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}), (\d{4})").ok().and_then(|re| re.captures(&subject)) { if let Some(caps) = regex::Regex::new(r"New event: [^@]+@ ([A-Za-z]{3}) ([A-Za-z]{3}) (\d{1,2}), (\d{4}) (\d{1,2})(?::(\d{2}))? ?([ap]m) ?- ?(\d{1,2})(?::(\d{2}))? ?([ap]m)").ok().and_then(|re| re.captures(&subject)) {
let start_month = &caps[1]; let month = &caps[2];
let start_day = &caps[2]; let day = &caps[3];
let end_month = &caps[3]; let year = &caps[4];
let end_day = &caps[4]; let date_str = format!("{} {} {}", month, day, year);
let year = &caps[5]; if let Ok(date) = chrono::NaiveDate::parse_from_str(&date_str, "%b %d %Y") {
fn month_num(mon: &str) -> Option<&'static str> { let ymd = date.format("%Y%m%d").to_string();
match mon { start_date = Some(ymd.clone());
"Jan" => Some("01"), "Feb" => Some("02"), "Mar" => Some("03"), "Apr" => Some("04"), end_date = Some(ymd);
"May" => Some("05"), "Jun" => Some("06"), "Jul" => Some("07"), "Aug" => Some("08"),
"Sep" => Some("09"), "Oct" => Some("10"), "Nov" => Some("11"), "Dec" => Some("12"),
_ => None
} }
} } else {
if let (Some(sm), Some(em)) = (month_num(start_month), month_num(end_month)) { // Pattern: from Thu Sep 11 to Fri Jan 30, 2026
let current_year = chrono::Local::now().year().to_string(); if let Some(caps) = regex::Regex::new(r"from [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}) to [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}), (\d{4})").ok().and_then(|re| re.captures(&subject)) {
let start = format!("{}{}{}", current_year, sm, format!("{:0>2}", start_day)); let start_month = &caps[1];
let mut end_date_val = chrono::NaiveDate::parse_from_str(&format!("{}-{}-{}", year, em, format!("{:0>2}", end_day)), "%Y-%m-%d").ok(); let start_day = &caps[2];
if let Some(d) = end_date_val.as_mut() { let end_month = &caps[3];
*d = d.succ_opt().unwrap_or(*d); let end_day = &caps[4];
let year = &caps[5];
fn month_num(mon: &str) -> Option<&'static str> {
match mon {
"Jan" => Some("01"), "Feb" => Some("02"), "Mar" => Some("03"), "Apr" => Some("04"),
"May" => Some("05"), "Jun" => Some("06"), "Jul" => Some("07"), "Aug" => Some("08"),
"Sep" => Some("09"), "Oct" => Some("10"), "Nov" => Some("11"), "Dec" => Some("12"),
_ => None
}
}
if let (Some(sm), Some(em)) = (month_num(start_month), month_num(end_month)) {
let current_year = chrono::Local::now().year().to_string();
let start = format!("{}{}{}", current_year, sm, format!("{:0>2}", start_day));
let mut end_date_val = chrono::NaiveDate::parse_from_str(&format!("{}-{}-{}", year, em, format!("{:0>2}", end_day)), "%Y-%m-%d").ok();
if let Some(d) = end_date_val.as_mut() {
*d = d.succ_opt().unwrap_or(*d);
}
let end = end_date_val.map(|d| d.format("%Y%m%d").to_string()).unwrap_or_else(|| format!("{}{}{}", year, em, format!("{:0>2}", end_day)));
if start_date.is_none() { start_date = Some(start); }
if end_date.is_none() { end_date = Some(end); }
}
} }
let end = end_date_val.map(|d| d.format("%Y%m%d").to_string()).unwrap_or_else(|| format!("{}{}{}", year, em, format!("{:0>2}", end_day))); // Pattern: @ Tue Jun 24 - Mon Jun 30, 2025
if start_date.is_none() { start_date = Some(start); } if let Some(caps) = regex::Regex::new(r"@ [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}) - [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}), (\d{4})").ok().and_then(|re| re.captures(&subject)) {
if end_date.is_none() { end_date = Some(end); } let start_month = &caps[1];
} let start_day = &caps[2];
} let end_month = &caps[3];
// Pattern: @ Tue Jun 24 - Mon Jun 30, 2025 let end_day = &caps[4];
if let Some(caps) = regex::Regex::new(r"@ [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}) - [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}), (\d{4})").ok().and_then(|re| re.captures(&subject)) { let year = &caps[5];
let start_month = &caps[1]; fn month_num(mon: &str) -> Option<&'static str> {
let start_day = &caps[2]; match mon {
let end_month = &caps[3]; "Jan" => Some("01"), "Feb" => Some("02"), "Mar" => Some("03"), "Apr" => Some("04"),
let end_day = &caps[4]; "May" => Some("05"), "Jun" => Some("06"), "Jul" => Some("07"), "Aug" => Some("08"),
let year = &caps[5]; "Sep" => Some("09"), "Oct" => Some("10"), "Nov" => Some("11"), "Dec" => Some("12"),
fn month_num(mon: &str) -> Option<&'static str> { _ => None
match mon { }
"Jan" => Some("01"), "Feb" => Some("02"), "Mar" => Some("03"), "Apr" => Some("04"), }
"May" => Some("05"), "Jun" => Some("06"), "Jul" => Some("07"), "Aug" => Some("08"), if let (Some(sm), Some(em)) = (month_num(start_month), month_num(end_month)) {
"Sep" => Some("09"), "Oct" => Some("10"), "Nov" => Some("11"), "Dec" => Some("12"), let start = format!("{}{}{}", year, sm, format!("{:0>2}", start_day));
_ => None let mut end_date_val = chrono::NaiveDate::parse_from_str(&format!("{}-{}-{}", year, em, format!("{:0>2}", end_day)), "%Y-%m-%d").ok();
if let Some(d) = end_date_val.as_mut() {
*d = d.succ_opt().unwrap_or(*d);
}
let end = end_date_val.map(|d| d.format("%Y%m%d").to_string()).unwrap_or_else(|| format!("{}{}{}", year, em, format!("{:0>2}", end_day)));
if start_date.is_none() { start_date = Some(start); }
if end_date.is_none() { end_date = Some(end); }
}
} }
} }
if let (Some(sm), Some(em)) = (month_num(start_month), month_num(end_month)) {
let start = format!("{}{}{}", year, sm, format!("{:0>2}", start_day));
let mut end_date_val = chrono::NaiveDate::parse_from_str(&format!("{}-{}-{}", year, em, format!("{:0>2}", end_day)), "%Y-%m-%d").ok();
if let Some(d) = end_date_val.as_mut() {
*d = d.succ_opt().unwrap_or(*d);
}
let end = end_date_val.map(|d| d.format("%Y%m%d").to_string()).unwrap_or_else(|| format!("{}{}{}", year, em, format!("{:0>2}", end_day)));
if start_date.is_none() { start_date = Some(start); }
if end_date.is_none() { end_date = Some(end); }
}
} }
} }
}
// Try to extract summary from body if still missing // Try to extract summary from body if still missing
if summary.is_none() { if summary.is_none() {
if let Body::PlainText(t) = body { if let Body::PlainText(t) = body {
@ -2093,6 +2106,32 @@ fn parse_ical_datetime_tz(dt: &str, tz: Tz) -> Option<chrono::DateTime<Tz>> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
/// Runs the third Google Calendar sample email (a "Dentist appt" event)
/// through body extraction and calendar-metadata extraction, then checks the
/// detected summary, organizer, start/end dates and the rendered HTML.
#[test]
fn google_calendar_email_3_single_event_metadata() {
    use mailparse::parse_mail;

    // Load and parse the fixture embedded at compile time.
    let fixture = include_str!("../../server/testdata/google-calendar-example-3.eml");
    let message = parse_mail(fixture.as_bytes()).expect("parse_mail");
    let mut part_path = Vec::new();
    let body = extract_body(&message, &mut part_path).expect("extract_body");
    let event = extract_calendar_metadata_from_mail(&message, &body);

    // The message must be recognised as a Google Calendar event.
    assert!(event.is_google_calendar_event);

    // Summary and organizer expected for this fixture.
    assert_eq!(event.summary.as_deref(), Some("Dentist appt"));
    assert_eq!(event.organizer.as_deref(), Some("tconvertino@gmail.com"));

    // Single-day event: start and end are both expected to be 2025-09-23.
    assert_eq!(event.start_date.as_deref(), Some("20250923"));
    assert_eq!(event.end_date.as_deref(), Some("20250923"));

    // A rendered HTML body is mandatory; a missing one fails the test.
    let html = match event.body_html.as_ref() {
        Some(rendered) => rendered,
        None => panic!("No body_html rendered"),
    };
    assert!(html.contains("Dentist appt"), "HTML should contain the summary");
    assert!(html.contains("20250923"), "HTML should contain the event date");
    // The event is not recurring, so no recurrence text may be rendered.
    assert!(!html.contains("Repeats"), "HTML should not mention recurrence");
}
#[test] #[test]
fn google_calendar_email_2_metadata_no_recurrence() { fn google_calendar_email_2_metadata_no_recurrence() {
use mailparse::parse_mail; use mailparse::parse_mail;