From 4526d99de952a8a14cf4db22f5e168970d6c38c3 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Sun, 31 Aug 2025 13:19:40 -0700 Subject: [PATCH] server: add another test and tweak fallback extraction logic --- server/src/email_extract.rs | 141 +++++++++++++++++++++++------------- 1 file changed, 90 insertions(+), 51 deletions(-) diff --git a/server/src/email_extract.rs b/server/src/email_extract.rs index 1ab820b..e96e271 100644 --- a/server/src/email_extract.rs +++ b/server/src/email_extract.rs @@ -90,63 +90,76 @@ pub fn extract_calendar_metadata_from_mail( } } // Try to extract start/end dates from subject - if start_date.is_none() || end_date.is_none() { - if let Some(subject) = m.headers.get_first_value("Subject") { - // Pattern: from Thu Sep 11 to Fri Jan 30, 2026 - if let Some(caps) = regex::Regex::new(r"from [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}) to [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}), (\d{4})").ok().and_then(|re| re.captures(&subject)) { - let start_month = &caps[1]; - let start_day = &caps[2]; - let end_month = &caps[3]; - let end_day = &caps[4]; - let year = &caps[5]; - fn month_num(mon: &str) -> Option<&'static str> { - match mon { - "Jan" => Some("01"), "Feb" => Some("02"), "Mar" => Some("03"), "Apr" => Some("04"), - "May" => Some("05"), "Jun" => Some("06"), "Jul" => Some("07"), "Aug" => Some("08"), - "Sep" => Some("09"), "Oct" => Some("10"), "Nov" => Some("11"), "Dec" => Some("12"), - _ => None + if start_date.is_none() || end_date.is_none() { + if let Some(subject) = m.headers.get_first_value("Subject") { + // Pattern: New event: Dentist appt @ Tue Sep 23, 2025 3pm - 4pm (PDT) (tconvertino@gmail.com) + if let Some(caps) = regex::Regex::new(r"New event: [^@]+@ ([A-Za-z]{3}) ([A-Za-z]{3}) (\d{1,2}), (\d{4}) (\d{1,2})(?::(\d{2}))? ?([ap]m) ?- ?(\d{1,2})(?::(\d{2}))? ?([ap]m)").ok().and_then(|re| re.captures(&subject)) { + let month = &caps[2]; + let day = &caps[3]; + let year = &caps[4]; + let date_str = format!("{} {} {}", month, day, year); + if let Ok(date) = chrono::NaiveDate::parse_from_str(&date_str, "%b %d %Y") { + let ymd = date.format("%Y%m%d").to_string(); + start_date = Some(ymd.clone()); + end_date = Some(ymd); } - } - if let (Some(sm), Some(em)) = (month_num(start_month), month_num(end_month)) { - let current_year = chrono::Local::now().year().to_string(); - let start = format!("{}{}{}", current_year, sm, format!("{:0>2}", start_day)); - let mut end_date_val = chrono::NaiveDate::parse_from_str(&format!("{}-{}-{}", year, em, format!("{:0>2}", end_day)), "%Y-%m-%d").ok(); - if let Some(d) = end_date_val.as_mut() { - *d = d.succ_opt().unwrap_or(*d); + } else { + // Pattern: from Thu Sep 11 to Fri Jan 30, 2026 + if let Some(caps) = regex::Regex::new(r"from [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}) to [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}), (\d{4})").ok().and_then(|re| re.captures(&subject)) { + let start_month = &caps[1]; + let start_day = &caps[2]; + let end_month = &caps[3]; + let end_day = &caps[4]; + let year = &caps[5]; + fn month_num(mon: &str) -> Option<&'static str> { + match mon { + "Jan" => Some("01"), "Feb" => Some("02"), "Mar" => Some("03"), "Apr" => Some("04"), + "May" => Some("05"), "Jun" => Some("06"), "Jul" => Some("07"), "Aug" => Some("08"), + "Sep" => Some("09"), "Oct" => Some("10"), "Nov" => Some("11"), "Dec" => Some("12"), + _ => None + } + } + if let (Some(sm), Some(em)) = (month_num(start_month), month_num(end_month)) { + let current_year = chrono::Local::now().year().to_string(); + let start = format!("{}{}{}", current_year, sm, format!("{:0>2}", start_day)); + let mut end_date_val = chrono::NaiveDate::parse_from_str(&format!("{}-{}-{}", year, em, format!("{:0>2}", end_day)), "%Y-%m-%d").ok(); + if let Some(d) = end_date_val.as_mut() { + *d = d.succ_opt().unwrap_or(*d); + } + let end = end_date_val.map(|d| d.format("%Y%m%d").to_string()).unwrap_or_else(|| format!("{}{}{}", year, em, format!("{:0>2}", end_day))); + if start_date.is_none() { start_date = Some(start); } + if end_date.is_none() { end_date = Some(end); } + } } - let end = end_date_val.map(|d| d.format("%Y%m%d").to_string()).unwrap_or_else(|| format!("{}{}{}", year, em, format!("{:0>2}", end_day))); - if start_date.is_none() { start_date = Some(start); } - if end_date.is_none() { end_date = Some(end); } - } - } - // Pattern: @ Tue Jun 24 - Mon Jun 30, 2025 - if let Some(caps) = regex::Regex::new(r"@ [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}) - [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}), (\d{4})").ok().and_then(|re| re.captures(&subject)) { - let start_month = &caps[1]; - let start_day = &caps[2]; - let end_month = &caps[3]; - let end_day = &caps[4]; - let year = &caps[5]; - fn month_num(mon: &str) -> Option<&'static str> { - match mon { - "Jan" => Some("01"), "Feb" => Some("02"), "Mar" => Some("03"), "Apr" => Some("04"), - "May" => Some("05"), "Jun" => Some("06"), "Jul" => Some("07"), "Aug" => Some("08"), - "Sep" => Some("09"), "Oct" => Some("10"), "Nov" => Some("11"), "Dec" => Some("12"), - _ => None + // Pattern: @ Tue Jun 24 - Mon Jun 30, 2025 + if let Some(caps) = regex::Regex::new(r"@ [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}) - [A-Za-z]{3} ([A-Za-z]{3}) (\d{1,2}), (\d{4})").ok().and_then(|re| re.captures(&subject)) { + let start_month = &caps[1]; + let start_day = &caps[2]; + let end_month = &caps[3]; + let end_day = &caps[4]; + let year = &caps[5]; + fn month_num(mon: &str) -> Option<&'static str> { + match mon { + "Jan" => Some("01"), "Feb" => Some("02"), "Mar" => Some("03"), "Apr" => Some("04"), + "May" => Some("05"), "Jun" => Some("06"), "Jul" => Some("07"), "Aug" => Some("08"), + "Sep" => Some("09"), "Oct" => Some("10"), "Nov" => Some("11"), "Dec" => Some("12"), + _ => None + } + } + if let (Some(sm), Some(em)) = (month_num(start_month), month_num(end_month)) { + let start = format!("{}{}{}", year, sm, format!("{:0>2}", start_day)); + let mut end_date_val = chrono::NaiveDate::parse_from_str(&format!("{}-{}-{}", year, em, format!("{:0>2}", end_day)), "%Y-%m-%d").ok(); + if let Some(d) = end_date_val.as_mut() { + *d = d.succ_opt().unwrap_or(*d); + } + let end = end_date_val.map(|d| d.format("%Y%m%d").to_string()).unwrap_or_else(|| format!("{}{}{}", year, em, format!("{:0>2}", end_day))); + if start_date.is_none() { start_date = Some(start); } + if end_date.is_none() { end_date = Some(end); } + } } } - if let (Some(sm), Some(em)) = (month_num(start_month), month_num(end_month)) { - let start = format!("{}{}{}", year, sm, format!("{:0>2}", start_day)); - let mut end_date_val = chrono::NaiveDate::parse_from_str(&format!("{}-{}-{}", year, em, format!("{:0>2}", end_day)), "%Y-%m-%d").ok(); - if let Some(d) = end_date_val.as_mut() { - *d = d.succ_opt().unwrap_or(*d); - } - let end = end_date_val.map(|d| d.format("%Y%m%d").to_string()).unwrap_or_else(|| format!("{}{}{}", year, em, format!("{:0>2}", end_day))); - if start_date.is_none() { start_date = Some(start); } - if end_date.is_none() { end_date = Some(end); } - } } } - } // Try to extract summary from body if still missing if summary.is_none() { if let Body::PlainText(t) = body { @@ -2093,6 +2106,32 @@ fn parse_ical_datetime_tz(dt: &str, tz: Tz) -> Option> { #[cfg(test)] mod tests { + #[test] + fn google_calendar_email_3_single_event_metadata() { + use mailparse::parse_mail; + let raw_email = include_str!("../../server/testdata/google-calendar-example-3.eml"); + let parsed = parse_mail(raw_email.as_bytes()).expect("parse_mail"); + let mut part_addr = vec![]; + let body = extract_body(&parsed, &mut part_addr).expect("extract_body"); + let meta = extract_calendar_metadata_from_mail(&parsed, &body); + // Assert detection as Google Calendar + assert!(meta.is_google_calendar_event); + // Assert metadata extraction + assert_eq!(meta.summary, Some("Dentist appt".to_string())); + // Organizer: from From header, extract email address + assert_eq!(meta.organizer, Some("tconvertino@gmail.com".to_string())); + // Dates: should extract Sep 23, 2025, 3pm-4pm + assert_eq!(meta.start_date, Some("20250923".to_string())); + assert_eq!(meta.end_date, Some("20250923".to_string())); + // Should not be recurring + if let Some(ref html) = meta.body_html { + assert!(html.contains("Dentist appt"), "HTML should contain the summary"); + assert!(html.contains("20250923"), "HTML should contain the event date"); + assert!(!html.contains("Repeats"), "HTML should not mention recurrence"); + } else { + panic!("No body_html rendered"); + } + } #[test] fn google_calendar_email_2_metadata_no_recurrence() { use mailparse::parse_mail;