From 06e65a52b3f1f6e4ed0a3bf6cbbaf9896bd10870 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Tue, 2 Sep 2025 19:24:34 -0700 Subject: [PATCH] server: most test to end of file --- server/src/email_extract.rs | 158 ++++++++++++++++++++++++++++-------- 1 file changed, 123 insertions(+), 35 deletions(-) diff --git a/server/src/email_extract.rs b/server/src/email_extract.rs index 393f919..5a8a5c9 100644 --- a/server/src/email_extract.rs +++ b/server/src/email_extract.rs @@ -1,3 +1,13 @@ +#[derive(Debug, PartialEq)] +pub struct ExtractedCalendarMetadata { + pub is_google_calendar_event: bool, + pub summary: Option<String>, + pub organizer: Option<String>, + pub start_date: Option<String>, + pub end_date: Option<String>, + pub body_html: Option<String>, +} + /// Helper to extract Google Calendar event metadata from a ParsedMail (for tests and features) pub fn extract_calendar_metadata_from_mail( m: &ParsedMail, @@ -52,7 +62,7 @@ pub fn extract_calendar_metadata_from_mail( summary = prop.value.clone(); break 'event_loop; } - }, + } "ORGANIZER" => organizer = prop.value.clone(), "DTSTART" => { if let Some(dt) = &prop.value { @@ -82,18 +92,24 @@ pub fn extract_calendar_metadata_from_mail( // Try to extract summary from subject (e.g., "New event: @ ...") if summary.is_none() { if let Some(subject) = m.headers.get_first_value("Subject") { - if let Some(caps) = regex::Regex::new(r"New event: ([^@]+) @").ok().and_then(|re| re.captures(&subject)) { + if let Some(caps) = regex::Regex::new(r"New event: ([^@]+) @") + .ok() + .and_then(|re| re.captures(&subject)) + { summary = Some(caps[1].trim().to_string()); - } else if let Some(caps) = regex::Regex::new(r"Invitation: ([^@]+) @").ok().and_then(|re| re.captures(&subject)) { + } else if let Some(caps) = regex::Regex::new(r"Invitation: ([^@]+) @") + .ok() + .and_then(|re| re.captures(&subject)) + { summary = Some(caps[1].trim().to_string()); } } } // Try to extract start/end dates from subject - if start_date.is_none() || end_date.is_none() { - if let Some(subject) = 
m.headers.get_first_value("Subject") { - // Pattern: New event: Dentist appt @ Tue Sep 23, 2025 3pm - 4pm (PDT) (tconvertino@gmail.com) - if let Some(caps) = regex::Regex::new(r"New event: [^@]+@ ([A-Za-z]{3}) ([A-Za-z]{3}) (\d{1,2}), (\d{4}) (\d{1,2})(?::(\d{2}))? ?([ap]m) ?- ?(\d{1,2})(?::(\d{2}))? ?([ap]m)").ok().and_then(|re| re.captures(&subject)) { + if start_date.is_none() || end_date.is_none() { + if let Some(subject) = m.headers.get_first_value("Subject") { + // Pattern: New event: Dentist appt @ Tue Sep 23, 2025 3pm - 4pm (PDT) (tconvertino@gmail.com) + if let Some(caps) = regex::Regex::new(r"New event: [^@]+@ ([A-Za-z]{3}) ([A-Za-z]{3}) (\d{1,2}), (\d{4}) (\d{1,2})(?::(\d{2}))? ?([ap]m) ?- ?(\d{1,2})(?::(\d{2}))? ?([ap]m)").ok().and_then(|re| re.captures(&subject)) { let month = &caps[2]; let day = &caps[3]; let year = &caps[4]; @@ -158,8 +174,8 @@ pub fn extract_calendar_metadata_from_mail( } } } - } } + } // Try to extract summary from body if still missing if summary.is_none() { if let Body::PlainText(t) = body { @@ -173,7 +189,9 @@ pub fn extract_calendar_metadata_from_mail( } if summary.is_none() { if let Body::Html(h) = body { - let text = regex::Regex::new(r"<[^>]+>").unwrap().replace_all(&h.html, ""); + let text = regex::Regex::new(r"<[^>]+>") + .unwrap() + .replace_all(&h.html, ""); for line in text.lines() { let line = line.trim(); if !line.is_empty() && line.len() > 3 && line.len() < 100 { @@ -205,7 +223,17 @@ pub fn extract_calendar_metadata_from_mail( // Improved recurrence detection: check for common recurrence phrases in subject, HTML, and plain text body let mut has_recurrence = false; let recurrence_phrases = [ - "recurr", "repeat", "every week", "every month", "every year", "weekly", "monthly", "annually", "biweekly", "daily", "RRULE" + "recurr", + "repeat", + "every week", + "every month", + "every year", + "weekly", + "monthly", + "annually", + "biweekly", + "daily", + "RRULE", ]; if let Some(ref s) = 
m.headers.get_first_value("Subject") { let subj = s.to_lowercase(); @@ -229,13 +257,18 @@ pub fn extract_calendar_metadata_from_mail( } } } - let needs_ical_flex = summary.is_some() || start_date.is_some() || end_date.is_some() || has_recurrence; + let needs_ical_flex = + summary.is_some() || start_date.is_some() || end_date.is_some() || has_recurrence; if needs_ical_flex { let summary_val = summary.clone().unwrap_or_default(); let organizer_val = organizer.clone().unwrap_or_default(); let start_val = start_date.clone().unwrap_or_default(); let end_val = end_date.clone().unwrap_or_default(); - let recurrence_display = if has_recurrence { "Repeats".to_string() } else { String::new() }; + let recurrence_display = if has_recurrence { + "Repeats".to_string() + } else { + String::new() + }; let template = IcalSummaryTemplate { summary: &summary_val, local_fmt_start: &start_val, @@ -249,13 +282,15 @@ pub fn extract_calendar_metadata_from_mail( today: Some(chrono::Local::now().date_naive()), recurrence_display, }; - let fallback_html = template.render().unwrap_or_else(|_| String::from("
")); + let fallback_html = template + .render() + .unwrap_or_else(|_| String::from("
")); match &mut body_html { Some(existing) => { if !existing.starts_with(&fallback_html) { *existing = format!("{}{}", fallback_html, existing); } - }, + } None => { body_html = Some(fallback_html); } @@ -276,16 +311,26 @@ pub fn extract_calendar_metadata_from_mail( today: Some(chrono::Local::now().date_naive()), recurrence_display: String::new(), }; - body_html = Some(template.render().unwrap_or_else(|_| String::from("
"))); + body_html = Some( + template + .render() + .unwrap_or_else(|_| String::from("
")), + ); } // Improved fallback: extract summary, start_date, end_date, and recurrence from subject/body if not found if let Some(subject) = m.headers.get_first_value("Subject") { // Try to extract summary from subject (e.g., "New event: @ ...") if summary.is_none() { - if let Some(caps) = regex::Regex::new(r"New event: ([^@]+) @").ok().and_then(|re| re.captures(&subject)) { + if let Some(caps) = regex::Regex::new(r"New event: ([^@]+) @") + .ok() + .and_then(|re| re.captures(&subject)) + { summary = Some(caps[1].trim().to_string()); - } else if let Some(caps) = regex::Regex::new(r"Invitation: ([^@]+) @").ok().and_then(|re| re.captures(&subject)) { + } else if let Some(caps) = regex::Regex::new(r"Invitation: ([^@]+) @") + .ok() + .and_then(|re| re.captures(&subject)) + { summary = Some(caps[1].trim().to_string()); } } @@ -346,7 +391,7 @@ pub fn extract_calendar_metadata_from_mail( } } // Try to detect recurrence from subject - // recurrence detection and rendering is now handled by the template logic + // recurrence detection and rendering is now handled by the template logic } // Try to extract summary from body if still missing if summary.is_none() { @@ -361,7 +406,9 @@ pub fn extract_calendar_metadata_from_mail( } if summary.is_none() { if let Body::Html(h) = body { - let text = regex::Regex::new(r"<[^>]+>").unwrap().replace_all(&h.html, ""); + let text = regex::Regex::new(r"<[^>]+>") + .unwrap() + .replace_all(&h.html, ""); for line in text.lines() { let line = line.trim(); if !line.is_empty() && line.len() > 3 && line.len() < 100 { @@ -2106,6 +2153,7 @@ fn parse_ical_datetime_tz(dt: &str, tz: Tz) -> Option> { #[cfg(test)] mod tests { + use super::*; #[test] fn google_calendar_email_3_single_event_metadata() { use mailparse::parse_mail; @@ -2125,9 +2173,18 @@ mod tests { assert_eq!(meta.end_date, Some("20250923".to_string())); // Should not be recurring if let Some(ref html) = meta.body_html { - assert!(html.contains("Dentist appt"), "HTML should contain the 
summary"); - assert!(html.contains("20250923"), "HTML should contain the event date"); - assert!(!html.contains("Repeats"), "HTML should not mention recurrence"); + assert!( + html.contains("Dentist appt"), + "HTML should contain the summary" + ); + assert!( + html.contains("20250923"), + "HTML should contain the event date" + ); + assert!( + !html.contains("Repeats"), + "HTML should not mention recurrence" + ); } else { panic!("No body_html rendered"); } @@ -2145,7 +2202,10 @@ mod tests { // Assert metadata extraction (update these values to match the new .eml) assert_eq!(meta.summary, Some("McClure BLT".to_string())); // Organizer: from From header, extract email address - assert_eq!(meta.organizer, Some("calendar-notification@google.com".to_string())); + assert_eq!( + meta.organizer, + Some("calendar-notification@google.com".to_string()) + ); // Dates: from subject, Thu Sep 11 to Fri Jan 30, 2026 let current_year = chrono::Local::now().year(); assert_eq!(meta.start_date, Some(format!("{}0911", current_year))); @@ -2153,7 +2213,7 @@ mod tests { } #[test] fn google_calendar_email_2_renders_calendar_and_recurrence() { - // ...existing code... + // ...existing code... 
use mailparse::parse_mail; let raw_email = include_str!("../../server/testdata/google-calendar-example-2.eml"); let parsed = parse_mail(raw_email.as_bytes()).expect("parse_mail"); @@ -2164,25 +2224,47 @@ mod tests { let html = meta.body_html.expect("body_html"); println!("Rendered HTML for verification:\n{}", html); // Check that the HTML contains the summary, organizer, start, and end times with labels - assert!(html.contains("Summary: McClure BLT"), "HTML should contain the labeled summary/title"); - assert!(html.contains("Organizer: calendar-notification@google.com"), "HTML should contain the labeled organizer"); - assert!(html.contains("Start: 20250911"), "HTML should contain the labeled start time"); - assert!(html.contains("End: 20260131"), "HTML should contain the labeled end time"); + assert!( + html.contains("Summary: McClure BLT"), + "HTML should contain the labeled summary/title" + ); + assert!( + html.contains("Organizer: calendar-notification@google.com"), + "HTML should contain the labeled organizer" + ); + assert!( + html.contains("Start: 20250911"), + "HTML should contain the labeled start time" + ); + assert!( + html.contains("End: 20260131"), + "HTML should contain the labeled end time" + ); if !html.contains("ical-flex") { println!("FAIL: html did not contain 'ical-flex':\n{}", html); } - assert!(html.contains("ical-flex"), "Calendar widget should be rendered"); + assert!( + html.contains("ical-flex"), + "Calendar widget should be rendered" + ); // Recurrence info should be present - if !(html.contains("Repeats: Repeats") || html.contains("recurr") || html.contains("RRULE")) { + if !(html.contains("Repeats: Repeats") + || html.contains("recurr") + || html.contains("RRULE")) + { println!("FAIL: html did not contain recurrence info:\n{}", html); } - assert!(html.contains("Repeats: Repeats") || html.contains("recurr") || html.contains("RRULE"), "Recurrence info should be present in HTML"); + assert!( + html.contains("Repeats: Repeats") + || 
html.contains("recurr") + || html.contains("RRULE"), + "Recurrence info should be present in HTML" + ); } - use super::*; #[test] fn google_calendar_email_renders_ical_summary() { use mailparse::parse_mail; - let raw_email = include_str!("../../server/testdata/google-calendar-example.eml"); + let raw_email = include_str!("../testdata/google-calendar-example.eml"); let parsed = parse_mail(raw_email.as_bytes()).expect("parse_mail"); let mut part_addr = vec![]; let body = extract_body(&parsed, &mut part_addr).expect("extract_body"); @@ -2255,7 +2337,10 @@ mod tests { assert!(meta.is_google_calendar_event); // Assert that the summary and organizer are present assert_eq!(meta.summary, Some("McClure BLT".to_string())); - assert_eq!(meta.organizer, Some("calendar-notification@google.com".to_string())); + assert_eq!( + meta.organizer, + Some("calendar-notification@google.com".to_string()) + ); // Assert that the start and end dates are present let current_year = chrono::Local::now().year(); assert_eq!(meta.start_date, Some(format!("{}0911", current_year))); @@ -2265,7 +2350,10 @@ mod tests { if !(html.contains("Repeats") || html.contains("recurr") || html.contains("RRULE")) { println!("FAIL: html did not contain recurrence info:\n{}", html); } - assert!(html.contains("Repeats") || html.contains("recurr") || html.contains("RRULE"), "Recurrence info should be present in HTML"); + assert!( + html.contains("Repeats") || html.contains("recurr") || html.contains("RRULE"), + "Recurrence info should be present in HTML" + ); } else { panic!("No body_html rendered"); }