From 5c9955a89e2f96e8f570ff6195ea903706856b6b Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Tue, 12 Aug 2025 16:56:16 -0700 Subject: [PATCH] server: fix raw dmarc extraction for non-Google domains --- server/src/nm.rs | 84 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 19 deletions(-) diff --git a/server/src/nm.rs b/server/src/nm.rs index a44389c..52f9126 100644 --- a/server/src/nm.rs +++ b/server/src/nm.rs @@ -180,6 +180,7 @@ pub async fn thread( // display names (that default to the most commonly seen name). let mut messages = Vec::new(); for (path, id) in std::iter::zip(nm.files(&thread_id)?, nm.message_ids(&thread_id)?) { + let mut html_report_summary: Option = None; let tags = nm.tags_for_query(&format!("id:{}", id))?; let file = File::open(&path)?; let mmap = unsafe { MmapOptions::new().map(&file)? }; @@ -346,7 +347,8 @@ pub async fn thread( // Append DMARC report if available if m.ctype.mimetype.as_str() == APPLICATION_ZIP { - if let Ok(Body::Html(_html_body)) = extract_zip(&m) { + if let Ok(Body::Html(html_body)) = extract_zip(&m) { + html_report_summary = Some(html_body.html); // Extract raw XML for pretty printing if let Ok(zip_bytes) = m.get_body_raw() { if let Ok(mut archive) = ZipArchive::new(Cursor::new(&zip_bytes)) { @@ -368,8 +370,10 @@ pub async fn thread( } if m.ctype.mimetype.as_str() == APPLICATION_GZIP { - if let Ok(Body::Html(_html_body)) = extract_gzip(&m) { - // Extract raw XML for pretty printing + // Call extract_gzip to get the HTML summary and also to determine if it's a DMARC report + if let Ok((Body::Html(html_body), _)) = extract_gzip(&m) { + html_report_summary = Some(html_body.html); + // If extract_gzip successfully parsed a DMARC report, then extract the raw content if let Ok(gz_bytes) = m.get_body_raw() { let mut decoder = flate2::read::GzDecoder::new(&gz_bytes[..]); let mut xml = String::new(); @@ -381,6 +385,17 @@ pub async fn thread( } } + let mut current_html = final_body.to_html().unwrap_or_default(); + + if let Some(html_summary) = html_report_summary { + current_html.push_str(&html_summary); + } + + error!( + "mimetype {} raw_report_content.is_some() {}", + m.ctype.mimetype.as_str(), + raw_report_content.is_some() + ); if let Some(raw_content) = raw_report_content { let pretty_printed_content = if m.ctype.mimetype.as_str() == MULTIPART_REPORT { // Pretty print JSON @@ -403,15 +418,15 @@ pub async fn thread( raw_content } }; - final_body = Body::Html(Html { - html: format!( - "{}\n
{}
", - final_body.to_html().unwrap_or_default(), - html_escape::encode_text(&pretty_printed_content) - ), - content_tree: final_body.to_html_content_tree().unwrap_or_default(), - }); + current_html.push_str(&format!( + "\n
{}
", + html_escape::encode_text(&pretty_printed_content) + )); } + final_body = Body::Html(Html { + html: current_html, + content_tree: final_body.to_html_content_tree().unwrap_or_default(), + }); messages.push(Message { id: format!("id:{}", id), @@ -600,7 +615,7 @@ fn extract_zip(m: &ParsedMail) -> Result { extract_unhandled(m) } -fn extract_gzip(m: &ParsedMail) -> Result { +fn extract_gzip(m: &ParsedMail) -> Result<(Body, Option), ServerError> { let pcd = m.get_content_disposition(); let filename = pcd.params.get("filename").map(|s| s.to_lowercase()); @@ -614,22 +629,22 @@ fn extract_gzip(m: &ParsedMail) -> Result { if decoder.read_to_string(&mut xml).is_ok() { match parse_dmarc_report(&xml) { Ok(report) => { - return Ok(Body::html(format!( + return Ok((Body::html(format!( "
DMARC report summary:
{}
", report - ))); + )), Some(xml))); } Err(e) => { - return Ok(Body::html(format!( + return Ok((Body::html(format!( "
Failed to parse DMARC report XML: {}
", e - ))); + )), None)); } } } } } - extract_unhandled(m) + Ok((extract_unhandled(m)?, None)) } fn extract_report(m: &ParsedMail, _part_addr: &mut Vec) -> Result { @@ -756,7 +771,9 @@ fn extract_unhandled(m: &ParsedMail) -> Result { } fn is_dmarc_report_filename(name: &str) -> bool { - (name.ends_with(".xml.gz") || name.ends_with(".xml")) && name.contains("!") + let is = (name.ends_with(".xml.gz") || name.ends_with(".xml")) && name.contains("!"); + error!("info_span {name}: {is}"); + is } // multipart/alternative defines multiple representations of the same message, and clients should @@ -864,7 +881,36 @@ fn extract_mixed(m: &ParsedMail, part_addr: &mut Vec) -> Result parts.push(extract_gzip(sp)?), + APPLICATION_GZIP => { + let (html_body, raw_xml) = extract_gzip(sp)?; + parts.push(html_body); + if let Some(xml) = raw_xml { + let pretty_printed_content = if sp.ctype.mimetype.as_str() == MULTIPART_REPORT { + // This case is for TLS reports, not DMARC. + // For DMARC, it's always XML. + // Pretty print JSON (if it were TLS) + if let Ok(parsed_json) = serde_json::from_str::(&xml) { + serde_json::to_string_pretty(&parsed_json).unwrap_or(xml) + } else { + xml + } + } else { + // DMARC reports are XML + // Pretty print XML + let doc_result = Document::from_str(&xml); + if let Ok(doc) = doc_result { + doc.to_string_pretty_with_config(&display::Config::default_pretty()) + } else { + error!( + "Failed to parse XML for pretty printing: {:?}", + doc_result.unwrap_err() + ); + xml + } + }; + parts.push(Body::html(format!("\n
{}
", html_escape::encode_text(&pretty_printed_content)))); + } + } mt => parts.push(unhandled_html(MULTIPART_MIXED, mt)), } part_addr.pop();