// letterbox/server/src/email_extract.rs
//
// 1257 lines, 44 KiB, Rust

use std::io::{Cursor, Read};
use askama::Template;
use chrono::{TimeZone, Utc};
use mailparse::{parse_content_type, parse_mail, MailHeader, MailHeaderMap, ParsedMail};
use quick_xml::de::from_str as xml_from_str;
use tracing::{error, info, warn};
use zip::ZipArchive;
use crate::{
error::ServerError,
graphql::{Attachment, Body, DispositionType, Email, Html, PlainText, UnhandledContentType},
linkify_html,
};
// MIME type strings this module matches `ParsedMail::ctype.mimetype` against.
// `const` string slices are implicitly `'static`, so the lifetime is left off.
const APPLICATION_GZIP: &str = "application/gzip";
const APPLICATION_ZIP: &str = "application/zip";
const IMAGE_JPEG: &str = "image/jpeg";
const IMAGE_PJPEG: &str = "image/pjpeg";
const IMAGE_PNG: &str = "image/png";
const MESSAGE_RFC822: &str = "message/rfc822";
const MULTIPART_ALTERNATIVE: &str = "multipart/alternative";
const MULTIPART_MIXED: &str = "multipart/mixed";
const MULTIPART_RELATED: &str = "multipart/related";
const MULTIPART_REPORT: &str = "multipart/report";
const TEXT_HTML: &str = "text/html";
const TEXT_PLAIN: &str = "text/plain";
/// Collects the addresses found in every `header_name` header of `m`.
///
/// Each header value is first handed to `mailparse::addrparse`. Single
/// mailboxes become an [`Email`] with name and address; address groups
/// contribute nothing. When `addrparse` rejects the value, a small fallback
/// is applied:
///
/// * exactly one `@`, exactly one `<` and a trailing `>`: split into the text
///   before `<` (name) and the text between the brackets (address);
/// * exactly one `@` in any other shape: the value is skipped;
/// * anything else: the whole value is kept as a name with no address.
///
/// `_path` is unused. The function currently never returns `Err`.
pub fn email_addresses(
    _path: &str,
    m: &ParsedMail,
    header_name: &str,
) -> Result<Vec<Email>, ServerError> {
    let mut addrs = Vec::new();
    for header_value in m.headers.get_all_values(header_name) {
        if let Ok(parsed) = mailparse::addrparse(&header_value) {
            addrs.extend(
                parsed
                    .into_inner()
                    .into_iter()
                    .filter_map(|entry| match entry {
                        mailparse::MailAddr::Single(single) => Some(Email {
                            name: single.display_name,
                            addr: Some(single.addr),
                            photo_url: None,
                        }),
                        // Groups are not expanded into individual addresses.
                        mailparse::MailAddr::Group(_) => None,
                    }),
            );
            continue;
        }

        // Fallback for values `addrparse` could not handle.
        let raw = header_value;
        if raw.matches('@').count() != 1 {
            addrs.push(Email {
                name: Some(raw),
                addr: None,
                photo_url: None,
            });
            continue;
        }
        if raw.matches('<').count() == 1 && raw.ends_with('>') {
            let split = raw
                .split_once('<')
                .and_then(|(name, rest)| rest.strip_suffix('>').map(|addr| (name, addr)));
            if let Some((name, addr)) = split {
                addrs.push(Email {
                    name: Some(name.trim().to_string()),
                    addr: Some(addr.trim().to_string()),
                    photo_url: None,
                });
            }
        }
    }
    Ok(addrs)
}
pub fn extract_body(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
let body = m.get_body()?;
let ret = match m.ctype.mimetype.as_str() {
TEXT_PLAIN => return Ok(Body::text(body)),
TEXT_HTML => return Ok(Body::html(body)),
MULTIPART_MIXED => extract_mixed(m, part_addr),
MULTIPART_ALTERNATIVE => extract_alternative(m, part_addr),
MULTIPART_RELATED => extract_related(m, part_addr),
MULTIPART_REPORT => extract_report(m, part_addr),
// APPLICATION_ZIP and APPLICATION_GZIP are handled in the thread function
APPLICATION_ZIP => extract_unhandled(m),
APPLICATION_GZIP => extract_unhandled(m),
_ => extract_unhandled(m),
};
if let Err(err) = ret {
error!("Failed to extract body: {:?}", err);
return Ok(extract_unhandled(m)?);
}
ret
}
pub fn extract_zip(m: &ParsedMail) -> Result<Body, ServerError> {
if let Ok(zip_bytes) = m.get_body_raw() {
if let Ok(mut archive) = ZipArchive::new(Cursor::new(&zip_bytes)) {
for i in 0..archive.len() {
if let Ok(mut file) = archive.by_index(i) {
let name = file.name().to_lowercase();
// Google DMARC reports are typically named like "google.com!example.com!...xml"
// and may or may not contain "dmarc" in the filename.
if is_dmarc_report_filename(&name) {
let mut xml = String::new();
use std::io::Read;
if file.read_to_string(&mut xml).is_ok() {
match parse_dmarc_report(&xml) {
Ok(report) => {
return Ok(Body::html(format!(
"<div class=\"dmarc-report\">DMARC report summary:<br>{}</div>",
report
)));
}
Err(e) => {
return Ok(Body::html(format!(
"<div class=\"dmarc-report-error\">Failed to parse DMARC report XML: {}</div>",
e
)));
}
}
}
}
}
}
}
}
// If no DMARC report found, fall through to unhandled
extract_unhandled(m)
}
pub fn extract_gzip(m: &ParsedMail) -> Result<(Body, Option<String>), ServerError> {
let pcd = m.get_content_disposition();
let filename = pcd.params.get("filename").map(|s| s.to_lowercase());
let is_dmarc_xml_file = filename.map_or(false, |name| is_dmarc_report_filename(&name));
if is_dmarc_xml_file {
if let Ok(gz_bytes) = m.get_body_raw() {
let mut decoder = flate2::read::GzDecoder::new(&gz_bytes[..]);
let mut xml = String::new();
use std::io::Read;
if decoder.read_to_string(&mut xml).is_ok() {
match parse_dmarc_report(&xml) {
Ok(report) => {
return Ok((
Body::html(format!(
"<div class=\"dmarc-report\">DMARC report summary:<br>{}</div>",
report
)),
Some(xml),
));
}
Err(e) => {
return Ok((Body::html(format!(
"<div class=\"dmarc-report-error\">Failed to parse DMARC report XML: {}</div>",
e
)), None));
}
}
}
}
}
Ok((extract_unhandled(m)?, None))
}
pub fn extract_report(m: &ParsedMail, _part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
let mut html_part = None;
let mut tlsrpt_part = None;
for sp in &m.subparts {
match sp.ctype.mimetype.as_str() {
TEXT_HTML => html_part = Some(sp.get_body()?),
"application/tlsrpt+gzip" => tlsrpt_part = Some(sp.get_body_raw()?),
_ => {} // Ignore other parts for now
}
}
let tlsrpt_summary_html = if let Some(gz_bytes) = tlsrpt_part {
let mut decoder = flate2::read::GzDecoder::new(&gz_bytes[..]);
let mut buffer = Vec::new();
if decoder.read_to_end(&mut buffer).is_ok() {
if let Ok(json_str) = String::from_utf8(buffer) {
match serde_json::from_str::<TlsRpt>(&json_str) {
Ok(tlsrpt) => {
let formatted_tlsrpt = FormattedTlsRpt {
organization_name: tlsrpt.organization_name,
date_range: FormattedTlsRptDateRange {
start_datetime: tlsrpt.date_range.start_datetime,
end_datetime: tlsrpt.date_range.end_datetime,
},
contact_info: tlsrpt.contact_info.unwrap_or_else(|| "".to_string()),
report_id: tlsrpt.report_id,
policies: tlsrpt
.policies
.into_iter()
.map(|policy| FormattedTlsRptPolicy {
policy: FormattedTlsRptPolicyDetails {
policy_type: policy.policy.policy_type,
policy_string: policy.policy.policy_string,
policy_domain: policy.policy.policy_domain,
mx_host: policy
.policy
.mx_host
.unwrap_or_else(|| Vec::new())
.into_iter()
.map(|mx| match mx {
MxHost::String(s) => FormattedTlsRptMxHost {
hostname: s,
failure_count: 0,
result_type: "".to_string(),
},
MxHost::Object(o) => FormattedTlsRptMxHost {
hostname: o.hostname,
failure_count: o.failure_count,
result_type: o.result_type,
},
})
.collect(),
},
summary: policy.summary,
failure_details: policy
.failure_details
.unwrap_or_else(|| Vec::new())
.into_iter()
.map(|detail| FormattedTlsRptFailureDetails {
result_type: detail.result_type,
sending_mta_ip: detail
.sending_mta_ip
.unwrap_or_else(|| "".to_string()),
receiving_ip: detail
.receiving_ip
.unwrap_or_else(|| "".to_string()),
receiving_mx_hostname: detail
.receiving_mx_hostname
.unwrap_or_else(|| "".to_string()),
failed_session_count: detail.failed_session_count,
additional_info: detail
.additional_info
.unwrap_or_else(|| "".to_string()),
failure_reason_code: detail
.failure_reason_code
.unwrap_or_else(|| "".to_string()),
})
.collect(),
})
.collect(),
};
let template = TlsReportTemplate {
report: &formatted_tlsrpt,
};
template.render().unwrap_or_else(|e| format!("<div class=\"tlsrpt-error\">Failed to render TLS report template: {}</div>", e))
}
Err(e) => format!(
"<div class=\"tlsrpt-error\">Failed to parse TLS report JSON: {}</div>",
e
),
}
} else {
format!("<div class=\"tlsrpt-error\">Failed to convert decompressed data to UTF-8.</div>")
}
} else {
format!("<div class=\"tlsrpt-error\">Failed to decompressed data.</div>")
}
} else {
"".to_string()
};
let final_html = if let Some(html) = html_part {
format!("{}<hr>{} ", html, tlsrpt_summary_html)
} else {
tlsrpt_summary_html
};
Ok(Body::html(final_html))
}
/// Builds the fallback body for a part this module cannot render.
///
/// The text is a fixed banner, the content-type tree produced by
/// [`render_content_type_tree`], and the decoded body of `m`. The same tree
/// is also stored in `content_tree`.
///
/// # Errors
/// Returns an error when the body of `m` cannot be decoded.
pub fn extract_unhandled(m: &ParsedMail) -> Result<Body, ServerError> {
    let content_tree = render_content_type_tree(m);
    let body = m.get_body()?;
    let text = format!(
        "Unhandled body content type:\n{}\n{}",
        content_tree, body,
    );
    Ok(Body::UnhandledContentType(UnhandledContentType {
        text,
        content_tree,
    }))
}
pub fn is_dmarc_report_filename(name: &str) -> bool {
let is = (name.ends_with(".xml.gz") || name.ends_with(".xml")) && name.contains('!');
error!("info_span {}: {}", name, is);
is
}
/// Picks one representation out of a `multipart/alternative` part.
///
/// multipart/alternative carries several representations of the same message
/// and a client should show the richest one it can display. The subparts are
/// searched for the first match of each type, in this order: nested
/// `multipart/alternative`, `multipart/mixed`, `multipart/related`,
/// `text/html`, `text/plain`. The first type that has a matching subpart
/// decides the result.
///
/// # Errors
/// Returns an error when no subpart has one of the types above, when the
/// chosen text body cannot be decoded, or when a nested extractor fails.
pub fn extract_alternative(
    m: &ParsedMail,
    part_addr: &mut Vec<String>,
) -> Result<Body, ServerError> {
    // Highest priority first; also reported in the error message.
    const SEARCH_ORDER: [&str; 5] = [
        MULTIPART_ALTERNATIVE,
        MULTIPART_MIXED,
        MULTIPART_RELATED,
        TEXT_HTML,
        TEXT_PLAIN,
    ];
    for &wanted in SEARCH_ORDER.iter() {
        let found = m.subparts.iter().find(|sp| sp.ctype.mimetype == wanted);
        if let Some(sp) = found {
            return match wanted {
                MULTIPART_ALTERNATIVE => extract_alternative(sp, part_addr),
                MULTIPART_MIXED => extract_mixed(sp, part_addr),
                MULTIPART_RELATED => extract_related(sp, part_addr),
                TEXT_HTML => Ok(Body::html(sp.get_body()?)),
                // Only TEXT_PLAIN remains in SEARCH_ORDER.
                _ => Ok(Body::text(sp.get_body()?)),
            };
        }
    }
    Err(ServerError::StringError(format!(
        "extract_alternative failed to find suitable subpart, searched: {:?}",
        SEARCH_ORDER
    )))
}
// multipart/mixed defines multiple types of context all of which should be presented to the user
// 'serially'.
pub fn extract_mixed(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
//todo!("add some sort of visual indicator there are unhandled types, i.e. .ics files");
let handled_types = vec![
IMAGE_JPEG,
IMAGE_PJPEG,
IMAGE_PNG,
MESSAGE_RFC822,
MULTIPART_ALTERNATIVE,
MULTIPART_RELATED,
TEXT_HTML,
TEXT_PLAIN,
APPLICATION_GZIP,
];
let mut unhandled_types: Vec<_> = m
.subparts
.iter()
.map(|sp| sp.ctype.mimetype.as_str())
.filter(|mt| !handled_types.contains(mt))
.collect();
unhandled_types.sort();
if !unhandled_types.is_empty() {
warn!(
"{} contains the following unhandled mimetypes {:?}",
MULTIPART_MIXED, unhandled_types
);
}
let mut parts = Vec::new();
for (idx, sp) in m.subparts.iter().enumerate() {
part_addr.push(idx.to_string());
match sp.ctype.mimetype.as_str() {
MESSAGE_RFC822 => parts.push(extract_rfc822(&sp, part_addr)?),
MULTIPART_RELATED => parts.push(extract_related(sp, part_addr)?),
MULTIPART_ALTERNATIVE => parts.push(extract_alternative(sp, part_addr)?),
TEXT_PLAIN => parts.push(Body::text(sp.get_body()?)),
TEXT_HTML => parts.push(Body::html(sp.get_body()?)),
IMAGE_PJPEG | IMAGE_JPEG | IMAGE_PNG => {
let pcd = sp.get_content_disposition();
let filename = pcd
.params
.get("filename")
.map(|s| s.clone())
.unwrap_or("".to_string());
// Only add inline images, attachments are handled as an attribute of the top level Message and rendered separate client-side.
if pcd.disposition == mailparse::DispositionType::Inline {
// TODO: make URL generation more programatic based on what the frontend has
// mapped
parts.push(Body::html(format!(
r#"<img src="/api/view/attachment/{}/{}/{}">"#,
part_addr[0],
part_addr
.iter()
.skip(1)
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join("."),
filename
)));
}
}
APPLICATION_GZIP => {
let (html_body, raw_xml) = extract_gzip(sp)?;
parts.push(html_body);
if let Some(xml) = raw_xml {
let pretty_printed_content = if sp.ctype.mimetype.as_str() == MULTIPART_REPORT {
// This case is for TLS reports, not DMARC.
// For DMARC, it's always XML.
// Pretty print JSON (if it were TLS)
if let Ok(parsed_json) = serde_json::from_str::<serde_json::Value>(&xml) {
serde_json::to_string_pretty(&parsed_json).unwrap_or(xml)
} else {
xml
}
} else {
// DMARC reports are XML
// Pretty print XML
match pretty_print_xml_with_trimming(&xml) {
Ok(pretty_xml) => pretty_xml,
Err(e) => {
error!("Failed to pretty print XML: {:?}", e);
xml
}
}
};
parts.push(Body::html(format!(
"\n<pre>{}</pre>",
html_escape::encode_text(&pretty_printed_content)
)));
}
}
mt => parts.push(unhandled_html(MULTIPART_MIXED, mt)),
}
part_addr.pop();
}
Ok(flatten_body_parts(&parts))
}
pub fn unhandled_html(parent_type: &str, child_type: &str) -> Body {
Body::Html(Html {
html: format!(
r#"
<div class="p-4 error">
Unhandled mimetype {} in a {} message
</div>
"#,
child_type, parent_type
),
content_tree: String::new(),
})
}
/// Merges several bodies into one HTML body.
///
/// Each part is turned into an HTML fragment and the fragments are joined
/// with newlines:
///
/// * HTML parts are used unchanged;
/// * plain text and unhandled parts are HTML-escaped, stripped of leading and
///   trailing newlines, passed through `linkify_html`, and wrapped in a `<p>`
///   with a class naming the kind of part.
pub fn flatten_body_parts(parts: &[Body]) -> Body {
    let mut fragments = Vec::with_capacity(parts.len());
    for part in parts {
        let fragment = match part {
            Body::Html(Html { html, .. }) => html.clone(),
            Body::PlainText(PlainText { text, .. }) => {
                // Only newlines are trimmed; tabs and spaces stay in case the
                // text uses them to centre a heading on its first line.
                format!(
                    r#"<p class="view-part-text-plain font-mono whitespace-pre-line">{}</p>"#,
                    linkify_html(&html_escape::encode_text(text).trim_matches('\n'))
                )
            }
            Body::UnhandledContentType(UnhandledContentType { text, .. }) => {
                error!("text len {}", text.len());
                // Same trimming rule as for plain text.
                format!(
                    r#"<p class="view-part-unhandled">{}</p>"#,
                    linkify_html(&html_escape::encode_text(text).trim_matches('\n'))
                )
            }
        };
        fragments.push(fragment);
    }
    let html = fragments.join("\n");
    info!("flatten_body_parts {}", parts.len());
    Body::html(html)
}
pub fn extract_related(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
// TODO(wathiede): collect related things and change return type to new Body arm.
let handled_types = vec![
MULTIPART_ALTERNATIVE,
TEXT_HTML,
TEXT_PLAIN,
IMAGE_JPEG,
IMAGE_PJPEG,
IMAGE_PNG,
];
let mut unhandled_types: Vec<_> = m
.subparts
.iter()
.map(|sp| sp.ctype.mimetype.as_str())
.filter(|mt| !handled_types.contains(mt))
.collect();
unhandled_types.sort();
if !unhandled_types.is_empty() {
warn!(
"{} contains the following unhandled mimetypes {:?}",
MULTIPART_RELATED, unhandled_types
);
}
for (i, sp) in m.subparts.iter().enumerate() {
if sp.ctype.mimetype == IMAGE_PNG
|| sp.ctype.mimetype == IMAGE_JPEG
|| sp.ctype.mimetype == IMAGE_PJPEG
{
info!("sp.ctype {:#?}", sp.ctype);
//info!("sp.headers {:#?}", sp.headers);
if let Some(cid) = sp.headers.get_first_value("Content-Id") {
let mut part_id = part_addr.clone();
part_id.push(i.to_string());
info!("cid: {} part_id {:?}", cid, part_id);
}
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == MULTIPART_ALTERNATIVE {
return extract_alternative(m, part_addr);
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == TEXT_HTML {
let body = sp.get_body()?;
return Ok(Body::html(body));
}
}
for sp in &m.subparts {
if sp.ctype.mimetype == TEXT_PLAIN {
let body = sp.get_body()?;
return Ok(Body::text(body));
}
}
Err(ServerError::StringError(format!(
"extract_related failed to find suitable subpart, searched: {:?}",
handled_types
)))
}
/// Visits every descendant part of `m` depth-first and returns the first
/// `Some` value produced by `visitor`.
///
/// `visitor` receives each part together with its address: the subpart
/// indices leading from `m` to that part. `m` itself is not visited.
pub fn walk_attachments<T, F>(m: &ParsedMail, visitor: F) -> Option<T>
where
    F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy,
{
    let mut address = Vec::new();
    walk_attachments_inner(m, visitor, &mut address)
}
/// Recursive worker behind [`walk_attachments`].
///
/// For each subpart of `m`, its index is pushed onto `cur_addr`, the subpart
/// is offered to `visitor`, and then its own subparts are searched. The first
/// `Some` result is returned immediately; in that case `cur_addr` is left
/// holding the address of the part that produced it. Otherwise the index is
/// popped again and the next sibling is tried.
pub fn walk_attachments_inner<T, F>(
    m: &ParsedMail,
    visitor: F,
    cur_addr: &mut Vec<usize>,
) -> Option<T>
where
    F: Fn(&ParsedMail, &[usize]) -> Option<T> + Copy,
{
    for (idx, sp) in m.subparts.iter().enumerate() {
        cur_addr.push(idx);
        let hit = match visitor(sp, cur_addr.as_slice()) {
            Some(found) => Some(found),
            None => walk_attachments_inner(sp, visitor, cur_addr),
        };
        if hit.is_some() {
            return hit;
        }
        cur_addr.pop();
    }
    None
}
// TODO(wathiede): make this walk_attachments that takes a closure.
// Then implement one closure for building `Attachment` and implement another that can be used
// to get the bytes for serving attachments over HTTP.
/// Lists the attachments among the direct subparts of `m`.
///
/// Each direct subpart is passed to [`extract_attachment`] with `id` and its
/// index. Only results whose disposition is `Attachment` are kept; inline
/// parts are skipped because they are flattened into the message body.
/// Nested subparts are not examined. The function currently never returns
/// `Err`.
pub fn extract_attachments(m: &ParsedMail, id: &str) -> Result<Vec<Attachment>, ServerError> {
    let attachments = m
        .subparts
        .iter()
        .enumerate()
        .filter_map(|(idx, sp)| extract_attachment(sp, id, &[idx]))
        .filter(|attachment| attachment.disposition == DispositionType::Attachment)
        .collect();
    Ok(attachments)
}
/// Describes the part `m` as an [`Attachment`], if it has a filename.
///
/// The filename is taken from the Content-Disposition `filename` parameter
/// or, failing that, from the Content-Type `name` parameter. `id` is copied
/// into the result and `idx` is rendered as a dot-separated address.
///
/// Returns `None` when neither parameter is present (the part is assumed not
/// to be an attachment) or when the raw body cannot be decoded (the error is
/// logged).
pub fn extract_attachment(m: &ParsedMail, id: &str, idx: &[usize]) -> Option<Attachment> {
    let pcd = m.get_content_disposition();
    // Content-Disposition wins; Content-Type is only consulted as a fallback.
    let filename = pcd.params.get("filename").cloned().or_else(|| {
        m.get_headers()
            .get_first_value("Content-Type")
            .and_then(|raw| parse_content_type(&raw).params.get("name").cloned())
    })?;
    info!("filename {}", filename);

    let bytes = match m.get_body_raw() {
        Ok(bytes) => bytes,
        Err(err) => {
            error!("failed to get body for attachment: {}", err);
            return None;
        }
    };
    let address = idx
        .iter()
        .map(|i| i.to_string())
        .collect::<Vec<String>>()
        .join(".");

    Some(Attachment {
        id: id.to_string(),
        idx: address,
        disposition: pcd.disposition.into(),
        filename: Some(filename),
        size: bytes.len(),
        // TODO: what is the default for ctype?
        // TODO: do we want to use m.ctype.params for anything?
        content_type: Some(m.ctype.mimetype.clone()),
        // TODO: grab this from somewhere
        content_id: None,
        bytes,
    })
}
/// Renders each address with its `to_string` form, logging every rendered
/// value, and returns the strings in input order.
pub fn email_address_strings(emails: &[Email]) -> Vec<String> {
    let mut rendered = Vec::with_capacity(emails.len());
    for email in emails {
        let text = email.to_string();
        info!("e {}", text);
        rendered.push(text);
    }
    rendered
}
/// Renders an embedded `message/rfc822` part (a forwarded message).
///
/// The body of `m` is parsed as a complete message. The result is a
/// "Forwarded message" banner listing the inner From/To/CC/Date/Subject
/// headers, followed by the inner message's own body as produced by
/// [`extract_body`].
///
/// # Errors
/// Returns an error when the body of `m` cannot be decoded or parsed, or
/// when extracting the inner headers or body fails.
pub fn extract_rfc822(m: &ParsedMail, part_addr: &mut Vec<String>) -> Result<Body, ServerError> {
    /// Builds the plain-text banner from the inner message's headers.
    fn forwarded_banner(inner: &ParsedMail) -> Result<Body, ServerError> {
        let joined = |header: &str| -> Result<String, ServerError> {
            let addrs = email_addresses("<in-memory>", inner, header)?;
            Ok(email_address_strings(&addrs).join(", "))
        };
        let from = joined("from")?;
        let to = joined("to")?;
        let cc = joined("cc")?;
        // Missing Date/Subject headers are shown as empty values.
        let date = inner.headers.get_first_value("date").unwrap_or_default();
        let subject = inner
            .headers
            .get_first_value("subject")
            .unwrap_or_default();
        Ok(Body::text(format!(
            r#"
---------- Forwarded message ----------
From: {}
To: {}
CC: {}
Date: {}
Subject: {}
"#,
            from, to, cc, date, subject
        )))
    }

    let raw = m.get_body()?;
    let inner = parse_mail(raw.as_bytes())?;
    let banner = forwarded_banner(&inner)?;
    let content = extract_body(&inner, part_addr)?;
    Ok(flatten_body_parts(&[banner, content]))
}
/// Extracts the last double-quoted value from a header value.
///
/// The final character of `header_value` (expected to be the closing quote)
/// is dropped, and everything after the last remaining `"` is returned. For
/// example `attachment; filename="a.txt"` yields `a.txt`. Returns `""` when
/// no `"` remains, which includes an empty input.
pub fn get_attachment_filename(header_value: &str) -> &str {
    info!("get_attachment_filename {}", header_value);
    // Drop the last character rather than the last byte: byte slicing with
    // `len() - 1` panics on an empty string and on a trailing multi-byte
    // character.
    let mut chars = header_value.chars();
    chars.next_back();
    let v = chars.as_str();
    match v.rfind('"') {
        Some(idx) => &v[idx + 1..],
        None => "",
    }
}
/// Returns the first `Content-Type` header value with any parameters removed.
///
/// Everything from the first `;` onwards is cut off; the remainder is
/// returned as-is, without trimming. Returns `None` when the header is
/// absent.
pub fn get_content_type<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
    headers
        .get_first_value("Content-Type")
        .map(|value| match value.find(';') {
            Some(idx) => value[..idx].to_string(),
            None => value,
        })
}
pub fn get_content_id<'a>(headers: &[MailHeader<'a>]) -> Option<String> {
headers.get_first_value("Content-Id")
}
/// Renders a plain-text description of the MIME structure of `m`.
///
/// The output has three sections: an outline with one line per part
/// (mimetype only), a detailed listing that adds charset, Content-Type
/// parameters and headers for each part, and a note naming the headers that
/// were left out. Omitted headers are those whose name starts with `X` and
/// those listed in `SKIP_HEADERS`. Nesting is shown by the length of the
/// leading dash run and by indentation.
pub fn render_content_type_tree(m: &ParsedMail) -> String {
    const WIDTH: usize = 4;
    const SKIP_HEADERS: [&str; 4] = [
        "Authentication-Results",
        "DKIM-Signature",
        "Received",
        "Received-SPF",
    ];

    /// The "---- mimetype" line that starts every part.
    fn heading(m: &ParsedMail, depth: usize) -> String {
        format!("{} {}", "-".repeat(depth * WIDTH), m.ctype.mimetype)
    }

    /// Outline: mimetypes only.
    fn render_ct_rec(m: &ParsedMail, depth: usize) -> String {
        let mut lines = vec![heading(m, depth)];
        lines.extend(m.subparts.iter().map(|sp| render_ct_rec(sp, depth + 1)));
        lines.join("\n")
    }

    /// Detailed listing: mimetype, charset, parameters and headers.
    fn render_rec(m: &ParsedMail, depth: usize) -> String {
        let indent = " ".repeat(depth * WIDTH);
        let mut lines = vec![heading(m, depth)];
        if !m.ctype.charset.is_empty() {
            lines.push(format!("{} Character Set: {}", indent, m.ctype.charset));
        }
        lines.extend(
            m.ctype
                .params
                .iter()
                .map(|(k, v)| format!("{} {}: {}", indent, k, v)),
        );
        if !m.headers.is_empty() {
            lines.push(format!("{} == headers ==", indent));
            for h in &m.headers {
                let key = h.get_key();
                if key.starts_with('X') || SKIP_HEADERS.contains(&key.as_str()) {
                    continue;
                }
                lines.push(format!("{} {}: {}", indent, key, h.get_value()));
            }
        }
        lines.extend(m.subparts.iter().map(|sp| render_rec(sp, depth + 1)));
        lines.join("\n")
    }

    let outline = render_ct_rec(m, 1);
    let detailed = render_rec(m, 1);
    format!(
        "Outline:\n{}\n\nDetailed:\n{}\n\nNot showing headers:\n {}\n X.*",
        outline,
        detailed,
        SKIP_HEADERS.join("\n ")
    )
}
// View-model types for the DMARC report template. `parse_dmarc_report` builds
// them from the deserialized `Feedback` tree, replacing absent optional values
// with empty strings or empty lists.

/// Reporting period with both ends already rendered as text.
#[derive(Debug, serde::Deserialize)]
pub struct FormattedDateRange {
pub begin: String,
pub end: String,
}
/// Report metadata with every missing text field replaced by an empty string.
pub struct FormattedReportMetadata {
pub org_name: String,
pub email: String,
pub report_id: String,
// `None` when the report carried no date range at all.
pub date_range: Option<FormattedDateRange>,
}
/// One report record, flattened from `Row`, `PolicyEvaluated` and
/// `Identifiers` into plain strings.
pub struct FormattedRecord {
pub source_ip: String,
// The numeric count rendered as text; empty when absent.
pub count: String,
pub header_from: String,
pub envelope_to: String,
pub disposition: String,
pub dkim: String,
pub spf: String,
// One entry per policy override reason, formatted as
// "Type: ..., Comment: ..." with absent pieces left out.
pub reason: Vec<String>,
pub auth_results: Option<FormattedAuthResults>,
}
/// DKIM and SPF authentication results of a record; empty lists when absent.
pub struct FormattedAuthResults {
pub dkim: Vec<FormattedAuthDKIM>,
pub spf: Vec<FormattedAuthSPF>,
}
/// A single DKIM result with missing fields as empty strings.
pub struct FormattedAuthDKIM {
pub domain: String,
pub result: String,
pub selector: String,
}
/// A single SPF result with missing fields as empty strings.
pub struct FormattedAuthSPF {
pub domain: String,
pub result: String,
pub scope: String,
}
/// The published policy section with missing fields as empty strings.
pub struct FormattedPolicyPublished {
pub domain: String,
pub adkim: String,
pub aspf: String,
pub p: String,
pub sp: String,
pub pct: String,
}
/// Root object handed to `DmarcReportTemplate`.
pub struct FormattedFeedback {
pub report_metadata: Option<FormattedReportMetadata>,
pub policy_published: Option<FormattedPolicyPublished>,
pub record: Option<Vec<FormattedRecord>>,
// True when at least one record has a non-empty `envelope_to`.
pub has_envelope_to: bool,
}
/// Root of a DMARC report as deserialized from XML by `parse_dmarc_report`.
/// Every section is optional so that incomplete reports still deserialize.
#[derive(Debug, serde::Deserialize)]
pub struct Feedback {
pub report_metadata: Option<ReportMetadata>,
pub policy_published: Option<PolicyPublished>,
pub record: Option<Vec<Record>>,
}
/// The `report_metadata` section of a DMARC report.
#[derive(Debug, serde::Deserialize)]
pub struct ReportMetadata {
pub org_name: Option<String>,
pub email: Option<String>,
pub report_id: Option<String>,
pub date_range: Option<DateRange>,
}
/// Reporting period. `parse_dmarc_report` interprets both values as seconds
/// since the Unix epoch (UTC).
#[derive(Debug, serde::Deserialize)]
pub struct DateRange {
pub begin: Option<u64>,
pub end: Option<u64>,
}
/// The `policy_published` section of a DMARC report; values are kept as the
/// raw text found in the report.
#[derive(Debug, serde::Deserialize)]
pub struct PolicyPublished {
pub domain: Option<String>,
pub adkim: Option<String>,
pub aspf: Option<String>,
pub p: Option<String>,
pub sp: Option<String>,
pub pct: Option<String>,
}
/// One `record` entry of a DMARC report.
#[derive(Debug, serde::Deserialize)]
pub struct Record {
pub row: Option<Row>,
pub identifiers: Option<Identifiers>,
pub auth_results: Option<AuthResults>,
}
/// The `row` section of a record: source address, message count and the
/// evaluated policy.
#[derive(Debug, serde::Deserialize)]
pub struct Row {
pub source_ip: Option<String>,
pub count: Option<u64>,
pub policy_evaluated: Option<PolicyEvaluated>,
}
/// The `policy_evaluated` section of a row.
#[derive(Debug, serde::Deserialize)]
pub struct PolicyEvaluated {
pub disposition: Option<String>,
pub dkim: Option<String>,
pub spf: Option<String>,
pub reason: Option<Vec<Reason>>,
}
/// A policy override reason. `Clone` is needed because `parse_dmarc_report`
/// copies the reasons out of a borrowed record.
#[derive(Debug, serde::Deserialize, Clone)]
pub struct Reason {
// `type` is a Rust keyword, hence the rename.
#[serde(rename = "type")]
pub reason_type: Option<String>,
pub comment: Option<String>,
}
/// Root of a TLS report as deserialized from JSON by `extract_report`.
/// The JSON keys are hyphenated, so fields are mapped with `serde(rename)`.
#[derive(Debug, serde::Deserialize)]
pub struct TlsRpt {
#[serde(rename = "organization-name")]
pub organization_name: String,
#[serde(rename = "date-range")]
pub date_range: TlsRptDateRange,
#[serde(rename = "contact-info")]
pub contact_info: Option<String>,
#[serde(rename = "report-id")]
pub report_id: String,
pub policies: Vec<TlsRptPolicy>,
}
/// Reporting period of a TLS report, kept as the raw strings from the JSON.
#[derive(Debug, serde::Deserialize)]
pub struct TlsRptDateRange {
#[serde(rename = "start-datetime")]
pub start_datetime: String,
#[serde(rename = "end-datetime")]
pub end_datetime: String,
}
/// One entry of the `policies` array: the policy, its session summary and
/// optional failure details.
#[derive(Debug, serde::Deserialize)]
pub struct TlsRptPolicy {
pub policy: TlsRptPolicyDetails,
pub summary: TlsRptSummary,
#[serde(rename = "failure-details")]
pub failure_details: Option<Vec<TlsRptFailureDetails>>,
}
/// The `policy` object of a policy entry.
#[derive(Debug, serde::Deserialize)]
pub struct TlsRptPolicyDetails {
#[serde(rename = "policy-type")]
pub policy_type: String,
#[serde(rename = "policy-string")]
pub policy_string: Vec<String>,
#[serde(rename = "policy-domain")]
pub policy_domain: String,
#[serde(rename = "mx-host")]
pub mx_host: Option<Vec<MxHost>>,
}
/// Session counters of a policy entry. Also embedded unchanged in
/// `FormattedTlsRptPolicy`.
#[derive(Debug, serde::Deserialize)]
pub struct TlsRptSummary {
#[serde(rename = "total-successful-session-count")]
pub total_successful_session_count: u64,
#[serde(rename = "total-failure-session-count")]
pub total_failure_session_count: u64,
}
/// An `mx-host` entry. Untagged: serde tries a bare string first and then the
/// object form, so both shapes are accepted.
#[derive(Debug, serde::Deserialize)]
#[serde(untagged)]
pub enum MxHost {
String(String),
Object(TlsRptMxHost),
}
/// Object form of an `mx-host` entry.
#[derive(Debug, serde::Deserialize)]
pub struct TlsRptMxHost {
pub hostname: String,
#[serde(rename = "failure-count")]
pub failure_count: u64,
#[serde(rename = "result-type")]
pub result_type: String,
}
/// One entry of a policy's `failure-details` array.
#[derive(Debug, serde::Deserialize)]
pub struct TlsRptFailureDetails {
#[serde(rename = "result-type")]
pub result_type: String,
#[serde(rename = "sending-mta-ip")]
pub sending_mta_ip: Option<String>,
#[serde(rename = "receiving-ip")]
pub receiving_ip: Option<String>,
#[serde(rename = "receiving-mx-hostname")]
pub receiving_mx_hostname: Option<String>,
#[serde(rename = "failed-session-count")]
pub failed_session_count: u64,
#[serde(rename = "additional-info")]
pub additional_info: Option<String>,
#[serde(rename = "failure-reason-code")]
pub failure_reason_code: Option<String>,
}
/// View model handed to `TlsReportTemplate`. `extract_report` builds it from
/// a `TlsRpt`, replacing absent optional values with empty strings or lists.
#[derive(Debug)]
pub struct FormattedTlsRpt {
pub organization_name: String,
pub date_range: FormattedTlsRptDateRange,
// Empty when the report has no contact info.
pub contact_info: String,
pub report_id: String,
pub policies: Vec<FormattedTlsRptPolicy>,
}
/// Reporting period, copied unchanged from `TlsRptDateRange`.
#[derive(Debug)]
pub struct FormattedTlsRptDateRange {
pub start_datetime: String,
pub end_datetime: String,
}
/// One policy entry of the view model.
#[derive(Debug)]
pub struct FormattedTlsRptPolicy {
pub policy: FormattedTlsRptPolicyDetails,
// The deserialized summary is reused as-is.
pub summary: TlsRptSummary,
// Empty when the report has no failure details for this policy.
pub failure_details: Vec<FormattedTlsRptFailureDetails>,
}
/// Policy details of the view model.
#[derive(Debug)]
pub struct FormattedTlsRptPolicyDetails {
pub policy_type: String,
pub policy_string: Vec<String>,
pub policy_domain: String,
// Empty when the report lists no MX hosts.
pub mx_host: Vec<FormattedTlsRptMxHost>,
}
/// An MX host of the view model. A host given as a bare string in the report
/// gets a `failure_count` of 0 and an empty `result_type`.
#[derive(Debug)]
pub struct FormattedTlsRptMxHost {
pub hostname: String,
pub failure_count: u64,
pub result_type: String,
}
/// A failure detail of the view model; optional source fields are empty
/// strings when absent.
#[derive(Debug)]
pub struct FormattedTlsRptFailureDetails {
pub result_type: String,
pub sending_mta_ip: String,
pub receiving_ip: String,
pub receiving_mx_hostname: String,
pub failed_session_count: u64,
pub additional_info: String,
pub failure_reason_code: String,
}
/// The `identifiers` section of a DMARC `Record`.
#[derive(Debug, serde::Deserialize)]
pub struct Identifiers {
pub header_from: Option<String>,
pub envelope_to: Option<String>,
}
/// The `auth_results` section of a DMARC `Record`: zero or more DKIM and SPF
/// results.
#[derive(Debug, serde::Deserialize)]
pub struct AuthResults {
pub dkim: Option<Vec<AuthDKIM>>,
pub spf: Option<Vec<AuthSPF>>,
}
/// A single DKIM entry of `AuthResults`.
#[derive(Debug, serde::Deserialize)]
pub struct AuthDKIM {
pub domain: Option<String>,
pub result: Option<String>,
pub selector: Option<String>,
}
/// A single SPF entry of `AuthResults`.
#[derive(Debug, serde::Deserialize)]
pub struct AuthSPF {
pub domain: Option<String>,
pub result: Option<String>,
pub scope: Option<String>,
}
/// Askama template (`dmarc_report.html`) that renders a DMARC report; used by
/// `parse_dmarc_report`.
#[derive(Template)]
#[template(path = "dmarc_report.html")]
pub struct DmarcReportTemplate<'a> {
pub report: &'a FormattedFeedback,
}
/// Askama template (`tls_report.html`) that renders a TLS report; used by
/// `extract_report`.
#[derive(Template)]
#[template(path = "tls_report.html")]
pub struct TlsReportTemplate<'a> {
pub report: &'a FormattedTlsRpt,
}
// Add this helper function to parse the DMARC XML and summarize it.
pub fn parse_dmarc_report(xml: &str) -> Result<String, ServerError> {
let feedback: Feedback = xml_from_str(xml)
.map_err(|e| ServerError::StringError(format!("DMARC XML parse error: {}", e)))?;
let formatted_report_metadata = feedback.report_metadata.map(|meta| {
let date_range = meta.date_range.map(|dr| FormattedDateRange {
begin: match Utc.timestamp_opt(dr.begin.unwrap_or(0) as i64, 0) {
chrono::LocalResult::Single(d) => Some(d),
_ => None,
}
.map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
.unwrap_or_else(|| "".to_string()),
end: match Utc.timestamp_opt(dr.end.unwrap_or(0) as i64, 0) {
chrono::LocalResult::Single(d) => Some(d),
_ => None,
}
.map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
.unwrap_or_else(|| "".to_string()),
});
FormattedReportMetadata {
org_name: meta.org_name.unwrap_or_else(|| "".to_string()),
email: meta.email.unwrap_or_else(|| "".to_string()),
report_id: meta.report_id.unwrap_or_else(|| "".to_string()),
date_range,
}
});
let formatted_record = feedback.record.map(|records| {
records
.into_iter()
.map(|rec| {
let auth_results = rec.auth_results.map(|auth| {
let dkim = auth
.dkim
.map(|dkims| {
dkims
.into_iter()
.map(|d| FormattedAuthDKIM {
domain: d.domain.unwrap_or_else(|| "".to_string()),
result: d.result.unwrap_or_else(|| "".to_string()),
selector: d.selector.unwrap_or_else(|| "".to_string()),
})
.collect()
})
.unwrap_or_else(|| Vec::new());
let spf = auth
.spf
.map(|spfs| {
spfs.into_iter()
.map(|s| FormattedAuthSPF {
domain: s.domain.unwrap_or_else(|| "".to_string()),
result: s.result.unwrap_or_else(|| "".to_string()),
scope: s.scope.unwrap_or_else(|| "".to_string()),
})
.collect()
})
.unwrap_or_else(|| Vec::new());
FormattedAuthResults { dkim, spf }
});
FormattedRecord {
source_ip: rec
.row
.as_ref()
.and_then(|r| r.source_ip.clone())
.unwrap_or_else(|| "".to_string()),
count: rec
.row
.as_ref()
.and_then(|r| r.count.map(|c| c.to_string()))
.unwrap_or_else(|| "".to_string()),
header_from: rec
.identifiers
.as_ref()
.and_then(|i| i.header_from.clone())
.unwrap_or_else(|| "".to_string()),
envelope_to: rec
.identifiers
.as_ref()
.and_then(|i| i.envelope_to.clone())
.unwrap_or_else(|| "".to_string()),
disposition: rec
.row
.as_ref()
.and_then(|r| r.policy_evaluated.as_ref())
.and_then(|p| p.disposition.clone())
.unwrap_or_else(|| "".to_string()),
dkim: rec
.row
.as_ref()
.and_then(|r| r.policy_evaluated.as_ref())
.and_then(|p| p.dkim.clone())
.unwrap_or_else(|| "".to_string()),
spf: rec
.row
.as_ref()
.and_then(|r| r.policy_evaluated.as_ref())
.and_then(|p| p.spf.clone())
.unwrap_or_else(|| "".to_string()),
reason: rec
.row
.as_ref()
.and_then(|r| r.policy_evaluated.as_ref())
.and_then(|p| p.reason.clone())
.unwrap_or_else(|| Vec::new())
.into_iter()
.map(|r| {
let mut s = String::new();
if let Some(reason_type) = r.reason_type {
s.push_str(&format!("Type: {}", reason_type));
}
if let Some(comment) = r.comment {
if !s.is_empty() {
s.push_str(", ");
}
s.push_str(&format!("Comment: {}", comment));
}
s
})
.collect(),
auth_results,
}
})
.collect()
});
let formatted_policy_published =
feedback
.policy_published
.map(|pol| FormattedPolicyPublished {
domain: pol.domain.unwrap_or_else(|| "".to_string()),
adkim: pol.adkim.unwrap_or_else(|| "".to_string()),
aspf: pol.aspf.unwrap_or_else(|| "".to_string()),
p: pol.p.unwrap_or_else(|| "".to_string()),
sp: pol.sp.unwrap_or_else(|| "".to_string()),
pct: pol.pct.unwrap_or_else(|| "".to_string()),
});
let has_envelope_to = formatted_record
.as_ref()
.map_or(false, |r: &Vec<FormattedRecord>| {
r.iter().any(|rec| !rec.envelope_to.is_empty())
});
let formatted_feedback = FormattedFeedback {
report_metadata: formatted_report_metadata,
policy_published: formatted_policy_published,
record: formatted_record,
has_envelope_to,
};
let template = DmarcReportTemplate {
report: &formatted_feedback,
};
let html = template.render()?;
Ok(html)
}
pub fn pretty_print_xml_with_trimming(xml_input: &str) -> Result<String, ServerError> {
use std::io::Cursor;
use quick_xml::{
events::{BytesText, Event},
reader::Reader,
writer::Writer,
};
let mut reader = Reader::from_str(xml_input);
reader.config_mut().trim_text(true);
let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4);
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Eof) => break,
Ok(Event::Text(e)) => {
let trimmed_text = e.decode()?.trim().to_string();
writer.write_event(Event::Text(BytesText::new(&trimmed_text)))?;
}
Ok(event) => {
writer.write_event(event)?;
}
Err(e) => {
return Err(ServerError::StringError(format!(
"XML parsing error: {}",
e
)))
}
}
buf.clear();
}
let result = writer.into_inner().into_inner();
Ok(String::from_utf8(result)?)
}
#[cfg(test)]
mod tests {
    use std::fs;

    use super::*;

    /// Reads the fixture at `path` and renders it with `parse_dmarc_report`,
    /// panicking if either step fails.
    fn render_fixture(path: &str) -> String {
        let xml = fs::read_to_string(path).unwrap();
        parse_dmarc_report(&xml).unwrap()
    }

    #[test]
    fn test_parse_dmarc_report() {
        let html = render_fixture("testdata/dmarc-example.xml");
        for expected in ["hotmail.com", "msn.com"].iter() {
            assert!(html.contains(expected));
        }
    }

    #[test]
    fn test_parse_dmarc_report_no_envelope_to() {
        let html = render_fixture("testdata/dmarc-example-no-envelope-to.xml");
        assert!(!html.contains("Envelope To"));
    }
}