From 89e3b97cf7956a391360461a096c0c461f2ae61e Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Mon, 25 Nov 2019 11:14:58 -0800 Subject: [PATCH] Demo rust program to walk over mbox files. --- .gitignore | 2 ++ Cargo.lock | 67 ++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 10 ++++++ src/main.rs | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 168 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53eaa21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..4b586d4 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,67 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "base64" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "byteorder" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "charset" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "email" +version = "0.1.0" +dependencies = [ + "mailparse 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding_rs" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "mailparse" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", + "charset 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "quoted_printable 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quoted_printable" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" +"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" +"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +"checksum charset 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4f426e64df1c3de26cbf44593c6ffff5dbfd43bbf9de0d075058558126b3fc73" +"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9" +"checksum mailparse 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "51a60bad00d8aa905d31cf239f207ad4ef16c963ea53cf522d5fd7dc7f3ecfe2" +"checksum quoted_printable 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "86cedf331228892e747bb85beb130b6bb23fc628c40dde9ea01eb6becea3c798" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..9087fa5 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "email" +version = "0.1.0" +authors = ["Bill Thiede "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +mailparse = "0.9.2" diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..f2bf164 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,89 @@ +use std::env; +use std::error::Error; +use std::fs::File; +use std::io::prelude::*; +use std::process::exit; +use std::slice::Iter; + +use mailparse::dateparse; +use mailparse::MailHeaderMap; + +fn newline(b: &u8) -> bool { + *b == b'\n' +} + +fn index_of(it: &mut Iter, needle: &[u8]) -> Option { + let mut needle_ix = 0; + it.position(move |&b| { + if b == needle[needle_ix] { + needle_ix += 1; + if needle_ix == needle.len() { + return true; + } + } else if b == needle[0] { + needle_ix = 1; + } else { + needle_ix = 0; + } + false + }) +} + +fn parse_mbox(mbox_bytes: &Vec) -> Result<(), Box> { + let mut it = mbox_bytes.iter(); + let mut ix = 0; + + loop { + let mail_start = it.position(newline); + if mail_start.is_none() { + return Ok(()); + } + ix += mail_start.unwrap() + 1; + let start = ix; + + let delim = b"\nFrom "; + let next_mail_start = index_of(&mut it, delim); + + let end = match next_mail_start { + Some(x) => { + ix += x + 1; + ix - delim.len() + } + None => mbox_bytes.len(), + }; + + let mail_bytes = &mbox_bytes[start..end]; + + let mail = mailparse::parse_mail(mail_bytes).unwrap(); + println!( + "{:?} {:?} from {:?}", + match mail.headers.get_first_value("Date")? { + Some(date) => date, + None => "NO DATE".to_string(), + }, + match mail.headers.get_first_value("Subject")? { + Some(subject) => subject, + None => "NO SUBJECT".to_string(), + }, + match mail.headers.get_first_value("From")? { + Some(from) => from, + None => "NO FROM".to_string(), + }, + ); + } +} + +fn main() { + if env::args().count() <= 1 { + println!("Provide mbox files as arguments"); + exit(1); + } + let mut args = env::args(); + args.next(); // drop executable name + args.for_each(|mbox_path| { + let mut mbox = File::open(mbox_path).unwrap(); + let mut mails = Vec::new(); + mbox.read_to_end(&mut mails).unwrap(); + parse_mbox(&mails); + }); +}