Demo Rust program to walk over mbox files.

This commit is contained in:
Bill Thiede 2019-11-25 11:14:58 -08:00
parent ed632802a6
commit 89e3b97cf7
4 changed files with 168 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
**/*.rs.bk

67
Cargo.lock generated Normal file
View File

@ -0,0 +1,67 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "base64"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "byteorder"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "charset"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "email"
version = "0.1.0"
dependencies = [
"mailparse 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "encoding_rs"
version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "mailparse"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
"charset 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"quoted_printable 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quoted_printable"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum charset 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4f426e64df1c3de26cbf44593c6ffff5dbfd43bbf9de0d075058558126b3fc73"
"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9"
"checksum mailparse 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "51a60bad00d8aa905d31cf239f207ad4ef16c963ea53cf522d5fd7dc7f3ecfe2"
"checksum quoted_printable 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "86cedf331228892e747bb85beb130b6bb23fc628c40dde9ea01eb6becea3c798"

10
Cargo.toml Normal file
View File

@ -0,0 +1,10 @@
[package]
name = "email"
version = "0.1.0"
authors = ["Bill Thiede <git@xinu.tv>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
mailparse = "0.9.2"

89
src/main.rs Normal file
View File

@ -0,0 +1,89 @@
use std::env;
use std::error::Error;
use std::fs::File;
use std::io::prelude::*;
use std::process::exit;
use std::slice::Iter;
use mailparse::dateparse;
use mailparse::MailHeaderMap;
/// Predicate for byte iterators: reports whether `b` is an ASCII line feed.
///
/// Takes a reference so it can be handed directly to `Iterator::position` on a
/// slice iterator, whose items are `&u8`.
fn newline(b: &u8) -> bool {
    b == &b'\n'
}
/// Searches the bytes remaining in `it` for the first occurrence of `needle`.
///
/// On a match, `it` is advanced to just past the needle and the returned value
/// is the offset of the needle's *last* byte, counted from where `it` pointed
/// when the function was called (i.e. the number of bytes consumed minus one).
///
/// Returns `None` when `needle` does not occur; `it` is then left exhausted.
/// An empty `needle` never matches and leaves `it` untouched.
fn index_of(it: &mut Iter<u8>, needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        // `windows(0)` would panic, and "last byte of an empty match" is undefined.
        return None;
    }
    // `as_slice` borrows the underlying data, not the iterator, so `it` can
    // still be advanced below.
    let haystack = it.as_slice();
    // Compare whole windows rather than tracking a partial-match counter: the
    // counter approach loses matches for needles with a repeated prefix
    // (e.g. "aab" inside "aaab").
    match haystack
        .windows(needle.len())
        .position(|window| window == needle)
    {
        Some(match_start) => {
            let last = match_start + needle.len() - 1;
            // Consume everything up to and including the needle's last byte.
            let _ = it.nth(last);
            Some(last)
        }
        None => {
            // Asking for an element past the end drains the iterator, matching
            // what a failed `position` scan leaves behind.
            let _ = it.nth(haystack.len());
            None
        }
    }
}
/// Walks the messages in an in-memory mbox file and prints the `Date`,
/// `Subject` and `From` header of each one to stdout.
///
/// Splitting works as follows: the first line of each message (up to and
/// including its newline) is skipped, and the message then runs until the next
/// `"\nFrom "` byte sequence or the end of the input.
///
/// NOTE(review): because the newline ending the skipped line is consumed before
/// the delimiter search starts, a skipped line that is immediately followed by
/// another `From ` line is not recognised as a separate (empty) message.
///
/// # Errors
///
/// Returns the underlying error when a message cannot be parsed or one of its
/// headers cannot be decoded; messages before the failing one have already
/// been printed.
fn parse_mbox(mbox_bytes: &[u8]) -> Result<(), Box<dyn Error>> {
    let delim = b"\nFrom ";
    let mut it = mbox_bytes.iter();
    // Absolute offset into `mbox_bytes` of the next byte `it` will yield.
    let mut ix = 0;
    loop {
        // Skip the leading line; no newline left means no further messages.
        let start = match it.position(newline) {
            Some(offset) => {
                ix += offset + 1;
                ix
            }
            None => return Ok(()),
        };
        let end = match index_of(&mut it, delim) {
            Some(offset) => {
                // `ix` now points just past the delimiter; the message itself
                // ends where the delimiter begins.
                ix += offset + 1;
                ix - delim.len()
            }
            None => mbox_bytes.len(),
        };
        // Propagate parse failures instead of panicking on malformed input.
        let mail = mailparse::parse_mail(&mbox_bytes[start..end])?;
        let date = mail
            .headers
            .get_first_value("Date")?
            .unwrap_or_else(|| "NO DATE".to_string());
        let subject = mail
            .headers
            .get_first_value("Subject")?
            .unwrap_or_else(|| "NO SUBJECT".to_string());
        let from = mail
            .headers
            .get_first_value("From")?
            .unwrap_or_else(|| "NO FROM".to_string());
        println!("{:?} {:?} from {:?}", date, subject, from);
    }
}
fn main() {
if env::args().count() <= 1 {
println!("Provide mbox files as arguments");
exit(1);
}
let mut args = env::args();
args.next(); // drop executable name
args.for_each(|mbox_path| {
let mut mbox = File::open(mbox_path).unwrap();
let mut mails = Vec::new();
mbox.read_to_end(&mut mails).unwrap();
parse_mbox(&mails);
});
}