Implement cleanupdupes to remove duplicate emails in a Maildir.
This commit is contained in:
69
src/bin/cleanupdupes.rs
Normal file
69
src/bin/cleanupdupes.rs
Normal file
@@ -0,0 +1,69 @@
|
||||
use std::{collections::HashMap, fs::remove_file};
|
||||
|
||||
use email::hash_file;
|
||||
use walkdir::WalkDir;
|
||||
/// Name of the environment variable that switches `main` from only listing duplicates to
/// actually deleting them. `main` only checks whether it is set, not what it is set to.
const ENV_VAR_TO_DELETE: &str = "DELETE_DUPES";
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let map = std::env::args()
|
||||
.skip(1)
|
||||
.nth(0)
|
||||
.map(|dir| {
|
||||
WalkDir::new(dir)
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| {
|
||||
if entry.file_type().is_dir() {
|
||||
println!("{}", entry.path().display());
|
||||
return None;
|
||||
}
|
||||
let arg = entry.path().display().to_string();
|
||||
match hash_file(&arg) {
|
||||
Ok(h) => Some((h, arg)),
|
||||
Err(e) => {
|
||||
eprintln!("{}: failed {}", arg, e);
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.fold(HashMap::new(), |mut m, (h, arg)| {
|
||||
m.entry(h).or_insert(Vec::new()).push(arg);
|
||||
m
|
||||
})
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
for (hash, mut paths) in map {
|
||||
if paths.len() == 1 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Put files in "Oldmail" at the end of the list. We keep only the first, and we prefer to
|
||||
// remove Oldmail over all else.
|
||||
paths.sort_by(|a, b| {
|
||||
if a.contains("Oldmail") && b.contains("Oldmail") {
|
||||
a.partial_cmp(b).unwrap()
|
||||
} else if a.contains("Oldmail") {
|
||||
std::cmp::Ordering::Greater
|
||||
} else if b.contains("Oldmail") {
|
||||
std::cmp::Ordering::Less
|
||||
} else {
|
||||
a.partial_cmp(b).unwrap()
|
||||
}
|
||||
});
|
||||
|
||||
let mut it = paths.iter();
|
||||
println!("\n{hash}:");
|
||||
println!(" keep: {}", it.next().unwrap());
|
||||
for p in it {
|
||||
println!(" rm: {p}",);
|
||||
if std::env::var(ENV_VAR_TO_DELETE).is_ok() {
|
||||
println!("DELETING {p}");
|
||||
if let Some(e) = remove_file(p).err() {
|
||||
eprintln!("Failed to remove {p}: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::{env, error::Error, fs::File, io::prelude::*, process::exit, slice::Iter};
|
||||
use std::{env, fs::File, io::prelude::*, process::exit, slice::Iter};
|
||||
|
||||
use mailparse::{dateparse, MailHeaderMap};
|
||||
use mailparse::MailHeaderMap;
|
||||
|
||||
fn newline(b: &u8) -> bool {
|
||||
*b == b'\n'
|
||||
@@ -23,14 +23,14 @@ fn index_of(it: &mut Iter<u8>, needle: &[u8]) -> Option<usize> {
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_mbox(mbox_bytes: &Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
fn parse_mbox(mbox_bytes: &Vec<u8>) {
|
||||
let mut it = mbox_bytes.iter();
|
||||
let mut ix = 0;
|
||||
|
||||
loop {
|
||||
let mail_start = it.position(newline);
|
||||
if mail_start.is_none() {
|
||||
return Ok(());
|
||||
return;
|
||||
}
|
||||
ix += mail_start.unwrap() + 1;
|
||||
let start = ix;
|
||||
@@ -74,10 +74,10 @@ fn main() {
|
||||
}
|
||||
let mut args = env::args();
|
||||
args.next(); // drop executable name
|
||||
args.for_each(|mbox_path| {
|
||||
for mbox_path in args {
|
||||
let mut mbox = File::open(mbox_path).unwrap();
|
||||
let mut mails = Vec::new();
|
||||
mbox.read_to_end(&mut mails).unwrap();
|
||||
parse_mbox(&mails);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user