Use rayon to parallelize the walk; this speeds up access over NFS.

This commit is contained in:
2023-02-22 23:11:44 -08:00
parent 88d6c6867d
commit 80b0412441
3 changed files with 133 additions and 5 deletions

View File

@@ -1,6 +1,7 @@
use std::{collections::HashMap, fs::remove_file};
use email::hash_file;
use rayon::{iter::ParallelBridge, prelude::ParallelIterator};
use walkdir::WalkDir;
const ENV_VAR_TO_DELETE: &str = "DELETE_DUPES";
@@ -11,6 +12,7 @@ fn main() -> anyhow::Result<()> {
.map(|dir| {
WalkDir::new(dir)
.into_iter()
.par_bridge()
.filter_map(|entry| entry.ok())
.filter_map(|entry| {
if entry.file_type().is_dir() {
@@ -26,12 +28,25 @@ fn main() -> anyhow::Result<()> {
}
}
})
.fold(HashMap::new(), |mut m, (h, arg)| {
m.entry(h).or_insert(Vec::new()).push(arg);
m
})
.fold(
|| HashMap::new(),
|mut m, (h, arg)| {
m.entry(h).or_insert(Vec::new()).push(arg);
m
},
)
})
.unwrap();
.unwrap()
// Merge maps created by parallel iteration.
.reduce(
|| HashMap::new(),
|mut acc, m| {
for (k, v) in m {
acc.entry(k).or_insert(Vec::new()).extend(v);
}
acc
},
);
for (hash, mut paths) in map {
if paths.len() == 1 {