Use rayon to parallelize the directory walk; this speeds up access over NFS.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
use std::{collections::HashMap, fs::remove_file};
|
||||
|
||||
use email::hash_file;
|
||||
use rayon::{iter::ParallelBridge, prelude::ParallelIterator};
|
||||
use walkdir::WalkDir;
|
||||
// Name of an environment variable, stored as a string constant.
// NOTE(review): presumably setting `DELETE_DUPES` opts in to removing duplicate
// files rather than only reporting them — confirm against where it is read.
const ENV_VAR_TO_DELETE: &str = "DELETE_DUPES";
|
||||
|
||||
@@ -11,6 +12,7 @@ fn main() -> anyhow::Result<()> {
|
||||
.map(|dir| {
|
||||
WalkDir::new(dir)
|
||||
.into_iter()
|
||||
.par_bridge()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| {
|
||||
if entry.file_type().is_dir() {
|
||||
@@ -26,12 +28,25 @@ fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
})
|
||||
.fold(HashMap::new(), |mut m, (h, arg)| {
|
||||
m.entry(h).or_insert(Vec::new()).push(arg);
|
||||
m
|
||||
})
|
||||
.fold(
|
||||
|| HashMap::new(),
|
||||
|mut m, (h, arg)| {
|
||||
m.entry(h).or_insert(Vec::new()).push(arg);
|
||||
m
|
||||
},
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
.unwrap()
|
||||
// Merge maps created by parallel iteration.
|
||||
.reduce(
|
||||
|| HashMap::new(),
|
||||
|mut acc, m| {
|
||||
for (k, v) in m {
|
||||
acc.entry(k).or_insert(Vec::new()).extend(v);
|
||||
}
|
||||
acc
|
||||
},
|
||||
);
|
||||
|
||||
for (hash, mut paths) in map {
|
||||
if paths.len() == 1 {
|
||||
|
||||
Reference in New Issue
Block a user