Use rayon to parallelize walk, this speeds up access over NFS.
This commit is contained in:
parent
88d6c6867d
commit
80b0412441
112
Cargo.lock
generated
112
Cargo.lock
generated
@ -17,6 +17,12 @@ version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.1"
|
||||
@ -57,6 +63,49 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"memoffset",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
@ -83,6 +132,12 @@ dependencies = [
|
||||
"crypto-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
|
||||
[[package]]
|
||||
name = "email"
|
||||
version = "0.1.0"
|
||||
@ -90,6 +145,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"mailparse",
|
||||
"memmap",
|
||||
"rayon",
|
||||
"regex",
|
||||
"sha1",
|
||||
"thiserror",
|
||||
@ -115,6 +171,15 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.139"
|
||||
@ -148,6 +213,25 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.51"
|
||||
@ -172,6 +256,28 @@ version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a24039f627d8285853cc90dcddf8c1ebfaa91f834566948872b225b9a28ed1b6"
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.7.0"
|
||||
@ -198,6 +304,12 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.5"
|
||||
|
||||
@ -10,6 +10,7 @@ edition = "2018"
|
||||
anyhow = "1.0.69"
|
||||
mailparse = "0.14.0"
|
||||
memmap = "0.7.0"
|
||||
rayon = "1.6.1"
|
||||
regex = "1.7.0"
|
||||
sha1 = "0.10.5"
|
||||
thiserror = "1.0.38"
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
use std::{collections::HashMap, fs::remove_file};
|
||||
|
||||
use email::hash_file;
|
||||
use rayon::{iter::ParallelBridge, prelude::ParallelIterator};
|
||||
use walkdir::WalkDir;
|
||||
const ENV_VAR_TO_DELETE: &str = "DELETE_DUPES";
|
||||
|
||||
@ -11,6 +12,7 @@ fn main() -> anyhow::Result<()> {
|
||||
.map(|dir| {
|
||||
WalkDir::new(dir)
|
||||
.into_iter()
|
||||
.par_bridge()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| {
|
||||
if entry.file_type().is_dir() {
|
||||
@ -26,12 +28,25 @@ fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
})
|
||||
.fold(HashMap::new(), |mut m, (h, arg)| {
|
||||
m.entry(h).or_insert(Vec::new()).push(arg);
|
||||
m
|
||||
})
|
||||
.fold(
|
||||
|| HashMap::new(),
|
||||
|mut m, (h, arg)| {
|
||||
m.entry(h).or_insert(Vec::new()).push(arg);
|
||||
m
|
||||
},
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
.unwrap()
|
||||
// Merge maps created by parallel iteration.
|
||||
.reduce(
|
||||
|| HashMap::new(),
|
||||
|mut acc, m| {
|
||||
for (k, v) in m {
|
||||
acc.entry(k).or_insert(Vec::new()).extend(v);
|
||||
}
|
||||
acc
|
||||
},
|
||||
);
|
||||
|
||||
for (hash, mut paths) in map {
|
||||
if paths.len() == 1 {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user