Use rayon to parallelize the directory walk; this speeds up access over NFS.
This commit is contained in:
parent
88d6c6867d
commit
80b0412441
112
Cargo.lock
generated
112
Cargo.lock
generated
@ -17,6 +17,12 @@ version = "1.0.69"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800"
|
checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "base64"
|
name = "base64"
|
||||||
version = "0.13.1"
|
version = "0.13.1"
|
||||||
@ -57,6 +63,49 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-channel"
|
||||||
|
version = "0.5.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"memoffset",
|
||||||
|
"scopeguard",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crypto-common"
|
name = "crypto-common"
|
||||||
version = "0.1.6"
|
version = "0.1.6"
|
||||||
@ -83,6 +132,12 @@ dependencies = [
|
|||||||
"crypto-common",
|
"crypto-common",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "email"
|
name = "email"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@ -90,6 +145,7 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"mailparse",
|
"mailparse",
|
||||||
"memmap",
|
"memmap",
|
||||||
|
"rayon",
|
||||||
"regex",
|
"regex",
|
||||||
"sha1",
|
"sha1",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
@ -115,6 +171,15 @@ dependencies = [
|
|||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.2.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.139"
|
version = "0.2.139"
|
||||||
@ -148,6 +213,25 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memoffset"
|
||||||
|
version = "0.7.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_cpus"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.51"
|
version = "1.0.51"
|
||||||
@ -172,6 +256,28 @@ version = "0.4.7"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a24039f627d8285853cc90dcddf8c1ebfaa91f834566948872b225b9a28ed1b6"
|
checksum = "a24039f627d8285853cc90dcddf8c1ebfaa91f834566948872b225b9a28ed1b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon"
|
||||||
|
version = "1.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"rayon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-core"
|
||||||
|
version = "1.10.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-channel",
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"num_cpus",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "1.7.0"
|
version = "1.7.0"
|
||||||
@ -198,6 +304,12 @@ dependencies = [
|
|||||||
"winapi-util",
|
"winapi-util",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "scopeguard"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sha1"
|
name = "sha1"
|
||||||
version = "0.10.5"
|
version = "0.10.5"
|
||||||
|
|||||||
@ -10,6 +10,7 @@ edition = "2018"
|
|||||||
anyhow = "1.0.69"
|
anyhow = "1.0.69"
|
||||||
mailparse = "0.14.0"
|
mailparse = "0.14.0"
|
||||||
memmap = "0.7.0"
|
memmap = "0.7.0"
|
||||||
|
rayon = "1.6.1"
|
||||||
regex = "1.7.0"
|
regex = "1.7.0"
|
||||||
sha1 = "0.10.5"
|
sha1 = "0.10.5"
|
||||||
thiserror = "1.0.38"
|
thiserror = "1.0.38"
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
use std::{collections::HashMap, fs::remove_file};
|
use std::{collections::HashMap, fs::remove_file};
|
||||||
|
|
||||||
use email::hash_file;
|
use email::hash_file;
|
||||||
|
use rayon::{iter::ParallelBridge, prelude::ParallelIterator};
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
const ENV_VAR_TO_DELETE: &str = "DELETE_DUPES";
|
const ENV_VAR_TO_DELETE: &str = "DELETE_DUPES";
|
||||||
|
|
||||||
@ -11,6 +12,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
.map(|dir| {
|
.map(|dir| {
|
||||||
WalkDir::new(dir)
|
WalkDir::new(dir)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
|
.par_bridge()
|
||||||
.filter_map(|entry| entry.ok())
|
.filter_map(|entry| entry.ok())
|
||||||
.filter_map(|entry| {
|
.filter_map(|entry| {
|
||||||
if entry.file_type().is_dir() {
|
if entry.file_type().is_dir() {
|
||||||
@ -26,12 +28,25 @@ fn main() -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.fold(HashMap::new(), |mut m, (h, arg)| {
|
.fold(
|
||||||
m.entry(h).or_insert(Vec::new()).push(arg);
|
|| HashMap::new(),
|
||||||
m
|
|mut m, (h, arg)| {
|
||||||
})
|
m.entry(h).or_insert(Vec::new()).push(arg);
|
||||||
|
m
|
||||||
|
},
|
||||||
|
)
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap()
|
||||||
|
// Merge maps created by parallel iteration.
|
||||||
|
.reduce(
|
||||||
|
|| HashMap::new(),
|
||||||
|
|mut acc, m| {
|
||||||
|
for (k, v) in m {
|
||||||
|
acc.entry(k).or_insert(Vec::new()).extend(v);
|
||||||
|
}
|
||||||
|
acc
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
for (hash, mut paths) in map {
|
for (hash, mut paths) in map {
|
||||||
if paths.len() == 1 {
|
if paths.len() == 1 {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user