Compare commits

...

8 Commits

6 changed files with 650 additions and 68 deletions

392
Cargo.lock generated
View File

@@ -4,18 +4,67 @@ version = 3
[[package]]
name = "aho-corasick"
version = "0.7.20"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.69"
name = "anstream"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800"
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is-terminal",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd"
[[package]]
name = "anstyle-parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854"
[[package]]
name = "autocfg"
@@ -30,14 +79,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
[[package]]
name = "block-buffer"
version = "0.10.3"
name = "bitflags"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e"
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
@@ -55,19 +116,66 @@ dependencies = [
]
[[package]]
name = "cpufeatures"
version = "0.2.5"
name = "clap"
version = "4.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320"
checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d"
dependencies = [
"clap_builder",
"clap_derive",
"once_cell",
]
[[package]]
name = "clap_builder"
version = "4.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "cpufeatures"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1"
dependencies = [
"libc",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.6"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
dependencies = [
"cfg-if",
"crossbeam-utils",
@@ -75,9 +183,9 @@ dependencies = [
[[package]]
name = "crossbeam-deque"
version = "0.8.2"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
dependencies = [
"cfg-if",
"crossbeam-epoch",
@@ -86,9 +194,9 @@ dependencies = [
[[package]]
name = "crossbeam-epoch"
version = "0.9.13"
version = "0.9.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
dependencies = [
"autocfg",
"cfg-if",
@@ -99,9 +207,9 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
version = "0.8.14"
version = "0.8.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
dependencies = [
"cfg-if",
]
@@ -118,15 +226,15 @@ dependencies = [
[[package]]
name = "data-encoding"
version = "2.3.3"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb"
checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308"
[[package]]
name = "digest"
version = "0.10.6"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
@@ -134,15 +242,16 @@ dependencies = [
[[package]]
name = "either"
version = "1.8.1"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "email"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"mailparse",
"memmap",
"rayon",
@@ -162,29 +271,70 @@ dependencies = [
]
[[package]]
name = "generic-array"
version = "0.14.6"
name = "errno"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
dependencies = [
"errno-dragonfly",
"libc",
"windows-sys",
]
[[package]]
name = "errno-dragonfly"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "hermit-abi"
version = "0.2.6"
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "hermit-abi"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
[[package]]
name = "is-terminal"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
dependencies = [
"libc",
"hermit-abi",
"rustix",
"windows-sys",
]
[[package]]
name = "libc"
version = "0.2.139"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "linux-raw-sys"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
[[package]]
name = "mailparse"
@@ -215,52 +365,58 @@ dependencies = [
[[package]]
name = "memoffset"
version = "0.7.1"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.15.0"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "proc-macro2"
version = "1.0.51"
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "proc-macro2"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.23"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
dependencies = [
"proc-macro2",
]
[[package]]
name = "quoted_printable"
version = "0.4.7"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a24039f627d8285853cc90dcddf8c1ebfaa91f834566948872b225b9a28ed1b6"
checksum = "5a3866219251662ec3b26fc217e3e05bf9c4f84325234dfb96bf0bf840889e49"
[[package]]
name = "rayon"
version = "1.6.1"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
dependencies = [
"either",
"rayon-core",
@@ -268,9 +424,9 @@ dependencies = [
[[package]]
name = "rayon-core"
version = "1.10.2"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b"
checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
@@ -280,9 +436,21 @@ dependencies = [
[[package]]
name = "regex"
version = "1.7.0"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a"
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
dependencies = [
"aho-corasick",
"memchr",
@@ -291,9 +459,22 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.6.28"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "rustix"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]]
name = "same-file"
@@ -306,9 +487,9 @@ dependencies = [
[[package]]
name = "scopeguard"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "sha1"
@@ -322,10 +503,16 @@ dependencies = [
]
[[package]]
name = "syn"
version = "1.0.108"
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d56e159d99e6c2b93995d171050271edb50ecc5288fbc7cc17de8fdce4e58c14"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "2.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0"
dependencies = [
"proc-macro2",
"quote",
@@ -334,18 +521,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.38"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.38"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
dependencies = [
"proc-macro2",
"quote",
@@ -360,9 +547,15 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
[[package]]
name = "unicode-ident"
version = "1.0.6"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "version_check"
@@ -372,12 +565,11 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
@@ -411,3 +603,69 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
[[package]]
name = "windows_i686_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
[[package]]
name = "windows_i686_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"

View File

@@ -8,6 +8,7 @@ edition = "2018"
[dependencies]
anyhow = "1.0.69"
clap = { version = "4.1.8", features = ["derive"] }
mailparse = "0.14.0"
memmap = "0.7.0"
rayon = "1.6.1"

70
cmd/recordio/recordio.go Normal file
View File

@@ -0,0 +1,70 @@
package main
import (
"compress/gzip"
"flag"
"io"
"io/ioutil"
"os"
"path/filepath"
"github.com/eclesh/recordio"
"github.com/golang/glog"
"xinu.tv/email/maildir"
)
// Command-line flags selecting the mail source and the recordIO destination.
var (
// maildirPath is the directory handed to maildir.Walk; it defaults to the
// "Maildir" directory under $HOME.
maildirPath = flag.String("maildir", filepath.Join(os.Getenv("HOME"), "Maildir"), "Maildir path")
// recordIOPath is the output file; main opens it with os.Create.
recordIOPath = flag.String("recordio", "/tmp/maildir.rio", "path to recordIO file")
// compress, when true, makes main route records through a gzip writer.
compress = flag.Bool("z", false, "compress output")
)
// main reads every file that maildir.Walk reports under -maildir and writes
// its raw bytes as one record to the recordIO file at -recordio. When -z is
// set the records are gzip-compressed (best compression) on the way out.
// Progress is logged every 1000 files; any failure terminates the program.
func main() {
	flag.Parse()
	defer glog.Flush()

	f, err := os.Create(*recordIOPath)
	if err != nil {
		glog.Exitf("Failed to open %q: %v", *recordIOPath, err)
	}

	// Records go either straight to the file or through a gzip stream
	// layered on top of it.
	var gzf *gzip.Writer
	var w io.Writer
	if *compress {
		gzf, err = gzip.NewWriterLevel(f, gzip.BestCompression)
		if err != nil {
			glog.Exitf("Failed to create gzip writer: %v", err)
		}
		w = recordio.NewWriter(gzf)
	} else {
		w = recordio.NewWriter(f)
	}

	count := 0
	if err := maildir.Walk(*maildirPath, func(path string) error {
		b, err := ioutil.ReadFile(path)
		if err != nil {
			return err
		}
		if _, err := w.Write(b); err != nil {
			return err
		}
		count++
		if count%1000 == 0 {
			glog.Infof("Processed %d files", count)
		}
		return nil
	}); err != nil {
		glog.Exitf("Failed to walk %q: %v", *maildirPath, err)
	}

	if *compress {
		// Close, not Flush: Close flushes pending data AND writes the gzip
		// footer. Flushing alone leaves a truncated stream that readers
		// reject with an unexpected-EOF error. Close does not close f.
		if err := gzf.Close(); err != nil {
			glog.Exitf("Failed to close gzipper on %q: %v", *recordIOPath, err)
		}
	}
	if err := f.Close(); err != nil {
		glog.Exitf("Failed to close %q: %v", *recordIOPath, err)
	}
}

155
src/bin/fingerprint.rs Normal file
View File

@@ -0,0 +1,155 @@
use std::{
collections::HashMap,
fs::File,
path::Path,
time::{SystemTime, UNIX_EPOCH},
};
use clap::Parser;
use email::{fingerprint, should_skip, EmailError};
use mailparse::{dateparse, parse_mail, MailHeaderMap};
use memmap::MmapOptions;
use rayon::{iter::ParallelBridge, prelude::ParallelIterator};
use walkdir::WalkDir;
/// Group mail files by the fingerprint of their MIME structure and report the most common ones
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Number of days to search through
    #[arg(short, long, default_value_t = 365)]
    days: i64,
    /// Enable verbose logging
    #[arg(short, long, default_value_t = false)]
    verbose: bool,
    /// Number of example filenames to print
    #[arg(short, long, default_value_t = 1)]
    examples: usize,
    /// Show top N fingerprints
    #[arg(short, long, default_value_t = 10)]
    top_n: usize,
    /// Include subpart header keys in fingerprint
    #[arg(short, long)]
    include_header_keys: bool,
    /// Include subpart params in fingerprint
    #[arg(long)]
    include_params: bool,
    /// Input directory to recursively search
    input_dir: String,
}
/// Walks the input directory in parallel, fingerprints every message newer than the `days`
/// cutoff, and prints the `top_n` most frequent fingerprints with example paths and the runtime.
fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    // Just check messages from the last N days.
    let max_age = 60 * 60 * 24 * args.days;
    let start = std::time::Instant::now();
    let unix_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("couldn't get unix time");
    let youngest = unix_secs.as_secs() as i64 - max_age;
    let dir = &args.input_dir;

    let map = WalkDir::new(dir)
        .into_iter()
        .par_bridge()
        // Entries that could not be read are dropped silently.
        .filter_map(Result::ok)
        .filter_map(|entry| {
            // Directories are never fingerprinted; optionally echo them as progress output.
            if entry.file_type().is_dir() {
                if args.verbose {
                    let relative = entry
                        .path()
                        .strip_prefix(dir)
                        .expect("failed to strip dir");
                    println!("{} ", relative.display());
                }
                return None;
            }
            let arg = entry.path().display().to_string();
            if should_skip(&arg) {
                return None;
            }
            match parse(
                &arg,
                youngest,
                args.include_header_keys,
                args.include_params,
            ) {
                // `None` here means the email was too old and is skipped.
                Ok(fp) => fp.map(|h| (h, arg)),
                Err(e) => {
                    eprintln!("{}: failed {}", arg, e);
                    None
                }
            }
        })
        // Each worker groups paths by fingerprint in its own map...
        .fold(
            HashMap::<String, Vec<String>>::new,
            |mut grouped, (h, arg)| {
                grouped.entry(h).or_default().push(arg);
                grouped
            },
        )
        // ...and the per-worker maps are then merged into one.
        .reduce(HashMap::new, |mut merged, partial| {
            for (k, v) in partial {
                merged.entry(k).or_default().extend(v);
            }
            merged
        });

    let mut res: Vec<_> = map
        .into_iter()
        .map(|(hash, mut paths)| {
            let total = paths.len();
            // Keep only the requested number of example paths.
            paths.truncate(args.examples);
            (total, hash, paths)
        })
        .collect();
    // Largest count first. Fingerprints are unique map keys, so no two tuples compare equal
    // and the unstable sort cannot change the resulting order.
    res.sort_unstable_by(|a, b| b.cmp(a));

    for (cnt, hash, ex) in res.into_iter().take(args.top_n) {
        println!("{cnt} {}\n{hash}\n", ex.join("\n"));
    }
    println!("Runtime: {:.2}s", start.elapsed().as_secs_f32());
    Ok(())
}
/// Memory-maps and parses the mail file at `path`, returning its fingerprint lines joined
/// with newlines.
///
/// Returns `Ok(None)` when the message's `Date` header is before `youngest`. A missing or
/// unparseable `Date` header is treated as timestamp 0.
///
/// # Errors
///
/// Propagates failures from opening or mapping the file and from parsing the message.
fn parse<P: AsRef<Path>>(
    path: P,
    youngest: i64,
    include_header_keys: bool,
    include_params: bool,
) -> Result<Option<String>, EmailError> {
    let file = File::open(&path)?;
    // SAFETY (assumption, to be confirmed): the mapping is only read here, and the file is
    // presumed not to be truncated or rewritten by another process while it is mapped.
    let mmap = unsafe { MmapOptions::new().map(&file)? };
    let m = parse_mail(&mmap)?;

    let date_header = m.headers.get_first_value("Date").unwrap_or_default();
    let d = dateparse(date_header.as_str()).unwrap_or(0);

    if d >= youngest {
        let lines = fingerprint(&m, include_header_keys, include_params);
        Ok(Some(lines.join("\n")))
    } else {
        Ok(None)
    }
}

40
src/bin/summarize.rs Normal file
View File

@@ -0,0 +1,40 @@
use std::fs::File;
use clap::Parser;
use email::fingerprint;
use mailparse::{parse_mail, MailHeaderMap};
use memmap::MmapOptions;
// Command-line options for this binary; `main` obtains them via `Args::parse()`.
// NOTE(review): the `///` comments below are presumably surfaced by clap as help text, so
// rewording them would change the program's `--help` output — confirm before editing.
/// Use library to summarize information about given mail files
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
/// Include subpart headers keys in fingerprint
// Passed through to `fingerprint` as its `include_header_keys` argument.
#[arg(short, long)]
include_header_keys: bool,
/// Include subpart params in fingerprint
// Passed through to `fingerprint` as its `include_params` argument; long flag only.
#[arg(long)]
include_params: bool,
/// List of files to summarize
// Positional arguments; `main` opens and summarizes each one in order.
paths: Vec<String>,
}
fn main() -> anyhow::Result<()> {
let args = Args::parse();
for path in args.paths {
let file = File::open(&path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let m = parse_mail(&mmap)?;
let subject = m
.headers
.get_first_value("subject")
.unwrap_or("(NO SUBJECT)".to_owned());
println!(
"{subject}: {path}\n{}",
fingerprint(&m, args.include_header_keys, args.include_params).join("\n")
);
}
Ok(())
}

View File

@@ -1,6 +1,6 @@
use std::{fs::File, path::Path};
use mailparse::{parse_headers, MailHeader, MailHeaderMap, MailParseError};
use mailparse::{parse_headers, MailHeader, MailHeaderMap, MailParseError, ParsedMail};
use memmap::MmapOptions;
use sha1::{Digest, Sha1};
use thiserror::Error;
@@ -44,6 +44,64 @@ pub fn hash_headers(hdrs: &[MailHeader]) -> String {
format!("{:x}", hasher.finalize())
}
/// Names of non-mail files commonly found under Maildir/.
const SKIP_FILES: &[&str] = &[
    "docdata.glass",
    "flintlock",
    "iamglass",
    ".mbsyncstate",
    "position.glass",
    "postlist.glass",
    "termlist.glass",
    ".uidvalidity",
];

/// Returns true if the last part of `path` (everything after the final `/`, or the whole
/// string when there is no `/`) is one of the known non-mail files in `SKIP_FILES`.
pub fn should_skip(path: &str) -> bool {
    // `rsplit` always yields at least one item, so the fallback is never actually taken.
    let last_component = path.rsplit('/').next().unwrap_or(path);
    SKIP_FILES.iter().any(|&known| known == last_component)
}
/// Builds the fingerprint lines for a parsed message and all of its subparts.
///
/// This is the public entry point: it starts the recursive walk in `fingerprint_rec` at
/// depth 0 and returns the lines that walk produces.
///
/// * `include_header_keys` - forwarded unchanged to `fingerprint_rec`.
/// * `include_params` - forwarded unchanged to `fingerprint_rec`.
pub fn fingerprint(
pm: &ParsedMail<'_>,
include_header_keys: bool,
include_params: bool,
) -> Vec<String> {
fingerprint_rec(pm, include_header_keys, include_params, 0)
}
/// Recursive worker behind `fingerprint`.
///
/// Emits one line holding the part's MIME type, then (optionally) its header keys and its
/// content-type parameters, followed by the lines of every subpart in order. Lines at a given
/// `depth` are prefixed with `depth * 2` repetitions of the indent string.
fn fingerprint_rec(
    pm: &ParsedMail<'_>,
    include_header_keys: bool,
    include_params: bool,
    depth: usize,
) -> Vec<String> {
    let indent = " ".repeat(depth * 2);
    let mut parts = Vec::new();
    parts.push(format!("{}{}", indent, pm.ctype.mimetype));

    if include_header_keys {
        parts.push(format!("{indent}Headers:"));
        parts.extend(
            pm.headers
                .iter()
                .map(|hdr| format!("{indent} {}", hdr.get_key())),
        );
    }

    if include_params {
        parts.extend(
            pm.ctype
                .params
                .iter()
                .map(|(k, v)| format!("{indent} {k}: {v}")),
        );
    }

    // Children are rendered one level deeper and appended in document order.
    for child in &pm.subparts {
        let mut child_lines =
            fingerprint_rec(child, include_header_keys, include_params, depth + 1);
        parts.append(&mut child_lines);
    }
    parts
}
#[cfg(test)]
mod tests {
use mailparse::{parse_headers, MailHeader, MailHeaderMap};