Use walk instead of glob to scale beyond the shell's limit to expand *
This commit is contained in:
parent
7a90676cda
commit
b48ebf217c
@ -11,9 +11,7 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
var totalFiles = 0
|
||||
|
||||
var filePat = flag.String("filepat", "*", "Glob for list of files to dedup")
|
||||
var root = flag.String("root", "", "root directory to search for dups")
|
||||
|
||||
var debug = debugT(false)
|
||||
|
||||
@ -38,31 +36,38 @@ type Checksum struct {
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
files, err := filepath.Glob(os.ExpandEnv(*filePat))
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to glob %q: %s", *filePat, err)
|
||||
if *root == "" {
|
||||
log.Fatal("Must specify root")
|
||||
}
|
||||
totalFiles = len(files)
|
||||
debug.Printf("Found %d files", totalFiles)
|
||||
|
||||
numWorkers := 10
|
||||
fns := make(chan string, numWorkers)
|
||||
sums := make(chan Checksum, numWorkers)
|
||||
done := make(chan bool)
|
||||
|
||||
for i:=0; i< numWorkers; i++ {
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
go checksumer(fns, sums, done)
|
||||
}
|
||||
go computeDups(sums, done)
|
||||
//go printer(sums, done)
|
||||
|
||||
for _, fn := range files {
|
||||
fns <- fn
|
||||
err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
fns <- path
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to walk %s: %v", *root, err)
|
||||
}
|
||||
|
||||
close(fns)
|
||||
|
||||
for i:=0; i< numWorkers; i++ {
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
<-done
|
||||
}
|
||||
|
||||
@ -93,8 +98,8 @@ func computeDups(sums <-chan Checksum, done chan bool) {
|
||||
for sum := range sums {
|
||||
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
|
||||
|
||||
if count > 0 && (count % 1000) == 0 {
|
||||
log.Printf("Processed %d/%d files", count, totalFiles)
|
||||
if count > 0 && (count%1000) == 0 {
|
||||
log.Printf("Processed %d files", count)
|
||||
}
|
||||
count++
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user