Use walk instead of glob to scale beyond the shell's limit to expand *

This commit is contained in:
Bill Thiede 2013-08-22 21:44:30 -07:00
parent 7a90676cda
commit b48ebf217c

View File

@ -11,9 +11,7 @@ import (
"strings" "strings"
) )
var totalFiles = 0 var root = flag.String("root", "", "root directory to search for dups")
var filePat = flag.String("filepat", "*", "Glob for list of files to dedup")
var debug = debugT(false) var debug = debugT(false)
@ -38,31 +36,38 @@ type Checksum struct {
func main() { func main() {
flag.Parse() flag.Parse()
files, err := filepath.Glob(os.ExpandEnv(*filePat)) if *root == "" {
if err != nil { log.Fatal("Must specify root")
log.Fatalf("Failed to glob %q: %s", *filePat, err)
} }
totalFiles = len(files)
debug.Printf("Found %d files", totalFiles)
numWorkers := 10 numWorkers := 10
fns := make(chan string, numWorkers) fns := make(chan string, numWorkers)
sums := make(chan Checksum, numWorkers) sums := make(chan Checksum, numWorkers)
done := make(chan bool) done := make(chan bool)
for i:=0; i< numWorkers; i++ { for i := 0; i < numWorkers; i++ {
go checksumer(fns, sums, done) go checksumer(fns, sums, done)
} }
go computeDups(sums, done) go computeDups(sums, done)
//go printer(sums, done) //go printer(sums, done)
for _, fn := range files { err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
fns <- fn if err != nil {
return err
}
if info.IsDir() {
return nil
}
fns <- path
return nil
})
if err != nil {
log.Fatalf("Failed to walk %s: %v", *root, err)
} }
close(fns) close(fns)
for i:=0; i< numWorkers; i++ { for i := 0; i < numWorkers; i++ {
<-done <-done
} }
@ -93,8 +98,8 @@ func computeDups(sums <-chan Checksum, done chan bool) {
for sum := range sums { for sum := range sums {
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename) dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
if count > 0 && (count % 1000) == 0 { if count > 0 && (count%1000) == 0 {
log.Printf("Processed %d/%d files", count, totalFiles) log.Printf("Processed %d files", count)
} }
count++ count++
} }