Use walk instead of glob to scale beyond the shell's limit to expand *

This commit is contained in:
Bill Thiede 2013-08-22 21:44:30 -07:00
parent 7a90676cda
commit b48ebf217c

View File

@ -11,9 +11,7 @@ import (
"strings" "strings"
) )
var totalFiles = 0 var root = flag.String("root", "", "root directory to search for dups")
var filePat = flag.String("filepat", "*", "Glob for list of files to dedup")
var debug = debugT(false) var debug = debugT(false)
@ -38,12 +36,9 @@ type Checksum struct {
func main() { func main() {
flag.Parse() flag.Parse()
files, err := filepath.Glob(os.ExpandEnv(*filePat)) if *root == "" {
if err != nil { log.Fatal("Must specify root")
log.Fatalf("Failed to glob %q: %s", *filePat, err)
} }
totalFiles = len(files)
debug.Printf("Found %d files", totalFiles)
numWorkers := 10 numWorkers := 10
fns := make(chan string, numWorkers) fns := make(chan string, numWorkers)
@ -56,8 +51,18 @@ func main() {
go computeDups(sums, done) go computeDups(sums, done)
//go printer(sums, done) //go printer(sums, done)
for _, fn := range files { err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
fns <- fn if err != nil {
return err
}
if info.IsDir() {
return nil
}
fns <- path
return nil
})
if err != nil {
log.Fatalf("Failed to walk %s: %v", *root, err)
} }
close(fns) close(fns)
@ -94,7 +99,7 @@ func computeDups(sums <-chan Checksum, done chan bool) {
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename) dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
if count > 0 && (count%1000) == 0 { if count > 0 && (count%1000) == 0 {
log.Printf("Processed %d/%d files", count, totalFiles) log.Printf("Processed %d files", count)
} }
count++ count++
} }