Use walk instead of glob to scale beyond the shell's limit to expand *

This commit is contained in:
Bill Thiede 2013-08-22 21:44:30 -07:00
parent 7a90676cda
commit b48ebf217c

View File

@ -11,9 +11,7 @@ import (
"strings"
)
var totalFiles = 0
var filePat = flag.String("filepat", "*", "Glob for list of files to dedup")
var root = flag.String("root", "", "root directory to search for dups")
var debug = debugT(false)
@ -38,12 +36,9 @@ type Checksum struct {
func main() {
flag.Parse()
files, err := filepath.Glob(os.ExpandEnv(*filePat))
if err != nil {
log.Fatalf("Failed to glob %q: %s", *filePat, err)
if *root == "" {
log.Fatal("Must specify root")
}
totalFiles = len(files)
debug.Printf("Found %d files", totalFiles)
numWorkers := 10
fns := make(chan string, numWorkers)
@ -56,8 +51,18 @@ func main() {
go computeDups(sums, done)
//go printer(sums, done)
for _, fn := range files {
fns <- fn
err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
fns <- path
return nil
})
if err != nil {
log.Fatalf("Failed to walk %s: %v", *root, err)
}
close(fns)
@ -94,7 +99,7 @@ func computeDups(sums <-chan Checksum, done chan bool) {
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
if count > 0 && (count%1000) == 0 {
log.Printf("Processed %d/%d files", count, totalFiles)
log.Printf("Processed %d files", count)
}
count++
}