Use walk instead of glob to scale beyond the shell's limit to expand *

This commit is contained in:
Bill Thiede 2013-08-22 21:44:30 -07:00
parent 7a90676cda
commit b48ebf217c

View File

@ -11,9 +11,7 @@ import (
"strings"
)
var totalFiles = 0
var filePat = flag.String("filepat", "*", "Glob for list of files to dedup")
var root = flag.String("root", "", "root directory to search for dups")
var debug = debugT(false)
@ -38,31 +36,38 @@ type Checksum struct {
func main() {
flag.Parse()
files, err := filepath.Glob(os.ExpandEnv(*filePat))
if err != nil {
log.Fatalf("Failed to glob %q: %s", *filePat, err)
if *root == "" {
log.Fatal("Must specify root")
}
totalFiles = len(files)
debug.Printf("Found %d files", totalFiles)
numWorkers := 10
fns := make(chan string, numWorkers)
sums := make(chan Checksum, numWorkers)
done := make(chan bool)
for i:=0; i< numWorkers; i++ {
for i := 0; i < numWorkers; i++ {
go checksumer(fns, sums, done)
}
go computeDups(sums, done)
//go printer(sums, done)
for _, fn := range files {
fns <- fn
err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
fns <- path
return nil
})
if err != nil {
log.Fatalf("Failed to walk %s: %v", *root, err)
}
close(fns)
for i:=0; i< numWorkers; i++ {
for i := 0; i < numWorkers; i++ {
<-done
}
@ -93,8 +98,8 @@ func computeDups(sums <-chan Checksum, done chan bool) {
for sum := range sums {
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
if count > 0 && (count % 1000) == 0 {
log.Printf("Processed %d/%d files", count, totalFiles)
if count > 0 && (count%1000) == 0 {
log.Printf("Processed %d files", count)
}
count++
}