diff --git a/dedup/dedup.go b/dedup/dedup.go index 2e96ab5..2b1a33e 100644 --- a/dedup/dedup.go +++ b/dedup/dedup.go @@ -11,9 +11,7 @@ import ( "strings" ) -var totalFiles = 0 - -var filePat = flag.String("filepat", "*", "Glob for list of files to dedup") +var root = flag.String("root", "", "root directory to search for dups") var debug = debugT(false) @@ -38,31 +36,38 @@ type Checksum struct { func main() { flag.Parse() - files, err := filepath.Glob(os.ExpandEnv(*filePat)) - if err != nil { - log.Fatalf("Failed to glob %q: %s", *filePat, err) + if *root == "" { + log.Fatal("Must specify root") } - totalFiles = len(files) - debug.Printf("Found %d files", totalFiles) numWorkers := 10 fns := make(chan string, numWorkers) sums := make(chan Checksum, numWorkers) done := make(chan bool) - for i:=0; i< numWorkers; i++ { + for i := 0; i < numWorkers; i++ { go checksumer(fns, sums, done) } go computeDups(sums, done) //go printer(sums, done) - for _, fn := range files { - fns <- fn + err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + fns <- path + return nil + }) + if err != nil { + log.Fatalf("Failed to walk %s: %v", *root, err) } close(fns) - for i:=0; i< numWorkers; i++ { + for i := 0; i < numWorkers; i++ { <-done } @@ -93,8 +98,8 @@ func computeDups(sums <-chan Checksum, done chan bool) { for sum := range sums { dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename) - if count > 0 && (count % 1000) == 0 { - log.Printf("Processed %d/%d files", count, totalFiles) + if count > 0 && (count%1000) == 0 { + log.Printf("Processed %d files", count) } count++ }