// Command dups walks a directory tree, computes a SHA-1 checksum for every
// regular file it finds, groups files by checksum and, after an interactive
// confirmation, deletes all but one file of each group.
package main

import (
	"crypto/sha1"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"sort"
	"strings"
)

// numWorkers is the number of concurrent checksumer goroutines; it is also
// used as the buffer size of the channels connecting the pipeline stages.
const numWorkers = 10

// root is the directory tree to scan, set with -root.
var root = flag.String("root", "", "root directory to search for dups")

// debug gates verbose logging; change it to debugT(true) to enable it.
var debug = debugT(false)

// debugT is a boolean switch whose methods log only when it is true.
type debugT bool

// Print forwards args to log.Print when d is true.
func (d debugT) Print(args ...interface{}) {
	if d {
		log.Print(args...)
	}
}

// Printf forwards format and args to log.Printf when d is true.
func (d debugT) Printf(format string, args ...interface{}) {
	if d {
		log.Printf(format, args...)
	}
}

// Checksum pairs a file name with the hex-encoded SHA-1 of its contents.
type Checksum struct {
	Checksum, Filename string
}

// main wires the pipeline together: the directory walk feeds file names to
// numWorkers checksumer goroutines, whose results are consumed by computeDups.
func main() {
	flag.Parse()
	if *root == "" {
		log.Fatal("Must specify root")
	}

	fns := make(chan string, numWorkers)
	sums := make(chan Checksum, numWorkers)
	done := make(chan bool)

	for i := 0; i < numWorkers; i++ {
		go checksumer(fns, sums, done)
	}
	go computeDups(sums, done)
	// To list checksums instead of deleting, replace the line above with:
	//go printer(sums, done)

	if err := walkFiles(*root, fns); err != nil {
		log.Fatalf("Failed to walk %s: %v", *root, err)
	}
	close(fns)

	// done is shared by both stages. The consumer signals only after sums is
	// closed, so the first numWorkers signals necessarily come from workers.
	for i := 0; i < numWorkers; i++ {
		<-done
	}
	close(sums)
	<-done
}

// walkFiles walks the tree rooted at dir and sends the path of every regular
// file on fns. It returns the first error reported by the walk.
//
// Anything that is not a regular file is left out. filepath.Walk does not
// follow symbolic links, but opening one does, so a link would hash the same
// as its target and the real file could end up deleted in favour of the link.
func walkFiles(dir string, fns chan<- string) error {
	return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		if !info.Mode().IsRegular() {
			debug.Print("Skipping non-file ", path)
			return nil
		}
		fns <- path
		return nil
	})
}

// removeDups deletes every file but one from each group in dups that holds
// more than one name. dups maps a checksum to the files sharing it.
//
// The file kept is the lexically smallest path of the group, so the outcome
// does not depend on the order in which the workers delivered their results.
// The program exits on the first file that cannot be removed.
func removeDups(dups map[string][]string) {
	log.Print("Removing dups")
	for _, fns := range dups {
		if len(fns) < 2 {
			continue
		}
		// Sort a copy so the caller's slices are left untouched.
		sorted := append([]string(nil), fns...)
		sort.Strings(sorted)

		log.Print("Skipping: ", sorted[0])
		log.Print("Deleting")
		for _, fn := range sorted[1:] {
			log.Print("\t", fn)
			if err := os.Remove(fn); err != nil {
				log.Fatalf("Failed to remove file: %s", err)
			}
		}
	}
}

// computeDups collects checksums from sums until the channel is closed,
// reports how many checksums are shared by more than one file and, if the
// user answers with something starting with "y", hands them to removeDups.
// It sends one value on done when finished.
func computeDups(sums <-chan Checksum, done chan bool) {
	dups := map[string][]string{}
	processed := 0
	for sum := range sums {
		dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
		// Count first so the message reports the files actually recorded.
		processed++
		if processed%1000 == 0 {
			log.Printf("Processed %d files", processed)
		}
	}

	groups := 0
	for _, fns := range dups {
		if len(fns) > 1 {
			groups++
		}
	}

	if groups == 0 {
		log.Print("No dups found")
		done <- true
		return
	}

	log.Printf("Found %d files with dups, delete them? [y/n]", groups)
	var answer string
	if _, err := fmt.Scan(&answer); err != nil {
		// answer stays empty, which is treated as "no" below.
		log.Print("Error reading answer, not deleting: ", err)
	}
	if strings.HasPrefix(strings.ToLower(answer), "y") {
		removeDups(dups)
	}
	done <- true
}

// printer writes each checksum and file name received on sums to standard
// output, one pair per line, and sends one value on done once sums is closed.
func printer(sums <-chan Checksum, done chan bool) {
	for sum := range sums {
		fmt.Println(sum.Checksum, sum.Filename)
	}
	done <- true
}

// checksumer hashes every file named on fns and sends the result on sums.
// Files that cannot be opened, inspected or fully read, and paths that do not
// refer to a regular file, are logged and skipped. It sends one value on done
// once fns is closed and drained.
func checksumer(fns <-chan string, sums chan<- Checksum, done chan bool) {
	for fn := range fns {
		sum, ok := checksumFile(fn)
		if !ok {
			continue
		}
		sums <- Checksum{
			Checksum: sum,
			Filename: fn,
		}
	}
	done <- true
}

// checksumFile returns the hex-encoded SHA-1 of the contents of fn. The
// boolean is false when no trustworthy checksum could be produced; the reason
// has then already been logged.
func checksumFile(fn string) (string, bool) {
	f, err := os.Open(fn)
	if err != nil {
		log.Print("Error opening file: ", err)
		return "", false
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		log.Print("Error stating file: ", err)
		return "", false
	}
	debug.Printf("Mode %s for %q", fi.Mode(), fn)
	if !fi.Mode().IsRegular() {
		debug.Print("Skipping non-file ", fn)
		return "", false
	}

	h := sha1.New()
	// A partial read must not be published: its hash could match another
	// file and cause that file to be deleted as a duplicate.
	if _, err := io.Copy(h, f); err != nil {
		log.Print("Error reading file: ", err)
		return "", false
	}
	return fmt.Sprintf("%x", h.Sum(nil)), true
}