// This program finds files with identical contents (compared by SHA-1
// checksum) among the files matched by the -filepat glob and, after an
// interactive confirmation, deletes every file of a duplicate group except
// the first one recorded for that group.
package main

import (
	"crypto/sha1"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
)

// totalFiles is the number of paths matched by the glob. main assigns it
// before starting any goroutine; computeDups only reads it for progress logs.
var totalFiles = 0

// filePat is the glob selecting candidate files. Environment variables in the
// pattern are expanded by main before globbing.
var filePat = flag.String("filepat", "*", "Glob for list of files to dedup")

// debug gates verbose logging; change it to debugT(true) to enable it.
var debug = debugT(false)

// debugT is a boolean switch whose methods log only when the value is true.
type debugT bool

// Print forwards args to log.Print when d is true.
func (d debugT) Print(args ...interface{}) {
	if d {
		log.Print(args...)
	}
}

// Printf forwards format and args to log.Printf when d is true.
func (d debugT) Printf(format string, args ...interface{}) {
	if d {
		log.Printf(format, args...)
	}
}

// Checksum pairs a file name with the hex-encoded SHA-1 of its contents.
type Checksum struct {
	Checksum, Filename string
}

// main globs the candidate files, hashes them with a fixed pool of workers
// and hands the results to computeDups.
func main() {
	flag.Parse()

	files, err := filepath.Glob(os.ExpandEnv(*filePat))
	if err != nil {
		log.Fatalf("Failed to glob %q: %s", *filePat, err)
	}
	totalFiles = len(files)
	debug.Printf("Found %d files", totalFiles)

	const numWorkers = 10
	fns := make(chan string, numWorkers)
	sums := make(chan Checksum, numWorkers)
	done := make(chan bool)

	for i := 0; i < numWorkers; i++ {
		go checksumer(fns, sums, done)
	}
	go computeDups(sums, done)
	//go printer(sums, done)

	for _, fn := range files {
		fns <- fn
	}
	close(fns)

	// The workers and the consumer share the done channel. The consumer
	// cannot signal before sums is closed, so the first numWorkers receives
	// are guaranteed to come from the workers.
	for i := 0; i < numWorkers; i++ {
		<-done
	}
	close(sums)
	<-done
}

// removeDups deletes, for every checksum with more than one file, all files
// except the first in its list. It terminates the program via log.Fatalf on
// the first removal that fails.
func removeDups(dups map[string][]string) {
	log.Print("Removing dups")
	for _, fns := range dups {
		if len(fns) < 2 {
			continue
		}
		log.Print("Skipping: ", fns[0])
		log.Print("Deleting")
		for _, fn := range fns[1:] {
			log.Print("\t", fn)
			if err := os.Remove(fn); err != nil {
				log.Fatalf("Failed to remove file: %s", err)
			}
		}
	}
}

// computeDups drains sums, groups file names by checksum and, when at least
// one checksum has several files, asks on standard input whether the
// duplicates should be deleted. It sends on done once when finished.
func computeDups(sums <-chan Checksum, done chan bool) {
	dups := map[string][]string{}
	processed := 0
	for sum := range sums {
		dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
		// Count first so the logged number matches the files actually handled.
		processed++
		if processed%1000 == 0 {
			log.Printf("Processed %d/%d files", processed, totalFiles)
		}
	}

	groups := 0
	for _, fns := range dups {
		if len(fns) > 1 {
			groups++
		}
	}

	if groups == 0 {
		log.Print("No dups found")
	} else {
		log.Printf("Found %d files with dups, delete them? [y/n]", groups)
		var answer string
		if _, err := fmt.Scan(&answer); err != nil {
			// An unreadable answer must never be treated as consent.
			log.Print("Failed to read answer, not deleting anything: ", err)
		} else if strings.HasPrefix(strings.ToLower(answer), "y") {
			removeDups(dups)
		}
	}
	done <- true
}

// printer writes every checksum and file name received on sums to standard
// output, then sends on done. It is an alternative consumer to computeDups.
func printer(sums <-chan Checksum, done chan bool) {
	for sum := range sums {
		fmt.Println(sum.Checksum, sum.Filename)
	}
	done <- true
}

// checksumer hashes every file name received on fns and sends the result on
// sums. Files that cannot be opened, stat'ed or read completely, and anything
// that is not a regular file, are skipped so they can never be reported as
// duplicates. It sends on done once fns is closed and drained.
func checksumer(fns <-chan string, sums chan<- Checksum, done chan bool) {
	for fn := range fns {
		sum, ok := hashFile(fn)
		if !ok {
			continue
		}
		sums <- Checksum{Checksum: sum, Filename: fn}
	}
	done <- true
}

// hashFile returns the hex-encoded SHA-1 of the contents of fn. The boolean is
// false when fn must be skipped; the reason is logged here.
//
// NOTE(review): os.Open follows symbolic links, so a link and its target hash
// identically and end up in the same duplicate group.
func hashFile(fn string) (string, bool) {
	f, err := os.Open(fn)
	if err != nil {
		log.Print("Error opening file: ", err)
		return "", false
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		log.Print("Error stating file: ", err)
		return "", false
	}
	debug.Printf("Mode %s for %q", fi.Mode(), fn)
	if !fi.Mode().IsRegular() {
		debug.Print("Skipping non-file ", fn)
		return "", false
	}

	h := sha1.New()
	// A partial read would yield the hash of a prefix of the file, which could
	// match an unrelated file and get it deleted, so treat it as a failure.
	if _, err := io.Copy(h, f); err != nil {
		log.Printf("Error reading file %q: %s", fn, err)
		return "", false
	}
	return fmt.Sprintf("%x", h.Sum(nil)), true
}