email/dedup/dedup.go

162 lines
2.8 KiB
Go

package main
import (
"crypto/sha1"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"
)
// root is the -root command-line flag: the directory tree that is searched
// for duplicate files. Its default is the empty string.
var root = flag.String(
	"root",
	"",
	"root directory to search for dups",
)
// debugT is a boolean switch for diagnostic logging. Its methods forward to
// the standard logger when the value is true and do nothing when it is false.
type debugT bool

// debug is the switch used throughout this file; it is off (false) by default.
var debug = debugT(false)

// Print logs args via log.Print, but only when d is true.
func (d debugT) Print(args ...interface{}) {
	if !d {
		return
	}
	log.Print(args...)
}

// Printf logs the formatted message via log.Printf, but only when d is true.
func (d debugT) Printf(format string, args ...interface{}) {
	if !d {
		return
	}
	log.Printf(format, args...)
}
// Checksum pairs a file with the digest computed from its contents.
type Checksum struct {
	// Checksum is the digest of the file's contents, as a string.
	Checksum string
	// Filename is the path of the file the digest was computed from.
	Filename string
}
// main walks the directory named by -root, hands every non-directory path to
// a pool of checksumer goroutines, and lets computeDups collect the results.
//
// Shutdown order: the path channel is closed once the walk finishes, each
// worker then signals completion, after which the result channel is closed so
// that computeDups can finish and send the final completion signal.
func main() {
	flag.Parse()
	dir := *root
	if dir == "" {
		log.Fatal("Must specify root")
	}

	const workers = 10
	paths := make(chan string, workers)
	results := make(chan Checksum, workers)
	finished := make(chan bool)

	for w := workers; w > 0; w-- {
		go checksumer(paths, results, finished)
	}
	go computeDups(results, finished)
	// Alternative consumer that only prints each checksum and file name:
	//go printer(results, finished)

	// enqueue forwards every non-directory path to the workers and aborts the
	// walk on the first error reported by filepath.Walk.
	enqueue := func(path string, info os.FileInfo, err error) error {
		switch {
		case err != nil:
			return err
		case info.IsDir():
			return nil
		}
		paths <- path
		return nil
	}
	if err := filepath.Walk(dir, enqueue); err != nil {
		log.Fatalf("Failed to walk %s: %v", dir, err)
	}

	// No more paths: let the workers drain the queue and wait for all of them.
	close(paths)
	for w := workers; w > 0; w-- {
		<-finished
	}

	// All producers are done, so the result stream can end; then wait for the
	// consumer's own completion signal.
	close(results)
	<-finished
}
// removeDups deletes duplicate files from disk. dups maps a checksum to the
// files that share it; for every entry holding more than one file the first
// file is kept and all the others are removed. The program terminates via
// log.Fatalf as soon as a removal fails.
func removeDups(dups map[string][]string) {
	log.Print("Removing dups")
	for _, group := range dups {
		if len(group) < 2 {
			// A single file (or none) has nothing to be a duplicate of.
			continue
		}
		keep, extras := group[0], group[1:]
		log.Print("Skipping: ", keep)
		log.Print("Deleting")
		for _, victim := range extras {
			log.Print("\t", victim)
			if err := os.Remove(victim); err != nil {
				log.Fatalf("Failed to remove file: %s", err)
			}
		}
	}
}
// computeDups gathers every Checksum received on sums and groups the file
// names by checksum. Once sums is closed it counts the checksums shared by
// more than one file; if there are any, it asks on standard input whether the
// duplicates should be deleted and calls removeDups when the answer starts
// with "y" or "Y". Finally it sends true on done.
//
// Progress is logged after every 1000 files recorded. If the answer cannot be
// read, the error is logged and nothing is deleted.
func computeDups(sums <-chan Checksum, done chan bool) {
	dups := map[string][]string{}
	processed := 0
	for sum := range sums {
		dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
		// Count the file before reporting so the logged number matches the
		// number of files actually recorded so far.
		processed++
		if processed%1000 == 0 {
			log.Printf("Processed %d files", processed)
		}
	}

	// Number of checksums that are shared by at least two files.
	groups := 0
	for _, fns := range dups {
		if len(fns) > 1 {
			groups++
		}
	}

	if groups == 0 {
		log.Print("No dups found")
	} else {
		log.Printf("Found %d files with dups, delete them? [y/n]", groups)
		var answer string
		if _, err := fmt.Scan(&answer); err != nil {
			// Deleting is destructive, so an unreadable answer means "no".
			log.Print("Failed to read answer, not deleting anything: ", err)
		} else if strings.HasPrefix(strings.ToLower(answer), "y") {
			removeDups(dups)
		}
	}
	done <- true
}
// printer writes one line per received Checksum to standard output, in the
// form "<checksum> <filename>". When sums is closed it sends true on done.
func printer(sums <-chan Checksum, done chan bool) {
	for {
		entry, open := <-sums
		if !open {
			break
		}
		fmt.Println(entry.Checksum, entry.Filename)
	}
	done <- true
}
// checksumer is a worker loop: it reads file names from fns until the channel
// is closed and, for each regular file it can read completely, sends a
// Checksum holding the hex-encoded SHA-1 of the contents on sums. Files that
// cannot be opened, stat'ed or read, and anything that is not a regular file,
// are skipped. When fns is drained it sends true on done.
func checksumer(fns <-chan string, sums chan<- Checksum, done chan bool) {
	for fn := range fns {
		sum, ok := sha1OfRegularFile(fn)
		if !ok {
			continue
		}
		sums <- Checksum{
			Checksum: sum,
			Filename: fn,
		}
	}
	done <- true
}

// sha1OfRegularFile returns the hex-encoded SHA-1 of the contents of fn. The
// boolean is false when no trustworthy digest could be produced; the reason
// has then already been logged.
func sha1OfRegularFile(fn string) (string, bool) {
	f, err := os.Open(fn)
	if err != nil {
		log.Print("Error opening file: ", err)
		return "", false
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		log.Print("Error stating file: ", err)
		return "", false
	}
	debug.Printf("Mode %s for %q", fi.Mode(), fn)
	// Only regular files have contents worth comparing. Hashing anything else
	// (for example a directory reached through a symlink) reads nothing and
	// yields the digest of empty input, which would make unrelated entries
	// look like duplicates of each other.
	if !fi.Mode().IsRegular() {
		debug.Print("Skipping non-file ", fn)
		return "", false
	}

	h := sha1.New()
	// A digest of partially read data must never be published: it could match
	// another file by accident and lead to a wrong deletion.
	if _, err := io.Copy(h, f); err != nil {
		log.Print("Error reading file: ", err)
		return "", false
	}
	return fmt.Sprintf("%x", h.Sum(nil)), true
}