162 lines
2.8 KiB
Go
162 lines
2.8 KiB
Go
package main
|
|
|
|
import (
|
|
"crypto/sha1"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
var root = flag.String("root", "", "root directory to search for dups")
|
|
|
|
var debug = debugT(false)
|
|
|
|
// debugT is a boolean switch that doubles as a conditional logger: its
// methods forward to the standard log package only when the value is true.
type debugT bool

// Print passes args to log.Print when dbg is true and does nothing otherwise.
func (dbg debugT) Print(args ...interface{}) {
	if !dbg {
		return
	}
	log.Print(args...)
}

// Printf passes format and args to log.Printf when dbg is true and does
// nothing otherwise.
func (dbg debugT) Printf(format string, args ...interface{}) {
	if !dbg {
		return
	}
	log.Printf(format, args...)
}
|
|
|
|
// Checksum pairs a file's content digest with the path it was computed from.
// Values travel from the checksumer workers to the consumer over a channel.
type Checksum struct {
	// Checksum is the digest of the file contents; checksumer fills it with
	// the lowercase hex encoding of a SHA-1 sum.
	Checksum string
	// Filename is the path of the file that was hashed.
	Filename string
}
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
|
|
if *root == "" {
|
|
log.Fatal("Must specify root")
|
|
}
|
|
|
|
numWorkers := 10
|
|
fns := make(chan string, numWorkers)
|
|
sums := make(chan Checksum, numWorkers)
|
|
done := make(chan bool)
|
|
|
|
for i := 0; i < numWorkers; i++ {
|
|
go checksumer(fns, sums, done)
|
|
}
|
|
go computeDups(sums, done)
|
|
//go printer(sums, done)
|
|
|
|
err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if info.IsDir() {
|
|
return nil
|
|
}
|
|
fns <- path
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
log.Fatalf("Failed to walk %s: %v", *root, err)
|
|
}
|
|
|
|
close(fns)
|
|
|
|
for i := 0; i < numWorkers; i++ {
|
|
<-done
|
|
}
|
|
|
|
close(sums)
|
|
<-done
|
|
}
|
|
|
|
// removeDups deletes redundant copies of duplicated files.
//
// dups maps a checksum to the list of paths that share it. For every list
// with more than one entry the first path is kept and all remaining paths are
// removed from disk; lists with zero or one entry are ignored. Each decision
// is logged. The first removal error terminates the program via log.Fatalf.
func removeDups(dups map[string][]string) {
	log.Print("Removing dups")
	for _, group := range dups {
		if len(group) <= 1 {
			continue
		}

		keep, extras := group[0], group[1:]
		log.Print("Skipping: ", keep)
		log.Print("Deleting")
		for _, victim := range extras {
			log.Print("\t", victim)
			if err := os.Remove(victim); err != nil {
				log.Fatalf("Failed to remove file: %s", err)
			}
		}
	}
}
|
|
|
|
func computeDups(sums <-chan Checksum, done chan bool) {
|
|
dups := map[string][]string{}
|
|
count := 0
|
|
for sum := range sums {
|
|
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
|
|
|
|
if count > 0 && (count%1000) == 0 {
|
|
log.Printf("Processed %d files", count)
|
|
}
|
|
count++
|
|
}
|
|
|
|
count = 0
|
|
for _, fns := range dups {
|
|
if len(fns) > 1 {
|
|
count++
|
|
}
|
|
}
|
|
|
|
if count == 0 {
|
|
log.Print("No dups found")
|
|
} else {
|
|
log.Printf("Found %d files with dups, delete them? [y/n]", count)
|
|
var answer string
|
|
fmt.Scan(&answer)
|
|
if strings.HasPrefix(strings.ToLower(answer), "y") {
|
|
removeDups(dups)
|
|
}
|
|
}
|
|
|
|
done <- true
|
|
}
|
|
|
|
func printer(sums <-chan Checksum, done chan bool) {
|
|
for sum := range sums {
|
|
fmt.Println(sum.Checksum, sum.Filename)
|
|
}
|
|
done <- true
|
|
}
|
|
|
|
func checksumer(fns <-chan string, sums chan<- Checksum, done chan bool) {
|
|
for fn := range fns {
|
|
f, err := os.Open(fn)
|
|
if err != nil {
|
|
log.Print("Error opening file: ", err)
|
|
continue
|
|
}
|
|
fi, err := f.Stat()
|
|
if err != nil {
|
|
log.Print("Error stating file: ", err)
|
|
} else {
|
|
debug.Printf("Mode %s for %q", fi.Mode(), fn)
|
|
if (fi.Mode() & os.ModeType) != 0 {
|
|
debug.Print("Skipping non-file ", fn)
|
|
}
|
|
}
|
|
|
|
h := sha1.New()
|
|
io.Copy(h, f)
|
|
f.Close()
|
|
sums <- Checksum{
|
|
Checksum: fmt.Sprintf("%x", h.Sum(nil)),
|
|
Filename: fn,
|
|
}
|
|
}
|
|
done <- true
|
|
}
|