Path cleanup.
This commit is contained in:
156
dedup/dedup.go
Normal file
156
dedup/dedup.go
Normal file
@@ -0,0 +1,156 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Package-level state shared by main and the goroutines it starts.
var (
	// totalFiles is the number of paths matched by the glob. main assigns it
	// before starting any goroutine; computeDups reads it for progress logs.
	totalFiles int

	// filePat holds the value of the -filepat flag: the glob pattern that
	// selects the files to examine.
	filePat = flag.String("filepat", "*", "Glob for list of files to dedup")
)
|
||||
|
||||
var debug = debugT(false)
|
||||
|
||||
type debugT bool
|
||||
|
||||
func (d debugT) Print(args ...interface{}) {
|
||||
if d {
|
||||
log.Print(args...)
|
||||
}
|
||||
}
|
||||
|
||||
func (d debugT) Printf(format string, args ...interface{}) {
|
||||
if d {
|
||||
log.Printf(format, args...)
|
||||
}
|
||||
}
|
||||
|
||||
// Checksum associates a file's digest with the path it was computed from.
type Checksum struct {
	// Checksum is the digest rendered as a lowercase hex string.
	Checksum string
	// Filename is the path of the file the digest belongs to.
	Filename string
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
files, err := filepath.Glob(os.ExpandEnv(*filePat))
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to glob %q: %s", *filePat, err)
|
||||
}
|
||||
totalFiles = len(files)
|
||||
debug.Printf("Found %d files", totalFiles)
|
||||
|
||||
numWorkers := 10
|
||||
fns := make(chan string, numWorkers)
|
||||
sums := make(chan Checksum, numWorkers)
|
||||
done := make(chan bool)
|
||||
|
||||
for i:=0; i< numWorkers; i++ {
|
||||
go checksumer(fns, sums, done)
|
||||
}
|
||||
go computeDups(sums, done)
|
||||
//go printer(sums, done)
|
||||
|
||||
for _, fn := range files {
|
||||
fns <- fn
|
||||
}
|
||||
|
||||
close(fns)
|
||||
|
||||
for i:=0; i< numWorkers; i++ {
|
||||
<-done
|
||||
}
|
||||
|
||||
close(sums)
|
||||
<-done
|
||||
}
|
||||
|
||||
func removeDups(dups map[string][]string) {
|
||||
log.Print("Removing dups")
|
||||
for _, fns := range dups {
|
||||
if len(fns) > 1 {
|
||||
log.Print("Skipping: ", fns[0])
|
||||
log.Print("Deleting")
|
||||
for _, fn := range fns[1:] {
|
||||
log.Print("\t", fn)
|
||||
err := os.Remove(fn)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to remove file: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func computeDups(sums <-chan Checksum, done chan bool) {
|
||||
dups := map[string][]string{}
|
||||
count := 0
|
||||
for sum := range sums {
|
||||
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
|
||||
|
||||
if count > 0 && (count % 1000) == 0 {
|
||||
log.Printf("Processed %d/%d files", count, totalFiles)
|
||||
}
|
||||
count++
|
||||
}
|
||||
|
||||
count = 0
|
||||
for _, fns := range dups {
|
||||
if len(fns) > 1 {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
||||
if count == 0 {
|
||||
log.Print("No dups found")
|
||||
} else {
|
||||
log.Printf("Found %d files with dups, delete them? [y/n]", count)
|
||||
var answer string
|
||||
fmt.Scan(&answer)
|
||||
if strings.HasPrefix(strings.ToLower(answer), "y") {
|
||||
removeDups(dups)
|
||||
}
|
||||
}
|
||||
|
||||
done <- true
|
||||
}
|
||||
|
||||
func printer(sums <-chan Checksum, done chan bool) {
|
||||
for sum := range sums {
|
||||
fmt.Println(sum.Checksum, sum.Filename)
|
||||
}
|
||||
done <- true
|
||||
}
|
||||
|
||||
func checksumer(fns <-chan string, sums chan<- Checksum, done chan bool) {
|
||||
for fn := range fns {
|
||||
f, err := os.Open(fn)
|
||||
if err != nil {
|
||||
log.Print("Error opening file: ", err)
|
||||
continue
|
||||
}
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
log.Print("Error stating file: ", err)
|
||||
} else {
|
||||
debug.Printf("Mode %s for %q", fi.Mode(), fn)
|
||||
if (fi.Mode() & os.ModeType) != 0 {
|
||||
debug.Print("Skipping non-file ", fn)
|
||||
}
|
||||
}
|
||||
|
||||
h := sha1.New()
|
||||
io.Copy(h, f)
|
||||
f.Close()
|
||||
sums <- Checksum{
|
||||
Checksum: fmt.Sprintf("%x", h.Sum(nil)),
|
||||
Filename: fn,
|
||||
}
|
||||
}
|
||||
done <- true
|
||||
}
|
||||
Reference in New Issue
Block a user