Use walk instead of glob to scale beyond the shell's limit to expand *
This commit is contained in:
parent
7a90676cda
commit
b48ebf217c
@ -11,9 +11,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
var totalFiles = 0
|
var root = flag.String("root", "", "root directory to search for dups")
|
||||||
|
|
||||||
var filePat = flag.String("filepat", "*", "Glob for list of files to dedup")
|
|
||||||
|
|
||||||
var debug = debugT(false)
|
var debug = debugT(false)
|
||||||
|
|
||||||
@ -38,31 +36,38 @@ type Checksum struct {
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
files, err := filepath.Glob(os.ExpandEnv(*filePat))
|
if *root == "" {
|
||||||
if err != nil {
|
log.Fatal("Must specify root")
|
||||||
log.Fatalf("Failed to glob %q: %s", *filePat, err)
|
|
||||||
}
|
}
|
||||||
totalFiles = len(files)
|
|
||||||
debug.Printf("Found %d files", totalFiles)
|
|
||||||
|
|
||||||
numWorkers := 10
|
numWorkers := 10
|
||||||
fns := make(chan string, numWorkers)
|
fns := make(chan string, numWorkers)
|
||||||
sums := make(chan Checksum, numWorkers)
|
sums := make(chan Checksum, numWorkers)
|
||||||
done := make(chan bool)
|
done := make(chan bool)
|
||||||
|
|
||||||
for i:=0; i< numWorkers; i++ {
|
for i := 0; i < numWorkers; i++ {
|
||||||
go checksumer(fns, sums, done)
|
go checksumer(fns, sums, done)
|
||||||
}
|
}
|
||||||
go computeDups(sums, done)
|
go computeDups(sums, done)
|
||||||
//go printer(sums, done)
|
//go printer(sums, done)
|
||||||
|
|
||||||
for _, fn := range files {
|
err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
|
||||||
fns <- fn
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if info.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
fns <- path
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Failed to walk %s: %v", *root, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
close(fns)
|
close(fns)
|
||||||
|
|
||||||
for i:=0; i< numWorkers; i++ {
|
for i := 0; i < numWorkers; i++ {
|
||||||
<-done
|
<-done
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,8 +98,8 @@ func computeDups(sums <-chan Checksum, done chan bool) {
|
|||||||
for sum := range sums {
|
for sum := range sums {
|
||||||
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
|
dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename)
|
||||||
|
|
||||||
if count > 0 && (count % 1000) == 0 {
|
if count > 0 && (count%1000) == 0 {
|
||||||
log.Printf("Processed %d/%d files", count, totalFiles)
|
log.Printf("Processed %d files", count)
|
||||||
}
|
}
|
||||||
count++
|
count++
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user