email/cmd/ldbstats/ldbstats.go

145 lines
2.8 KiB
Go

// Command ldbstats opens a LevelDB database of stored email messages, parses
// every value as a mail message, and prints how often each address appears in
// the To, Cc and From headers along with the display names seen for it.
package main
import (
"bytes"
"flag"
"fmt"
"net/mail"
"sort"
"strings"
"time"
"github.com/golang/glog"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/util"
)
// levelDBPath holds the value of the -db flag: the filesystem path of the
// LevelDB database that main opens and stats reads from.
var levelDBPath = flag.String("db", "", "level DB path for storing email")
// sortedIntMap implements sort.Interface over the keys of a map[string]int.
// Sorting reorders s so that keys with larger values in m come first; keys
// with equal values are ordered alphabetically so the result is deterministic.
type sortedIntMap struct {
	m map[string]int // value looked up for each key while comparing
	s []string       // keys being sorted in place
}

// Len reports the number of keys being sorted. It is based on s, not m,
// because Less and Swap index into s.
func (sm *sortedIntMap) Len() int {
	return len(sm.s)
}

// Less orders by descending value and breaks ties by ascending key, so the
// outcome does not depend on the (random) map iteration order used to fill s.
func (sm *sortedIntMap) Less(i, j int) bool {
	ki, kj := sm.s[i], sm.s[j]
	if ci, cj := sm.m[ki], sm.m[kj]; ci != cj {
		return ci > cj
	}
	return ki < kj
}

// Swap exchanges the keys at positions i and j.
func (sm *sortedIntMap) Swap(i, j int) {
	sm.s[i], sm.s[j] = sm.s[j], sm.s[i]
}
// sortIntMap returns the keys of m as a new slice, ordered by sorting a
// sortedIntMap built over m (keys with the highest values come first).
// The map itself is not modified.
func sortIntMap(m map[string]int) []string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sorter := &sortedIntMap{m: m, s: keys}
	sort.Sort(sorter)
	return sorter.s
}
// sortedStringSliceMap implements sort.Interface over the keys of a
// map[string][]string. Sorting reorders s so that keys whose slices are longer
// come first; keys with equally long slices are ordered alphabetically so the
// result is deterministic.
type sortedStringSliceMap struct {
	m map[string][]string // slice whose length ranks each key
	s []string            // keys being sorted in place
}

// Len reports the number of keys being sorted. It is based on s, not m,
// because Less and Swap index into s.
func (sm *sortedStringSliceMap) Len() int {
	return len(sm.s)
}

// Less orders by descending slice length and breaks ties by ascending key, so
// the outcome does not depend on the (random) map iteration order used to
// fill s.
func (sm *sortedStringSliceMap) Less(i, j int) bool {
	ki, kj := sm.s[i], sm.s[j]
	if ni, nj := len(sm.m[ki]), len(sm.m[kj]); ni != nj {
		return ni > nj
	}
	return ki < kj
}

// Swap exchanges the keys at positions i and j.
func (sm *sortedStringSliceMap) Swap(i, j int) {
	sm.s[i], sm.s[j] = sm.s[j], sm.s[i]
}
// sortStringSliceMap returns the keys of m as a new slice, ordered by sorting
// a sortedStringSliceMap built over m (keys with the longest slices come
// first). The map itself is not modified.
func sortStringSliceMap(m map[string][]string) []string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sorter := &sortedStringSliceMap{m: m, s: keys}
	sort.Sort(sorter)
	return sorter.s
}
// count tallies how many times each distinct string occurs in ss and renders
// the tallies as space-separated "(N) value" entries, in the order returned by
// sortIntMap. An empty input yields the empty string.
func count(ss []string) string {
	tally := make(map[string]int)
	for _, name := range ss {
		tally[name]++
	}

	ordered := sortIntMap(tally)
	parts := make([]string, len(ordered))
	for i, name := range ordered {
		parts[i] = fmt.Sprintf("(%d) %s", tally[name], name)
	}
	return strings.Join(parts, " ")
}
// stats iterates over the records of db, parses each value as a mail message,
// and collects the addresses found in the To, Cc and From headers. Addresses
// are lower-cased and used as keys; every display name seen for an address is
// recorded under it. On success it prints the number of messages processed,
// the number of distinct addresses, and one line per address (most frequently
// seen first) listing its display names with their counts.
//
// It returns an error, without printing any results, if a value cannot be
// parsed as a message or if the iterator reports a failure; partial results
// would otherwise look like a complete report.
func stats(db *leveldb.DB) error {
	start := time.Now()
	it := db.NewIterator(&util.Range{}, nil)
	defer it.Release()

	cnt := 0
	allAddrs := map[string][]string{}
	for it.Next() {
		cnt++
		if cnt%1000 == 0 {
			// Periodic progress report for long scans.
			glog.Infof("Processed %d messages in %v", cnt, time.Since(start))
		}

		m, err := mail.ReadMessage(bytes.NewReader(it.Value()))
		if err != nil {
			// Include the record key so the offending message can be located.
			return fmt.Errorf("parsing message with key %q: %w", it.Key(), err)
		}

		for _, h := range []string{"To", "Cc", "From"} {
			addrs, err := m.Header.AddressList(h)
			if err != nil {
				// Best effort: a header that AddressList cannot return
				// addresses for is skipped rather than failing the run.
				continue
			}
			for _, a := range addrs {
				k := strings.ToLower(a.Address)
				allAddrs[k] = append(allAddrs[k], a.Name)
			}
		}
	}
	// Check the iterator before reporting: Next also returns false when
	// iteration stops because of an error.
	if err := it.Error(); err != nil {
		return fmt.Errorf("iterating over database: %w", err)
	}

	fmt.Printf("Processed %d messages in %v\n", cnt, time.Since(start))
	fmt.Printf("Found %d addresses\n", len(allAddrs))
	for _, addr := range sortStringSliceMap(allAddrs) {
		fmt.Printf(" %s: %s\n", addr, count(allAddrs[addr]))
	}
	return nil
}
// main parses the command-line flags, opens the LevelDB database named by -db,
// runs stats over it, and closes the database. It exits with a logged error if
// the database cannot be opened or if stats fails.
func main() {
	flag.Parse()
	defer glog.Flush()

	db, err := leveldb.OpenFile(*levelDBPath, nil)
	if err != nil {
		glog.Exitf("Error opening leveldb: %v", err)
	}

	statsErr := stats(db)

	// Close explicitly rather than with defer: glog.Exitf below terminates the
	// process, and deferred calls would not run on that path.
	if err := db.Close(); err != nil {
		glog.Errorf("Error closing %q: %v", *levelDBPath, err)
	}
	if statsErr != nil {
		glog.Exitf("Failed to compute stats: %v", statsErr)
	}
}