package main import ( "bytes" "flag" "fmt" "net/mail" "sort" "strings" "time" "github.com/golang/glog" "github.com/syndtr/goleveldb/leveldb" "github.com/syndtr/goleveldb/leveldb/util" ) var levelDBPath = flag.String("db", "", "level DB path for storing email") type sortedIntMap struct { m map[string]int s []string } func (sm *sortedIntMap) Len() int { return len(sm.m) } func (sm *sortedIntMap) Less(i, j int) bool { return sm.m[sm.s[i]] > sm.m[sm.s[j]] } func (sm *sortedIntMap) Swap(i, j int) { sm.s[i], sm.s[j] = sm.s[j], sm.s[i] } func sortIntMap(m map[string]int) []string { sm := new(sortedIntMap) sm.m = m sm.s = make([]string, len(m)) i := 0 for key, _ := range m { sm.s[i] = key i++ } sort.Sort(sm) return sm.s } type sortedStringSliceMap struct { m map[string][]string s []string } func (sm *sortedStringSliceMap) Len() int { return len(sm.m) } func (sm *sortedStringSliceMap) Less(i, j int) bool { return len(sm.m[sm.s[i]]) > len(sm.m[sm.s[j]]) } func (sm *sortedStringSliceMap) Swap(i, j int) { sm.s[i], sm.s[j] = sm.s[j], sm.s[i] } func sortStringSliceMap(m map[string][]string) []string { sm := new(sortedStringSliceMap) sm.m = m sm.s = make([]string, len(m)) i := 0 for key, _ := range m { sm.s[i] = key i++ } sort.Sort(sm) return sm.s } func count(ss []string) string { c := map[string]int{} for _, s := range ss { c[s]++ } var out []string for _, s := range sortIntMap(c) { out = append(out, fmt.Sprintf("(%d) %s", c[s], s)) } return strings.Join(out, " ") } func stats(db *leveldb.DB) error { start := time.Now() it := db.NewIterator(&util.Range{}, nil) defer it.Release() cnt := 0 allAddrs := map[string][]string{} for it.Next() { cnt++ if cnt%1000 == 0 { //TODO XXX TEST //break glog.Infof("Processed %d messages in %v", cnt, time.Since(start)) } m, err := mail.ReadMessage(bytes.NewReader(it.Value())) if err != nil { return err } for _, h := range []string{"To", "Cc", "From"} { addrs, err := m.Header.AddressList(h) if err != nil { continue } for _, a := range addrs { k := strings.ToLower(a.Address) allAddrs[k] = append(allAddrs[k], a.Name) } } } fmt.Printf("Processed %d messages in %v\n", cnt, time.Since(start)) fmt.Printf("Found %d addresses\n", len(allAddrs)) for _, addr := range sortStringSliceMap(allAddrs) { fmt.Printf(" %s: %s\n", addr, count(allAddrs[addr])) } return it.Error() } func main() { flag.Parse() defer glog.Flush() db, err := leveldb.OpenFile(*levelDBPath, nil) if err != nil { glog.Exitf("Error opening leveldb: %v", err) } defer func() { if err := db.Close(); err != nil { glog.Errorf("Error closing %q: %v", *levelDBPath, err) } }() if err := stats(db); err != nil { glog.Exitf("Failed to compute stats: %v", err) } }