Use expvar for message stats.

This commit is contained in:
Bill Thiede 2014-03-24 23:21:38 -07:00
parent 06310ad1cc
commit c7bcdde1e5

View File

@ -4,11 +4,13 @@ package main
import ( import (
"bytes" "bytes"
"database/sql" "database/sql"
"expvar"
"flag" "flag"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"strconv"
"strings" "strings"
"time" "time"
@ -28,6 +30,10 @@ var (
// Hashed over fields from each message. // Hashed over fields from each message.
headers = []string{"to", "from", "cc", "date", "subject", "message-id"} headers = []string{"to", "from", "cc", "date", "subject", "message-id"}
total = expvar.NewInt("bytes-parsed")
cnt = expvar.NewInt("messages-parsed")
dupCnt = expvar.NewInt("duplicates-found")
) )
var CRCR = []byte("\n\n") var CRCR = []byte("\n\n")
@ -35,11 +41,9 @@ var CRCR = []byte("\n\n")
func Load(db *sql.DB, uid int, root string, skip *set.StringSet) error { func Load(db *sql.DB, uid int, root string, skip *set.StringSet) error {
dup := set.NewStrings() dup := set.NewStrings()
start := time.Now() start := time.Now()
cnt := 0
dupCnt := 0
defer func() { defer func() {
glog.Infof("%d messages processed in %s", cnt, time.Since(start)) glog.Infof("%s messages processed in %s", cnt, time.Since(start))
glog.Infof("%d dups found", dupCnt) glog.Infof("%s dups found", dupCnt)
}() }()
txn, err := db.Begin() txn, err := db.Begin()
@ -65,7 +69,6 @@ func Load(db *sql.DB, uid int, root string, skip *set.StringSet) error {
return err return err
} }
var total int
err = filepath.Walk(root, err = filepath.Walk(root,
func(path string, info os.FileInfo, err error) error { func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
@ -106,24 +109,32 @@ func Load(db *sql.DB, uid int, root string, skip *set.StringSet) error {
if dup.Contains(chksum) { if dup.Contains(chksum) {
glog.Warningln("Dup ", chksum, path[len(root)+1:], len(b)) glog.Warningln("Dup ", chksum, path[len(root)+1:], len(b))
dupCnt++ dupCnt.Add(1)
return nil return nil
} }
dup.Add(chksum) dup.Add(chksum)
n := len(b) n := len(b)
total += n total.Add(int64(n))
delta := time.Since(start) delta := time.Since(start)
if cnt%1000 == 0 { i, err := strconv.Atoi(cnt.String())
glog.Infof("%d messages processed in %s: %.2f msg/s %s/s", cnt, if err != nil {
delta, float64(cnt)/delta.Seconds(), return err
types.Base2Size(float64(total)/delta.Seconds())) }
if i%1000 == 0 {
t, err := strconv.Atoi(total.String())
if err != nil {
return err
}
glog.Infof("%d messages processed in %s: %.2f msg/s %s/s", i,
delta, float64(i)/delta.Seconds(),
types.Base2Size(float64(t)/delta.Seconds()))
} }
if _, err := stmt.Exec(uid, chksum, hdr_size, n, b); err != nil { if _, err := stmt.Exec(uid, chksum, hdr_size, n, b); err != nil {
return err return err
} }
cnt++ cnt.Add(1)
return nil return nil
}) })