// md2pg imports a Maildir format file into a PostgreSQL DB package main import ( "bytes" "database/sql" "flag" "fmt" "io" "os" "path/filepath" "strings" "time" "github.com/golang/glog" "github.com/lib/pq" "xinu.tv/email" "xinu.tv/set" ) var ( maildir = flag.String("maildir", "", "Maildir root") skipFiles = flag.String("skip", "maildirfolder,log,msgid.cache,razor-agent.log", "comma separated files to skip") // Hashed over fields from each message. headers = []string{"to", "from", "cc", "date", "subject"} ) var CRCR = []byte("\n\n") func Load(db *sql.DB, root string, skip *set.StringSet) error { dup := set.NewStrings() start := time.Now() cnt := 0 dupCnt := 0 defer func() { glog.Infof("%d messages processed in %s", cnt, time.Since(start)) glog.Infof("%d dups found", dupCnt) }() txn, err := db.Begin() if err != nil { return err } stmt, err := txn.Prepare(pq.CopyIn("original", "hash", "header_size", "total_size", "blob")) if err != nil { if err := txn.Rollback(); err != nil { glog.Errorln("txn.Prepare error rolling back", err) } return err } b := new(bytes.Buffer) err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() { return nil } base := filepath.Base(path) if skip.Contains(base) { return nil } b.Reset() r, err := os.Open(path) if err != nil { return err } defer r.Close() n, err := io.Copy(b, r) if err != nil { return err } blob := b.Bytes() hdr_size := bytes.Index(blob, CRCR) h, err := email.Hash(bytes.NewReader(blob), headers) if err != nil { glog.Errorf("%s not an mail file", path) glog.Infof("%q", err.Error()) return nil } chksum := fmt.Sprintf("%x", h.Sum(nil)) if dup.Contains(chksum) { glog.Warningln("Dup email found", chksum, path, len(blob)) dupCnt++ return nil } dup.Add(chksum) glog.Infoln(chksum, hdr_size, n) if _, err := stmt.Exec(chksum, hdr_size, n, blob); err != nil { return err } cnt++ return nil }) if _, err := stmt.Exec(); err != nil { if err := txn.Rollback(); err != nil { glog.Errorln("stmt.Exec error rolling back", err) } return err } if err := stmt.Close(); err != nil { if err := txn.Rollback(); err != nil { glog.Errorln("stmt.Close error rolling back", err) } return err } return txn.Commit() } func main() { defer glog.Flush() flag.Parse() // TODO(wathiede): make a set of flags. db, err := sql.Open("postgres", "user=gomail dbname=gomail sslmode=disable") if err != nil { glog.Fatal(err) } if *maildir == "" { fmt.Println("Must specify Maildir with -maildir") os.Exit(1) } skip := set.NewStrings(strings.Split(*skipFiles, ",")...) glog.Infoln("Skip files", skip) if err := Load(db, *maildir, skip); err != nil { glog.Fatal(err) } }