// md2pg imports a Maildir format file into a PostgreSQL DB package main import ( "bytes" "database/sql" "flag" "fmt" "io/ioutil" "os" "path/filepath" "strings" "time" "github.com/golang/glog" _ "github.com/lib/pq" "xinu.tv/email" "xinu.tv/set" "xinu.tv/types" ) var ( maildir = flag.String("maildir", "", "Maildir root") username = flag.String("user", "wathiede", "username for email we're importing") skipFiles = flag.String("skip", "maildirfolder,log,msgid.cache,razor-agent.log", "comma separated files to skip") // Hashed over fields from each message. headers = []string{"to", "from", "cc", "date", "subject", "message-id"} ) var CRCR = []byte("\n\n") func Load(db *sql.DB, uid int, root string, skip *set.StringSet) error { dup := set.NewStrings() start := time.Now() cnt := 0 dupCnt := 0 defer func() { glog.Infof("%d messages processed in %s", cnt, time.Since(start)) glog.Infof("%d dups found", dupCnt) }() txn, err := db.Begin() if err != nil { return err } stmt, err := txn.Prepare("INSERT INTO original (uid, hash, header_size, total_size, blob) VALUES ($1, $2, $3, $4, $5);") //stmt, err := txn.Prepare(pq.CopyIn("original", "uid", "hash", "total_size", "header", "body")) if err != nil { if err := txn.Rollback(); err != nil { glog.Errorln("txn.Prepare stmt error rolling back", err) } return err } //hstmt, err := txn.Prepare(pq.CopyIn("files", "hash", "path")) hstmt, err := txn.Prepare("INSERT INTO files (hash, path) VALUES ($1, $2);") if err != nil { if err := txn.Rollback(); err != nil { glog.Errorln("txn.Prepare hstmt error rolling back", err) } return err } var total int err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() { return nil } base := filepath.Base(path) if skip.Contains(base) { return nil } r, err := os.Open(path) if err != nil { return err } defer r.Close() b, err := ioutil.ReadAll(r) if err != nil { return err } hdr_size := bytes.Index(b, CRCR) h, err := email.Hash(bytes.NewReader(b), headers) if err != nil { glog.Errorf("%s not an mail file", path) glog.Infof("%q", err.Error()) return nil } chksum := fmt.Sprintf("%x", h.Sum(nil)) if _, err := hstmt.Exec(chksum, path); err != nil { return err } if dup.Contains(chksum) { glog.Warningln("Dup ", chksum, path[len(root)+1:], len(b)) dupCnt++ return nil } dup.Add(chksum) n := len(b) total += n delta := time.Since(start) if cnt%1000 == 0 { glog.Infof("%d messages processed in %s: %.2f msg/s %s/s", cnt, delta, float64(cnt)/delta.Seconds(), types.Base2Size(float64(total)/delta.Seconds())) } if _, err := stmt.Exec(uid, chksum, hdr_size, n, b); err != nil { return err } cnt++ return nil }) /* if _, err := stmt.Exec(); err != nil { if err := txn.Rollback(); err != nil { glog.Errorln("stmt.Exec error rolling back", err) } return err } */ if err := stmt.Close(); err != nil { if err := txn.Rollback(); err != nil { glog.Errorln("stmt.Close error rolling back", err) } return err } if err := hstmt.Close(); err != nil { if err := txn.Rollback(); err != nil { glog.Errorln("stmt.Close error rolling back", err) } return err } return txn.Commit() } func GetUid(db *sql.DB, username string) (int, error) { var uid int err := db.QueryRow("SELECT uid FROM person WHERE username=$1", username).Scan(&uid) switch { case err == sql.ErrNoRows, err != nil: return -1, err } return uid, nil } func main() { defer glog.Flush() flag.Parse() // TODO(wathiede): make a set of flags. db, err := sql.Open("postgres", "user=gomail dbname=gomail sslmode=disable") if err != nil { glog.Fatal(err) } uid, err := GetUid(db, *username) if err != nil { glog.Fatal(err) } glog.Infoln("Using uid", uid, "for", *username) if *maildir == "" { fmt.Println("Must specify Maildir with -maildir") os.Exit(1) } skip := set.NewStrings(strings.Split(*skipFiles, ",")...) glog.Infoln("Skip files", skip) if err := Load(db, uid, *maildir, skip); err != nil { glog.Fatal(err) } }