Add import for 'important' headers.

Process and normalize some headers that will be used in quickly recalling
messages.
This commit is contained in:
Bill Thiede 2014-04-11 21:21:56 -07:00
parent aebb1a2c92
commit 14bd95851c
4 changed files with 121 additions and 3 deletions

View File

@ -20,10 +20,19 @@ func insertHeaders(c *db.Conn, hash string, r io.Reader) error {
if err != nil {
return err
}
glog.Infoln("Insert", hash)
glog.Infoln("Insert headers", hash)
return c.InsertHeaders(hash, msg.Header)
}
// insertSearchHeaders reads an email message from r and hands its parsed
// headers to c.InsertSearchHeaders under the given hash. A failure to parse
// the message is returned as-is, before anything is logged or stored.
func insertSearchHeaders(c *db.Conn, hash string, r io.Reader) error {
	parsed, readErr := mail.ReadMessage(r)
	if readErr != nil {
		return readErr
	}

	glog.Infoln("Insert search headers", hash)

	storeErr := c.InsertSearchHeaders(hash, parsed.Header)
	return storeErr
}
func insertAllHeaders(c *db.Conn, all bool) error {
oCh := make(chan db.Original)
errc := make(chan error)
@ -39,6 +48,31 @@ func insertAllHeaders(c *db.Conn, all bool) error {
case o := <-oCh:
r := bytes.NewReader(o.Blob)
if err := insertHeaders(c, o.Hash, r); err != nil {
glog.Errorln(o, "insertHeaders", err)
return err
}
case err := <-errc:
return err
}
}
}
func insertAllSearchHeaders(c *db.Conn, all bool) error {
oCh := make(chan db.Original)
errc := make(chan error)
donec := make(chan struct{})
defer close(donec)
if all {
go c.Originals(oCh, errc, donec)
} else {
go c.OriginalsNotInTable("search_header", oCh, errc, donec)
}
for {
select {
case o := <-oCh:
r := bytes.NewReader(o.Blob)
if err := insertSearchHeaders(c, o.Hash, r); err != nil {
glog.Errorln(o.Hash, "insertSearchHeaders", err)
return err
}
case err := <-errc:
@ -60,4 +94,8 @@ func main() {
glog.Fatal(err)
}
if err := insertAllSearchHeaders(c, *all); err != nil {
glog.Fatal(err)
}
}

View File

@ -5,6 +5,7 @@ import (
"flag"
"fmt"
"net/mail"
"time"
"unicode/utf8"
"github.com/golang/glog"
@ -140,7 +141,7 @@ VALUES
for k, vs := range hdrs {
for _, v := range vs {
if !utf8.ValidString(v) {
glog.Infof("%s: value for %q invalid UTF-8 %q", hash, k, v)
glog.Warningf("%s: value for %q invalid UTF-8 %q", hash, k, v)
continue
}
if _, err := stmt.Exec(hash, k, v); err != nil {
@ -150,3 +151,75 @@ VALUES
}
return nil
}
// InsertSearchHeaders stores a normalized subset of hdrs in the search_header
// table, keyed by hash, inside a single transaction.
//
// Three groups of headers are written:
//   - Subject, Message-Id, In-Reply-To and References are stored untouched.
//   - To, From and Cc are parsed as address lists, and each email address
//     found is stored as its own row.
//   - Date is parsed and stored as UTC, formatted with time.RFC3339Nano.
//
// Values that are not valid UTF-8, address lists that fail to parse, and a
// missing or unparsable Date header are logged as warnings and skipped; they
// do not fail the call. Any database error rolls the transaction back and is
// returned. Otherwise the transaction is committed and the result of the
// commit is returned.
func (c *Conn) InsertSearchHeaders(hash string, hdrs mail.Header) (err error) {
	var tx *sql.Tx
	tx, err = c.Begin()
	if err != nil {
		return err
	}
	// err is a named result so this deferred function can observe the final
	// outcome: roll back on any failure, commit otherwise.
	defer func() {
		if err != nil {
			if rbErr := tx.Rollback(); rbErr != nil {
				glog.Error(rbErr)
			}
			return
		}
		err = tx.Commit()
	}()

	var stmt *sql.Stmt
	stmt, err = tx.Prepare(`
		INSERT INTO
			search_header (hash, name, value)
		VALUES
			($1, $2, $3)
	`)
	if err != nil {
		return err
	}
	// Deferred after the commit/rollback handler, so it runs first (LIFO) and
	// the statement is released before the transaction ends.
	defer func() {
		if closeErr := stmt.Close(); closeErr != nil {
			glog.Error(closeErr)
		}
	}()

	// insert writes one (hash, name, value) row. Values that are not valid
	// UTF-8 are logged and skipped rather than sent to the database.
	// NOTE(review): presumably the database rejects invalid UTF-8, which
	// would fail the whole transaction — confirm.
	insert := func(name, value string) error {
		if !utf8.ValidString(value) {
			glog.Warningf("%s: value for %q invalid UTF-8 %q", hash, name, value)
			return nil
		}
		_, execErr := stmt.Exec(hash, name, value)
		return execErr
	}

	// Save the following headers to the database untouched.
	otherHdrs := []string{"Subject", "Message-Id", "In-Reply-To", "References"}
	for _, k := range otherHdrs {
		for _, v := range hdrs[k] {
			if err := insert(k, v); err != nil {
				return err
			}
		}
	}

	// Parse the following headers as addresses, and save each email address
	// found to the database independently.
	addrHdrs := []string{"To", "From", "Cc"}
	for _, k := range addrHdrs {
		for _, value := range hdrs[k] {
			addrs, parseErr := mail.ParseAddressList(value)
			if parseErr != nil {
				glog.Warningf("%s: error parsing address list for %q: %v", hash, k, parseErr)
				continue
			}
			for _, a := range addrs {
				if err := insert(k, a.Address); err != nil {
					return err
				}
			}
		}
	}

	// Normalize date field. A separate error variable keeps a parse failure
	// from touching the named result, so the rows above are still committed.
	t, dateErr := hdrs.Date()
	if dateErr != nil {
		glog.Warningf("%s: failed to parse date header: %v", hash, dateErr)
		return nil
	}
	return insert("Date", t.UTC().Format(time.RFC3339Nano))
}

View File

@ -43,7 +43,7 @@ func (h Hasher) HashReader(r io.Reader) (hash.Hash, error) {
var std = Hasher([]string{"to", "from", "cc", "date", "subject", "message-id"})
// Hash will parse r as an email, and return the hash as a hexidecimal string
// HashReader will parse r as an email, and return the hash as a hexadecimal string
// using a default set of headers.
func HashReader(r io.Reader) (string, error) {
h, err := std.HashReader(r)

View File

@ -0,0 +1,7 @@
-- Recreate the search_header table: one row per stored header value of a
-- message, keyed by the message hash.
-- IF EXISTS lets this script run against a database where the table has not
-- been created yet; a bare DROP TABLE would fail with an error there.
DROP TABLE IF EXISTS search_header;
CREATE TABLE search_header (
	-- Hash of the message the header belongs to; must exist in original.
	hash CHAR(40) REFERENCES original (hash),
	-- Header name.
	name TEXT,
	-- Header value.
	value TEXT
);