email/lsmime/lsmime.go
2013-08-13 20:54:10 -07:00

292 lines
5.3 KiB
Go

package main
import (
	"bytes"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"mime"
	"mime/multipart"
	"net/mail"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"time"
)
// maildirGlob is the value of the -maildir flag: a glob pattern selecting the
// message files to process. It defaults to a pattern below $HOME/Maildir.
var maildirGlob = flag.String(
	"maildir",
	os.ExpandEnv("${HOME}/Maildir/.*/*/*"),
	"Pattern for email sent files",
)

// attachementsDir is the value of the -attachementsDir flag: the root
// directory under which attachments are written. It defaults to
// $HOME/.attachments.
var attachementsDir = flag.String(
	"attachementsDir",
	os.ExpandEnv("${HOME}/.attachments"),
	"Directory to store attachements in",
)
// Attachment pairs a MIME part with the bytes that were read from its body.
type Attachment struct {
// Part is a copy of the multipart.Part the bytes came from; its FileName is
// consulted when the attachment is saved.
Part multipart.Part
// Bytes is the part body as returned by ioutil.ReadAll.
Bytes []byte
}
// Email is a message file together with its parsed form and the MIME parts
// collected from it.
type Email struct {
// FileName is the path of the message file; ParseMessage reads from it.
FileName string
// Message is the parsed message, set by ParseMessage.
Message *mail.Message
// Attachments holds the parts appended by ParseParts and ParseSubParts.
Attachments []Attachment
}
// NewEmail builds an Email for the file fn: it parses the message with
// ParseMessage and then collects its MIME parts with ParseParts. The first
// error from either step is returned and no Email is produced.
func NewEmail(fn string) (*Email, error) {
	e := &Email{FileName: fn, Attachments: []Attachment{}}
	if err := e.ParseMessage(); err != nil {
		return nil, err
	}
	if err := e.ParseParts(e.Message); err != nil {
		return nil, err
	}
	return e, nil
}
// String returns a debugging dump of the email: the file name followed by the
// Go-syntax representations of the parsed message and of the attachments.
//
// The format string carries exactly one verb per operand; a surplus verb would
// render as "%!v(MISSING)" in the output.
func (e *Email) String() string {
	return fmt.Sprintf("%s:\n\n%#v\n\n%#v", e.FileName, e.Message, e.Attachments)
}
// ParseMessage reads the file named by e.FileName and parses it as an RFC 5322
// message, storing the result in e.Message.
//
// The whole file is loaded into memory and parsed from that buffer, so no file
// descriptor stays open after the call. This matters because e.Message.Body is
// read lazily later on: parsing straight from an *os.File would force the file
// to remain open (and leak) for as long as the Email is in use.
//
// It returns the error from reading the file or from mail.ReadMessage.
func (e *Email) ParseMessage() error {
	data, err := ioutil.ReadFile(e.FileName)
	if err != nil {
		return err
	}
	e.Message, err = mail.ReadMessage(bytes.NewReader(data))
	if err != nil {
		return err
	}
	return nil
}
// ParseSubParts processes a single MIME part.
//
// If the part's Content-Type cannot be parsed or carries no "boundary"
// parameter, the part is treated as a leaf and its body is read and returned.
// Bodies are returned exactly as read from the part; no transfer-encoding
// handling is done here.
//
// Otherwise the part is treated as a nested multipart container: each child is
// processed recursively, every child that yields non-nil bytes is appended to
// e.Attachments, and nil bytes are returned for the container itself once its
// children are exhausted. The first error encountered aborts the walk.
func (e *Email) ParseSubParts(part *multipart.Part) ([]byte, error) {
	contentType := part.Header.Get("Content-Type")
	_, params, parseErr := mime.ParseMediaType(contentType)
	if parseErr != nil {
		return ioutil.ReadAll(part)
	}
	boundary, nested := params["boundary"]
	if !nested {
		// Not multipart
		return ioutil.ReadAll(part)
	}

	children := multipart.NewReader(part, boundary)
	for {
		child, err := children.NextPart()
		if err == io.EOF {
			return nil, nil
		}
		if err != nil {
			return nil, err
		}
		body, err := e.ParseSubParts(child)
		if err != nil {
			return nil, err
		}
		if body == nil {
			continue
		}
		e.Attachments = append(e.Attachments, Attachment{Part: *child, Bytes: body})
	}
}
// ParseParts walks the top-level MIME parts of msg and appends every part that
// yields non-nil bytes from ParseSubParts to e.Attachments.
//
// A message whose Content-Type cannot be parsed, or that has no "boundary"
// parameter, is treated as non-multipart and produces no attachments and no
// error. Both the headers and the body are taken from msg, so the method works
// on the message it is given rather than silently mixing msg's headers with
// e.Message's body.
//
// It returns the first error reported by the multipart reader or by
// ParseSubParts.
func (e *Email) ParseParts(msg *mail.Message) error {
	v := msg.Header.Get("Content-Type")
	_, params, err := mime.ParseMediaType(v)
	if err != nil {
		// Not multipart
		return nil
	}
	boundary, ok := params["boundary"]
	if !ok {
		// Not multipart
		return nil
	}

	pr := multipart.NewReader(msg.Body, boundary)
	for {
		part, err := pr.NextPart()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		data, err := e.ParseSubParts(part)
		if err != nil {
			return err
		}
		// Nested multipart containers report nil; their children have already
		// been appended by ParseSubParts.
		if data != nil {
			e.Attachments = append(e.Attachments, Attachment{
				Part:  *part,
				Bytes: data,
			})
		}
	}
}
// createDirIfNeeded makes sure dir exists. If os.Stat on dir succeeds nothing
// is done; otherwise dir and any missing parents are created with mode 0700.
//
// The function has no return value, so a failure to create the directory is
// logged rather than silently dropped; the caller's subsequent file operation
// will then fail with its own error.
func createDirIfNeeded(dir string) {
	if _, err := os.Stat(dir); err == nil {
		return
	}
	log.Println("Creating", dir)
	if err := os.MkdirAll(dir, 0700); err != nil {
		log.Printf("Failed to create %q: %s", dir, err)
	}
}
// saveAttachment writes a.Bytes to a file inside dir named after the part's
// file name. Parts without a file name are skipped and nil is returned.
//
// The file name comes from the message and is therefore untrusted: only its
// final path element is used, and names that would resolve to a directory
// (".", ".." or the path separator) are rejected, so a crafted name cannot
// place the file outside dir.
//
// dir is created if needed. An existing file with the same name is truncated
// and overwritten. The error from opening, writing or closing the file is
// returned; the close error is checked because a failed close can mean the
// data never reached the disk.
func (e *Email) saveAttachment(dir string, a *Attachment) error {
	raw := a.Part.FileName()
	if raw == "" {
		return nil
	}
	fn := filepath.Base(raw)
	if fn == "." || fn == ".." || fn == string(filepath.Separator) {
		return fmt.Errorf("unsafe attachment file name %q", raw)
	}

	createDirIfNeeded(dir)
	targetFn := filepath.Join(dir, fn)
	log.Println("Saving", targetFn)
	f, err := os.OpenFile(targetFn, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return err
	}
	n, err := f.Write(a.Bytes)
	log.Printf("savePart copied %d bytes", n)
	// Always close; report the close error only if the write itself succeeded.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	return err
}
// SaveAttachments stores each entry of e.Attachments under dir, in order,
// using saveAttachment. It stops at, and returns, the first error; nil means
// every attachment was handled.
func (e *Email) SaveAttachments(dir string) error {
	for i := range e.Attachments {
		err := e.saveAttachment(dir, &e.Attachments[i])
		if err != nil {
			return err
		}
	}
	return nil
}
// main expands the -maildir glob, parses every matching file with one worker
// goroutine per CPU and saves each message's attachments below
// -attachementsDir in a YYYY/MM/DD subdirectory derived from the message's
// Date header.
//
// Files that fail to parse, lack a parsable date, or whose attachments cannot
// be saved are logged and skipped; they do not stop the run.
func main() {
	flag.Parse()
	ncpus := runtime.NumCPU()
	runtime.GOMAXPROCS(ncpus)
	log.Printf("Parsing %s with %d workers", *maildirGlob, ncpus)
	files, err := filepath.Glob(*maildirGlob)
	if err != nil {
		log.Fatalf("Failed to glob %q: %s", *maildirGlob, err)
	}

	// worker drains fnCh until it is closed, then signals completion on done.
	worker := func(fnCh chan string, done chan bool) {
		for fn := range fnCh {
			email, err := NewEmail(fn)
			if err != nil {
				log.Printf("Error parsing %q: %s", fn, err)
				continue
			}
			d, err := email.Message.Header.Date()
			if err != nil {
				log.Printf("Error parsing date in %q: %s", fn, err)
				continue
			}
			subdir := fmt.Sprintf("%04d/%02d/%02d", d.Year(), d.Month(), d.Day())
			dir := path.Join(*attachementsDir, subdir)
			if err := email.SaveAttachments(dir); err != nil {
				log.Printf("Error saving attachments of %q: %s", fn, err)
			}
		}
		done <- true
	}

	ch := make(chan string, ncpus)
	done := make(chan bool, 1)
	for i := 0; i < ncpus; i++ {
		go worker(ch, done)
	}

	// Feed the workers, reporting throughput once per reportChunk files.
	start := time.Now()
	reportChunk := 1000
	for idx, fn := range files {
		if idx != 0 && idx%reportChunk == 0 {
			delta := time.Since(start)
			log.Printf("Processing %d/%d %.2f msg/s", idx, len(files),
				float64(reportChunk)/delta.Seconds())
			start = time.Now()
		}
		ch <- fn
	}
	close(ch)

	// Wait for every worker to finish before exiting.
	for i := 0; i < ncpus; i++ {
		<-done
	}
}
// NewMimeReader returns a multipart reader over msg.Body, using the boundary
// parameter of the message's Content-Type header. It returns nil when the
// header cannot be parsed or has no boundary parameter.
func NewMimeReader(msg *mail.Message) *multipart.Reader {
	_, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type"))
	if err != nil {
		return nil
	}
	if boundary, found := params["boundary"]; found {
		return multipart.NewReader(msg.Body, boundary)
	}
	// Content-Type parsed, but it carries no boundary.
	return nil
}
// PrintMimeParts iterates over the parts of r and, for each part that has a
// Content-Disposition header, logs the header values followed by the part's
// file name and form name when they are non-empty. It returns when the reader
// is exhausted, or after logging the first error from NextPart.
func PrintMimeParts(r *multipart.Reader) {
	for {
		part, err := r.NextPart()
		switch {
		case err == io.EOF:
			return
		case err != nil:
			log.Print("Failed to parse mime part ", err)
			return
		}

		disposition, found := part.Header["Content-Disposition"]
		if !found {
			continue
		}
		log.Print("\tContent-Disposition ", disposition)
		if name := part.FileName(); name != "" {
			log.Print("FileName ", name)
		}
		if name := part.FormName(); name != "" {
			log.Print("FormName ", name)
		}
	}
}