292 lines
5.3 KiB
Go
292 lines
5.3 KiB
Go
package main
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"log"
|
|
"mime"
|
|
"mime/multipart"
|
|
"net/mail"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"runtime"
|
|
"time"
|
|
)
|
|
|
|
var (
|
|
maildirGlob = flag.String("maildir",
|
|
os.ExpandEnv("${HOME}/Maildir/.*/*/*"),
|
|
"Pattern for email sent files")
|
|
attachementsDir = flag.String("attachementsDir",
|
|
os.ExpandEnv("${HOME}/.attachments"),
|
|
"Directory to store attachements in")
|
|
)
|
|
|
|
type Attachment struct {
|
|
Part multipart.Part
|
|
Bytes []byte
|
|
}
|
|
|
|
type Email struct {
|
|
FileName string
|
|
Message *mail.Message
|
|
Attachments []Attachment
|
|
}
|
|
|
|
func NewEmail(fn string) (*Email, error) {
|
|
//log.Print("Parsing ", fn)
|
|
email := &Email{
|
|
FileName: fn,
|
|
Attachments: []Attachment{},
|
|
}
|
|
|
|
err := email.ParseMessage()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
err = email.ParseParts(email.Message)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return email, nil
|
|
}
|
|
|
|
func (e *Email) String() string {
|
|
return fmt.Sprintf("%s:\n\n%#v\n\n%#v\n\n%#v", e.FileName, e.Message,
|
|
e.Attachments)
|
|
}
|
|
|
|
func (e *Email) ParseMessage() error {
|
|
r, err := os.Open(e.FileName)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
e.Message, err = mail.ReadMessage(r)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (e *Email) ParseSubParts(part *multipart.Part) ([]byte, error) {
|
|
v := part.Header.Get("Content-Type")
|
|
_, params, err := mime.ParseMediaType(v)
|
|
if err != nil {
|
|
bytes, err := ioutil.ReadAll(part)
|
|
return bytes, err
|
|
}
|
|
|
|
boundary, ok := params["boundary"]
|
|
if !ok {
|
|
// Not multipart
|
|
bytes, err := ioutil.ReadAll(part)
|
|
return bytes, err
|
|
}
|
|
pr := multipart.NewReader(part, boundary)
|
|
|
|
for {
|
|
p, err := pr.NextPart()
|
|
switch err {
|
|
case nil:
|
|
bytes, err := e.ParseSubParts(p)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if bytes != nil {
|
|
e.Attachments = append(e.Attachments, Attachment{
|
|
Part: *p,
|
|
Bytes: bytes,
|
|
})
|
|
}
|
|
case io.EOF:
|
|
return nil, nil
|
|
default:
|
|
return nil, err
|
|
}
|
|
}
|
|
panic("never reached")
|
|
}
|
|
|
|
func (e *Email) ParseParts(msg *mail.Message) error {
|
|
v := msg.Header.Get("Content-Type")
|
|
_, params, err := mime.ParseMediaType(v)
|
|
if err != nil {
|
|
// Not multipart
|
|
return nil
|
|
}
|
|
|
|
boundary, ok := params["boundary"]
|
|
if !ok {
|
|
// Not multipart
|
|
return nil
|
|
}
|
|
|
|
pr := multipart.NewReader(e.Message.Body, boundary)
|
|
for {
|
|
part, err := pr.NextPart()
|
|
switch err {
|
|
case nil:
|
|
bytes, err := e.ParseSubParts(part)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if bytes != nil {
|
|
e.Attachments = append(e.Attachments, Attachment{
|
|
Part: *part,
|
|
Bytes: bytes,
|
|
})
|
|
}
|
|
case io.EOF:
|
|
return nil
|
|
default:
|
|
return err
|
|
}
|
|
|
|
}
|
|
panic("never reached")
|
|
}
|
|
|
|
func createDirIfNeeded(dir string) {
|
|
_, err := os.Stat(dir)
|
|
if err != nil {
|
|
log.Println("Creating", dir)
|
|
os.MkdirAll(dir, 0700)
|
|
}
|
|
}
|
|
|
|
func (e *Email) saveAttachment(dir string, a *Attachment) error {
|
|
fn := a.Part.FileName()
|
|
if fn != "" {
|
|
createDirIfNeeded(dir)
|
|
|
|
targetFn := path.Join(dir, fn)
|
|
log.Println("Saving", targetFn)
|
|
f, err := os.OpenFile(targetFn, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
n, err := f.Write(a.Bytes)
|
|
log.Printf("savePart copied %d bytes", n)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (e *Email) SaveAttachments(dir string) error {
|
|
for _, a := range e.Attachments {
|
|
if err := e.saveAttachment(dir, &a); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
ncpus := runtime.NumCPU()
|
|
runtime.GOMAXPROCS(ncpus)
|
|
log.Printf("Parsing %s with %d workers", *maildirGlob, ncpus)
|
|
|
|
files, err := filepath.Glob(*maildirGlob)
|
|
if err != nil {
|
|
log.Fatalf("Failed to glob %q: %s", *maildirGlob, err)
|
|
}
|
|
|
|
worker := func(fnCh chan string, done chan bool) {
|
|
for fn := range fnCh {
|
|
email, err := NewEmail(fn)
|
|
if err != nil {
|
|
log.Printf("Error parsing %q: %s", fn, err)
|
|
continue
|
|
}
|
|
|
|
d, err := email.Message.Header.Date()
|
|
if err != nil {
|
|
log.Printf("Error parsing date in %q: %s", fn, err)
|
|
continue
|
|
}
|
|
|
|
subdir := fmt.Sprintf("%04d/%02d/%02d", d.Year(), d.Month(), d.Day())
|
|
dir := path.Join(*attachementsDir, subdir)
|
|
email.SaveAttachments(dir)
|
|
}
|
|
done <- true
|
|
}
|
|
|
|
ch := make(chan string, ncpus)
|
|
done := make(chan bool, 1)
|
|
for i := 0; i < ncpus; i++ {
|
|
go worker(ch, done)
|
|
}
|
|
|
|
start := time.Now()
|
|
reportChunk := 1000
|
|
for idx, fn := range files {
|
|
if idx != 0 && idx%reportChunk == 0 {
|
|
delta := time.Since(start)
|
|
log.Printf("Processing %d/%d %.2f msg/s", idx, len(files),
|
|
float64(reportChunk)/delta.Seconds())
|
|
start = time.Now()
|
|
}
|
|
ch <- fn
|
|
}
|
|
close(ch)
|
|
for i := 0; i < ncpus; i++ {
|
|
<-done
|
|
}
|
|
}
|
|
|
|
func NewMimeReader(msg *mail.Message) *multipart.Reader {
|
|
v := msg.Header.Get("Content-Type")
|
|
_, params, err := mime.ParseMediaType(v)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
boundary, ok := params["boundary"]
|
|
if !ok {
|
|
//log.Printf("Found Content-Type %q, missing boundary", d)
|
|
return nil
|
|
}
|
|
|
|
return multipart.NewReader(msg.Body, boundary)
|
|
}
|
|
|
|
func PrintMimeParts(r *multipart.Reader) {
|
|
for {
|
|
part, err := r.NextPart()
|
|
if err == io.EOF {
|
|
return
|
|
}
|
|
if err != nil {
|
|
log.Print("Failed to parse mime part ", err)
|
|
return
|
|
}
|
|
|
|
//log.Print("Header ", part.Header)
|
|
content, ok := part.Header["Content-Disposition"]
|
|
if ok {
|
|
log.Print("\tContent-Disposition ", content)
|
|
if part.FileName() != "" {
|
|
log.Print("FileName ", part.FileName())
|
|
}
|
|
if part.FormName() != "" {
|
|
log.Print("FormName ", part.FormName())
|
|
}
|
|
}
|
|
}
|
|
}
|