commit 4c43b679a10ca20ac5dfdd030dda5496254fcee0 Author: Bill Thiede Date: Thu Jun 28 17:02:07 2012 -0700 Initial commit of a handleful of utils diff --git a/src/dedup/dedup.go b/src/dedup/dedup.go new file mode 100644 index 0000000..2e96ab5 --- /dev/null +++ b/src/dedup/dedup.go @@ -0,0 +1,156 @@ +package main + +import ( + "crypto/sha1" + "flag" + "fmt" + "io" + "log" + "os" + "path/filepath" + "strings" +) + +var totalFiles = 0 + +var filePat = flag.String("filepat", "*", "Glob for list of files to dedup") + +var debug = debugT(false) + +type debugT bool + +func (d debugT) Print(args ...interface{}) { + if d { + log.Print(args...) + } +} + +func (d debugT) Printf(format string, args ...interface{}) { + if d { + log.Printf(format, args...) + } +} + +type Checksum struct { + Checksum, Filename string +} + +func main() { + flag.Parse() + + files, err := filepath.Glob(os.ExpandEnv(*filePat)) + if err != nil { + log.Fatalf("Failed to glob %q: %s", *filePat, err) + } + totalFiles = len(files) + debug.Printf("Found %d files", totalFiles) + + numWorkers := 10 + fns := make(chan string, numWorkers) + sums := make(chan Checksum, numWorkers) + done := make(chan bool) + + for i:=0; i< numWorkers; i++ { + go checksumer(fns, sums, done) + } + go computeDups(sums, done) + //go printer(sums, done) + + for _, fn := range files { + fns <- fn + } + + close(fns) + + for i:=0; i< numWorkers; i++ { + <-done + } + + close(sums) + <-done +} + +func removeDups(dups map[string][]string) { + log.Print("Removing dups") + for _, fns := range dups { + if len(fns) > 1 { + log.Print("Skipping: ", fns[0]) + log.Print("Deleting") + for _, fn := range fns[1:] { + log.Print("\t", fn) + err := os.Remove(fn) + if err != nil { + log.Fatalf("Failed to remove file: %s", err) + } + } + } + } +} + +func computeDups(sums <-chan Checksum, done chan bool) { + dups := map[string][]string{} + count := 0 + for sum := range sums { + dups[sum.Checksum] = append(dups[sum.Checksum], sum.Filename) + + if count > 0 && (count % 1000) == 0 { + log.Printf("Processed %d/%d files", count, totalFiles) + } + count++ + } + + count = 0 + for _, fns := range dups { + if len(fns) > 1 { + count++ + } + } + + if count == 0 { + log.Print("No dups found") + } else { + log.Printf("Found %d files with dups, delete them? [y/n]", count) + var answer string + fmt.Scan(&answer) + if strings.HasPrefix(strings.ToLower(answer), "y") { + removeDups(dups) + } + } + + done <- true +} + +func printer(sums <-chan Checksum, done chan bool) { + for sum := range sums { + fmt.Println(sum.Checksum, sum.Filename) + } + done <- true +} + +func checksumer(fns <-chan string, sums chan<- Checksum, done chan bool) { + for fn := range fns { + f, err := os.Open(fn) + if err != nil { + log.Print("Error opening file: ", err) + continue + } + fi, err := f.Stat() + if err != nil { + log.Print("Error stating file: ", err) + } else { + debug.Printf("Mode %s for %q", fi.Mode(), fn) + if (fi.Mode() & os.ModeType) != 0 { + debug.Print("Skipping non-file ", fn) + } + } + + h := sha1.New() + io.Copy(h, f) + f.Close() + sums <- Checksum{ + Checksum: fmt.Sprintf("%x", h.Sum(nil)), + Filename: fn, + } + } + done <- true +} diff --git a/src/lsmime/cte.txt b/src/lsmime/cte.txt new file mode 100644 index 0000000..76b7887 --- /dev/null +++ b/src/lsmime/cte.txt @@ -0,0 +1,22 @@ + 1 Content-Transfer-Encoding to 8bit, although that may violate a SHOULD + 1 Content-Transfer-Encoding: base64
+ 2 Content-Transfer-Encoding: + 2 Content-Transfer-Encoding: 7bit + 6 Content-Transfer-Encoding:7bit + 10 Content-Transfer-Encoding: 8Bit + 15 Content-Transfer-Encoding:quoted-printable + 19 Content-Transfer-Encoding: Quoted-printable + 22 Content-Transfer-Encoding: quoted-printable + 36 Content-Transfer-Encoding: us-ascii + 58 Content-Transfer-Encoding: 7Bit + 62 Content-Transfer-Encoding: 7BIT + 68 Content-Transfer-Encoding: 8BIT + 88 Content-Transfer-Encoding: Quoted-Printable + 96 Content-Transfer-Encoding: binary + 125 Content-Transfer-Encoding: QUOTED-PRINTABLE + 135 Content-Transfer-Encoding: BASE64 +6695 Content-Transfer-Encoding: 7bit; +13709 Content-Transfer-Encoding: base64 +14614 Content-Transfer-Encoding: 8bit +92165 Content-Transfer-Encoding: quoted-printable +155825 Content-Transfer-Encoding: 7bit diff --git a/src/lsmime/lsmime b/src/lsmime/lsmime new file mode 100755 index 0000000..1656f60 Binary files /dev/null and b/src/lsmime/lsmime differ diff --git a/src/lsmime/lsmime.go b/src/lsmime/lsmime.go new file mode 100644 index 0000000..3a7453f --- /dev/null +++ b/src/lsmime/lsmime.go @@ -0,0 +1,291 @@ +package main + +import ( + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "mime" + "mime/multipart" + "net/mail" + "os" + "path" + "path/filepath" + "runtime" + "time" +) + +var ( + maildirGlob = flag.String("maildir", + os.ExpandEnv("${HOME}/Maildir/.*/*/*"), + "Pattern for email sent files") + attachementsDir = flag.String("attachementsDir", + os.ExpandEnv("${HOME}/.attachments"), + "Directory to store attachements in") +) + +type Attachment struct { + Part multipart.Part + Bytes []byte +} + +type Email struct { + FileName string + Message *mail.Message + Attachments []Attachment +} + +func NewEmail(fn string) (*Email, error) { + //log.Print("Parsing ", fn) + email := &Email{ + FileName: fn, + Attachments: []Attachment{}, + } + + err := email.ParseMessage() + if err != nil { + return nil, err + } + + err = email.ParseParts(email.Message) + if err != nil { + return nil, err + } + + return email, nil +} + +func (e *Email) String() string { + return fmt.Sprintf("%s:\n\n%#v\n\n%#v\n\n%#v", e.FileName, e.Message, + e.Attachments) +} + +func (e *Email) ParseMessage() error { + r, err := os.Open(e.FileName) + if err != nil { + return err + } + + e.Message, err = mail.ReadMessage(r) + if err != nil { + return err + } + + return nil +} + +func (e *Email) ParseSubParts(part *multipart.Part) ([]byte, error) { + v := part.Header.Get("Content-Type") + _, params, err := mime.ParseMediaType(v) + if err != nil { + bytes, err := ioutil.ReadAll(part) + return bytes, err + } + + boundary, ok := params["boundary"] + if !ok { + // Not multipart + bytes, err := ioutil.ReadAll(part) + return bytes, err + } + pr := multipart.NewReader(part, boundary) + + for { + p, err := pr.NextPart() + switch err { + case nil: + bytes, err := e.ParseSubParts(p) + if err != nil { + return nil, err + } + + if bytes != nil { + e.Attachments = append(e.Attachments, Attachment{ + Part: *p, + Bytes: bytes, + }) + } + case io.EOF: + return nil, nil + default: + return nil, err + } + } + panic("never reached") +} + +func (e *Email) ParseParts(msg *mail.Message) error { + v := msg.Header.Get("Content-Type") + _, params, err := mime.ParseMediaType(v) + if err != nil { + // Not multipart + return nil + } + + boundary, ok := params["boundary"] + if !ok { + // Not multipart + return nil + } + + pr := multipart.NewReader(e.Message.Body, boundary) + for { + part, err := pr.NextPart() + switch err { + case nil: + bytes, err := e.ParseSubParts(part) + if err != nil { + return err + } + + if bytes != nil { + e.Attachments = append(e.Attachments, Attachment{ + Part: *part, + Bytes: bytes, + }) + } + case io.EOF: + return nil + default: + return err + } + + } + panic("never reached") +} + +func createDirIfNeeded(dir string) { + _, err := os.Stat(dir) + if err != nil { + log.Println("Creating", dir) + os.MkdirAll(dir, 0700) + } +} + +func (e *Email) saveAttachment(dir string, a *Attachment) error { + fn := a.Part.FileName() + if fn != "" { + createDirIfNeeded(dir) + + targetFn := path.Join(dir, fn) + log.Println("Saving", targetFn) + f, err := os.OpenFile(targetFn, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600) + if err != nil { + return err + } + defer f.Close() + + n, err := f.Write(a.Bytes) + log.Printf("savePart copied %d bytes", n) + if err != nil { + return err + } + } + return nil +} + +func (e *Email) SaveAttachments(dir string) error { + for _, a := range e.Attachments { + if err := e.saveAttachment(dir, &a); err != nil { + return err + } + } + return nil +} + +func main() { + flag.Parse() + ncpus := runtime.NumCPU() + runtime.GOMAXPROCS(ncpus) + log.Printf("Parsing %s with %d workers", *maildirGlob, ncpus) + + files, err := filepath.Glob(*maildirGlob) + if err != nil { + log.Fatalf("Failed to glob %q: %s", *maildirGlob, err) + } + + worker := func(fnCh chan string, done chan bool) { + for fn := range fnCh { + email, err := NewEmail(fn) + if err != nil { + log.Printf("Error parsing %q: %s", fn, err) + continue + } + + d, err := email.Message.Header.Date() + if err != nil { + log.Printf("Error parsing date in %q: %s", fn, err) + continue + } + + subdir := fmt.Sprintf("%04d/%02d/%02d", d.Year(), d.Month(), d.Day()) + dir := path.Join(*attachementsDir, subdir) + email.SaveAttachments(dir) + } + done <- true + } + + ch := make(chan string, ncpus) + done := make(chan bool, 1) + for i := 0; i < ncpus; i++ { + go worker(ch, done) + } + + start := time.Now() + reportChunk := 1000 + for idx, fn := range files { + if idx != 0 && idx%reportChunk == 0 { + delta := time.Since(start) + log.Printf("Processing %d/%d %.2f msg/s", idx, len(files), + float64(reportChunk)/delta.Seconds()) + start = time.Now() + } + ch <- fn + } + close(ch) + for i := 0; i < ncpus; i++ { + <-done + } +} + +func NewMimeReader(msg *mail.Message) *multipart.Reader { + v := msg.Header.Get("Content-Type") + _, params, err := mime.ParseMediaType(v) + if err != nil { + return nil + } + + boundary, ok := params["boundary"] + if !ok { + //log.Printf("Found Content-Type %q, missing boundary", d) + return nil + } + + return multipart.NewReader(msg.Body, boundary) +} + +func PrintMimeParts(r *multipart.Reader) { + for { + part, err := r.NextPart() + if err == io.EOF { + return + } + if err != nil { + log.Print("Failed to parse mime part ", err) + return + } + + //log.Print("Header ", part.Header) + content, ok := part.Header["Content-Disposition"] + if ok { + log.Print("\tContent-Disposition ", content) + if part.FileName() != "" { + log.Print("FileName ", part.FileName()) + } + if part.FormName() != "" { + log.Print("FormName ", part.FormName()) + } + } + } +} diff --git a/src/notmuch/notmuch.go b/src/notmuch/notmuch.go new file mode 100644 index 0000000..513bd5a --- /dev/null +++ b/src/notmuch/notmuch.go @@ -0,0 +1,48 @@ +package notmuch + +import ( + "log" + "strconv" + "strings" + "os/exec" +) + +const ( + notmuchCmd = "notmuch" + unreadThreadCountArgs = "count --output=threads tag:unread" + unreadMessageCountArgs = "count --output=messages tag:unread" +) + +type Notmuch struct { +} + +func NewNotmuch(_ string) *Notmuch { + return &Notmuch{} +} + +func notmuchMust(options []string) string { + cmd := exec.Command(notmuchCmd, options...) + b, err := cmd.Output() + if err != nil { + log.Fatalf("Failed to read from process %v: %s\n", cmd, err) + } + + return string(b) +} + +func intFromCommand(options string) int { + output := notmuchMust(strings.Fields(options)) + cnt, err := strconv.Atoi(strings.TrimSpace(output)) + if err != nil { + log.Fatalf("Failed to parse int from %q: %s\n", output, err) + } + return cnt +} + +func (nm *Notmuch) UnreadMessageCount() int { + return intFromCommand(unreadMessageCountArgs) +} + +func (nm *Notmuch) UnreadThreadCount() int { + return intFromCommand(unreadThreadCountArgs) +} diff --git a/src/notmuch/notmuch_test.go b/src/notmuch/notmuch_test.go new file mode 100644 index 0000000..2778322 --- /dev/null +++ b/src/notmuch/notmuch_test.go @@ -0,0 +1,21 @@ +package notmuch + +import ( + "testing" +) + +func TestUnreadMessageCount(t *testing.T) { + nm := NewNotmuch("") + count := nm.UnreadMessageCount() + if count <= 0 { + t.Errorf("Count wasn't enough, expected > 0, got %d\n", count) + } +} + +func TestUnreadThreadCount(t *testing.T) { + nm := NewNotmuch("") + count := nm.UnreadThreadCount() + if count <= 0 { + t.Errorf("Count wasn't enough, expected > 0, got %d\n", count) + } +}