Initial commit of a handful of utils

This commit is contained in:
Bill Thiede 2012-06-28 17:02:07 -07:00
commit 4c43b679a1
6 changed files with 538 additions and 0 deletions

156
src/dedup/dedup.go Normal file
View File

@ -0,0 +1,156 @@
package main
import (
"crypto/sha1"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"
)
// Package-level state for the dedup tool.
var (
	// totalFiles is the number of paths matched by the glob; main sets it
	// and computeDups reads it when reporting progress.
	totalFiles = 0

	// filePat is the -filepat flag: the glob selecting files to examine.
	filePat = flag.String("filepat", "*", "Glob for list of files to dedup")

	// debug switches the diagnostic log output on (true) or off (false).
	debug = debugT(false)
)
// debugT is an on/off switch for diagnostic logging: its methods forward to
// the log package only when the value is true.
type debugT bool

// Print passes args to log.Print when d is true and does nothing otherwise.
func (d debugT) Print(args ...interface{}) {
	if !d {
		return
	}
	log.Print(args...)
}

// Printf passes format and args to log.Printf when d is true and does
// nothing otherwise.
func (d debugT) Printf(format string, args ...interface{}) {
	if !d {
		return
	}
	log.Printf(format, args...)
}
// Checksum pairs a digest string with the name of the file it was computed
// from.
type Checksum struct {
	// Checksum is the digest text.
	Checksum string
	// Filename is the path the digest belongs to.
	Filename string
}
// main globs the files selected by -filepat, checksums them on a pool of
// worker goroutines, and lets computeDups group the results and offer to
// delete duplicates.
func main() {
flag.Parse()
// Environment variables in the pattern are expanded before globbing.
files, err := filepath.Glob(os.ExpandEnv(*filePat))
if err != nil {
log.Fatalf("Failed to glob %q: %s", *filePat, err)
}
totalFiles = len(files)
debug.Printf("Found %d files", totalFiles)
numWorkers := 10
fns := make(chan string, numWorkers)
sums := make(chan Checksum, numWorkers)
// done is shared: each checksumer sends once when fns is drained, and
// computeDups sends once after sums has been closed.
done := make(chan bool)
for i:=0; i< numWorkers; i++ {
go checksumer(fns, sums, done)
}
go computeDups(sums, done)
//go printer(sums, done)
for _, fn := range files {
fns <- fn
}
// Closing fns ends each worker's range loop.
close(fns)
// computeDups cannot signal before sums is closed below, so these
// numWorkers receives are the workers' completion signals.
for i:=0; i< numWorkers; i++ {
<-done
}
// No worker can send any more; closing sums lets computeDups finish.
close(sums)
// Wait for computeDups (including any deletion it performs).
<-done
}
// removeDups walks the checksum groups in dups and, for every group holding
// more than one file name, keeps the first name and deletes the rest from
// disk. Each step is logged; a failed removal terminates the program via
// log.Fatalf.
func removeDups(dups map[string][]string) {
	log.Print("Removing dups")
	for _, group := range dups {
		if len(group) <= 1 {
			// Nothing shares this checksum.
			continue
		}
		kept, extras := group[0], group[1:]
		log.Print("Skipping: ", kept)
		log.Print("Deleting")
		for i := range extras {
			log.Print("\t", extras[i])
			if err := os.Remove(extras[i]); err != nil {
				log.Fatalf("Failed to remove file: %s", err)
			}
		}
	}
}
// computeDups drains sums, grouping file names by checksum, and logs
// progress on every 1000th item. Once sums is closed it counts the
// checksums shared by more than one file. If there are any it asks on
// standard input whether to delete them and calls removeDups when the
// answer starts with "y" (case-insensitive). It always finishes by sending
// true on done.
func computeDups(sums <-chan Checksum, done chan bool) {
	byDigest := make(map[string][]string)
	seen := 0
	for entry := range sums {
		byDigest[entry.Checksum] = append(byDigest[entry.Checksum], entry.Filename)
		if seen > 0 && seen%1000 == 0 {
			log.Printf("Processed %d/%d files", seen, totalFiles)
		}
		seen++
	}

	// Number of checksums that more than one file maps to.
	shared := 0
	for _, names := range byDigest {
		if len(names) > 1 {
			shared++
		}
	}

	if shared == 0 {
		log.Print("No dups found")
		done <- true
		return
	}

	log.Printf("Found %d files with dups, delete them? [y/n]", shared)
	var reply string
	fmt.Scan(&reply)
	if strings.HasPrefix(strings.ToLower(reply), "y") {
		removeDups(byDigest)
	}
	done <- true
}
// printer writes every received checksum and file name to standard output,
// one space-separated pair per line, until sums is closed, then sends true
// on done.
func printer(sums <-chan Checksum, done chan bool) {
	for {
		entry, open := <-sums
		if !open {
			break
		}
		fmt.Println(entry.Checksum, entry.Filename)
	}
	done <- true
}
// checksumer is a worker: for every file name received on fns it sends the
// file's hex SHA-1 digest on sums, and once fns is closed it sends true on
// done.
//
// Only regular files that were read completely are reported. Unopenable
// paths, paths that cannot be stat'ed, non-regular files (directories,
// devices, ...) and files whose read fails are logged and skipped, so they
// can never be mistaken for duplicates of one another.
func checksumer(fns <-chan string, sums chan<- Checksum, done chan bool) {
	for fn := range fns {
		digest, ok := sha1OfRegularFile(fn)
		if !ok {
			continue
		}
		sums <- Checksum{
			Checksum: digest,
			Filename: fn,
		}
	}
	done <- true
}

// sha1OfRegularFile returns the hex SHA-1 of fn's contents. The boolean is
// false, and the reason has been logged, when fn is not a fully readable
// regular file.
func sha1OfRegularFile(fn string) (string, bool) {
	f, err := os.Open(fn)
	if err != nil {
		log.Print("Error opening file: ", err)
		return "", false
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		// Without the mode we cannot tell what this is; do not hash it.
		log.Print("Error stating file: ", err)
		return "", false
	}
	debug.Printf("Mode %s for %q", fi.Mode(), fn)
	if fi.Mode()&os.ModeType != 0 {
		debug.Print("Skipping non-file ", fn)
		return "", false
	}

	h := sha1.New()
	if _, err := io.Copy(h, f); err != nil {
		// A digest of partial data could match an unrelated file.
		log.Printf("Error reading %q: %s", fn, err)
		return "", false
	}
	return fmt.Sprintf("%x", h.Sum(nil)), true
}

22
src/lsmime/cte.txt Normal file
View File

@ -0,0 +1,22 @@
1 Content-Transfer-Encoding to 8bit, although that may violate a SHOULD
1 Content-Transfer-Encoding: base64<br>
2 Content-Transfer-Encoding:
2 Content-Transfer-Encoding: 7bit
6 Content-Transfer-Encoding:7bit
10 Content-Transfer-Encoding: 8Bit
15 Content-Transfer-Encoding:quoted-printable
19 Content-Transfer-Encoding: Quoted-printable
22 Content-Transfer-Encoding: quoted-printable
36 Content-Transfer-Encoding: us-ascii
58 Content-Transfer-Encoding: 7Bit
62 Content-Transfer-Encoding: 7BIT
68 Content-Transfer-Encoding: 8BIT
88 Content-Transfer-Encoding: Quoted-Printable
96 Content-Transfer-Encoding: binary
125 Content-Transfer-Encoding: QUOTED-PRINTABLE
135 Content-Transfer-Encoding: BASE64
6695 Content-Transfer-Encoding: 7bit;
13709 Content-Transfer-Encoding: base64
14614 Content-Transfer-Encoding: 8bit
92165 Content-Transfer-Encoding: quoted-printable
155825 Content-Transfer-Encoding: 7bit

BIN
src/lsmime/lsmime Executable file

Binary file not shown.

291
src/lsmime/lsmime.go Normal file
View File

@ -0,0 +1,291 @@
package main
import (
	"bytes"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"mime"
	"mime/multipart"
	"net/mail"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"time"
)
// maildirGlob is the -maildir flag: the glob selecting message files to
// parse. Its default is built from $HOME when the package initializes.
var maildirGlob = flag.String(
	"maildir",
	os.ExpandEnv("${HOME}/Maildir/.*/*/*"),
	"Pattern for email sent files",
)

// attachementsDir is the -attachementsDir flag: the root directory under
// which attachments are written. Its default is built from $HOME when the
// package initializes.
var attachementsDir = flag.String(
	"attachementsDir",
	os.ExpandEnv("${HOME}/.attachments"),
	"Directory to store attachements in",
)
// Attachment is one MIME part extracted from a message together with the
// bytes read from it.
type Attachment struct {
// Part is a by-value copy of the multipart part the bytes came from; its
// FileName method supplies the name used when saving.
Part multipart.Part
// Bytes is everything that was read from the part's body.
Bytes []byte
}
// Email is a message file on disk together with its parsed form and the
// parts collected from it.
type Email struct {
// FileName is the path of the message file.
FileName string
// Message is the parsed message; ParseMessage sets it.
Message *mail.Message
// Attachments holds the parts gathered by ParseParts and ParseSubParts.
Attachments []Attachment
}
// NewEmail builds an Email for the message file fn: it parses the message
// and then collects its MIME parts. If either step fails the error is
// returned and the Email is nil.
func NewEmail(fn string) (*Email, error) {
	//log.Print("Parsing ", fn)
	parsed := &Email{FileName: fn, Attachments: []Attachment{}}
	if err := parsed.ParseMessage(); err != nil {
		return nil, err
	}
	if err := parsed.ParseParts(parsed.Message); err != nil {
		return nil, err
	}
	return parsed, nil
}
// String renders the email for debugging: the file name followed by the
// Go-syntax (%#v) dumps of the parsed message and of the attachments.
func (e *Email) String() string {
	// One verb per argument; a surplus verb would print "%!v(MISSING)".
	return fmt.Sprintf("%s:\n\n%#v\n\n%#v", e.FileName, e.Message,
		e.Attachments)
}
// ParseMessage reads the file named by e.FileName and parses it into
// e.Message. It returns the read or parse error, if any.
//
// The file is read into memory in one go so that no file descriptor stays
// open afterwards. e.Message.Body then reads from that in-memory copy and
// remains usable after ParseMessage returns.
func (e *Email) ParseMessage() error {
	raw, err := ioutil.ReadFile(e.FileName)
	if err != nil {
		return err
	}
	e.Message, err = mail.ReadMessage(bytes.NewReader(raw))
	return err
}
// multipartBoundary extracts the "boundary" parameter from a Content-Type
// header value. The boolean is false when the value cannot be parsed or
// carries no boundary, i.e. when the content is not multipart.
func multipartBoundary(contentType string) (string, bool) {
	_, params, err := mime.ParseMediaType(contentType)
	if err != nil {
		return "", false
	}
	boundary, ok := params["boundary"]
	return boundary, ok
}

// collectParts reads every part of the multipart stream r delimited by
// boundary. Each part goes through ParseSubParts; parts that yield bytes
// are appended to e.Attachments. It returns nil at the end of the stream
// and the first error otherwise.
func (e *Email) collectParts(r io.Reader, boundary string) error {
	pr := multipart.NewReader(r, boundary)
	for {
		part, err := pr.NextPart()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		data, err := e.ParseSubParts(part)
		if err != nil {
			return err
		}
		// nil means the part was itself multipart and its children have
		// already been recorded.
		if data != nil {
			e.Attachments = append(e.Attachments, Attachment{
				Part:  *part,
				Bytes: data,
			})
		}
	}
}

// ParseSubParts handles a single MIME part. A part that is not multipart
// (no parsable Content-Type or no boundary parameter) is read completely
// and its bytes are returned. A multipart part is descended into
// recursively: its leaf parts are appended to e.Attachments and the
// returned slice is nil.
//
// NOTE(review): the bytes are stored as read from the part; no explicit
// Content-Transfer-Encoding decoding (e.g. base64) happens here. Confirm
// whether saved attachments are expected to be decoded.
func (e *Email) ParseSubParts(part *multipart.Part) ([]byte, error) {
	boundary, ok := multipartBoundary(part.Header.Get("Content-Type"))
	if !ok {
		// Not multipart
		return ioutil.ReadAll(part)
	}
	return nil, e.collectParts(part, boundary)
}

// ParseParts collects the MIME parts of msg into e.Attachments. A message
// that is not multipart is left alone and nil is returned. Both the
// Content-Type header and the body are taken from msg.
func (e *Email) ParseParts(msg *mail.Message) error {
	boundary, ok := multipartBoundary(msg.Header.Get("Content-Type"))
	if !ok {
		// Not multipart
		return nil
	}
	return e.collectParts(msg.Body, boundary)
}
// createDirIfNeeded makes sure dir exists. If dir cannot be stat'ed it is
// created, together with any missing parents, with mode 0700. A failure to
// create it is logged rather than returned; the caller's subsequent file
// operation then reports its own error.
func createDirIfNeeded(dir string) {
	if _, err := os.Stat(dir); err == nil {
		return
	}
	log.Println("Creating", dir)
	if err := os.MkdirAll(dir, 0700); err != nil {
		log.Printf("Failed to create %q: %s", dir, err)
	}
}
// saveAttachment writes a.Bytes to a file inside dir named after the
// attachment's file name, creating dir if necessary and truncating any
// existing file. Attachments without a file name are skipped and nil is
// returned.
//
// The file name comes from the message, so only its final path element is
// used; names that reduce to ".", ".." or "/" are rejected with an error.
// Errors from opening, writing or closing the file are returned.
func (e *Email) saveAttachment(dir string, a *Attachment) error {
	fn := a.Part.FileName()
	if fn == "" {
		return nil
	}
	// Never let a name such as "../../x" escape dir.
	base := path.Base(fn)
	if base == "." || base == ".." || base == "/" {
		return fmt.Errorf("unsafe attachment filename %q", fn)
	}

	createDirIfNeeded(dir)
	targetFn := path.Join(dir, base)
	log.Println("Saving", targetFn)
	f, err := os.OpenFile(targetFn, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return err
	}
	n, err := f.Write(a.Bytes)
	log.Printf("savePart copied %d bytes", n)
	// A failed Close can mean the data never reached the disk, so report it
	// unless the write already failed.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	return err
}
// SaveAttachments saves each of the email's attachments into dir, in order,
// stopping at and returning the first error. It returns nil when every
// attachment was handled.
func (e *Email) SaveAttachments(dir string) error {
	pending := e.Attachments
	for i := 0; i < len(pending); i++ {
		// Work on a copy, as a range loop over values would.
		att := pending[i]
		if err := e.saveAttachment(dir, &att); err != nil {
			return err
		}
	}
	return nil
}
// main parses every message file matched by -maildir on one worker
// goroutine per CPU and saves the attachments it finds below
// -attachementsDir, in a YYYY/MM/DD subdirectory taken from each message's
// Date header. Messages that fail to parse, lack a parsable date, or whose
// attachments cannot be saved are logged and skipped.
func main() {
	flag.Parse()
	ncpus := runtime.NumCPU()
	runtime.GOMAXPROCS(ncpus)
	log.Printf("Parsing %s with %d workers", *maildirGlob, ncpus)
	files, err := filepath.Glob(*maildirGlob)
	if err != nil {
		log.Fatalf("Failed to glob %q: %s", *maildirGlob, err)
	}

	// worker handles file names from fnCh until it is closed, then signals
	// completion on done.
	worker := func(fnCh chan string, done chan bool) {
		for fn := range fnCh {
			email, err := NewEmail(fn)
			if err != nil {
				log.Printf("Error parsing %q: %s", fn, err)
				continue
			}
			d, err := email.Message.Header.Date()
			if err != nil {
				log.Printf("Error parsing date in %q: %s", fn, err)
				continue
			}
			subdir := fmt.Sprintf("%04d/%02d/%02d", d.Year(), d.Month(), d.Day())
			dir := path.Join(*attachementsDir, subdir)
			if err := email.SaveAttachments(dir); err != nil {
				// Report the failure instead of dropping it silently.
				log.Printf("Error saving attachments from %q: %s", fn, err)
			}
		}
		done <- true
	}

	ch := make(chan string, ncpus)
	done := make(chan bool, 1)
	for i := 0; i < ncpus; i++ {
		go worker(ch, done)
	}

	// Every reportChunk files, log the rate at which the previous chunk was
	// handed to the workers.
	start := time.Now()
	reportChunk := 1000
	for idx, fn := range files {
		if idx != 0 && idx%reportChunk == 0 {
			delta := time.Since(start)
			log.Printf("Processing %d/%d %.2f msg/s", idx, len(files),
				float64(reportChunk)/delta.Seconds())
			start = time.Now()
		}
		ch <- fn
	}
	close(ch)

	// Wait for every worker to drain the channel and finish.
	for i := 0; i < ncpus; i++ {
		<-done
	}
}
// NewMimeReader returns a multipart reader over msg.Body, delimited by the
// boundary named in the message's Content-Type header. It returns nil when
// the header cannot be parsed or has no boundary parameter.
func NewMimeReader(msg *mail.Message) *multipart.Reader {
	contentType := msg.Header.Get("Content-Type")
	_, params, err := mime.ParseMediaType(contentType)
	if err != nil {
		return nil
	}
	if boundary, found := params["boundary"]; found {
		return multipart.NewReader(msg.Body, boundary)
	}
	//log.Printf("Found Content-Type %q, missing boundary", d)
	return nil
}
// PrintMimeParts walks the parts of r and, for each part carrying a
// Content-Disposition header, logs the header values followed by the part's
// file name and form name when those are non-empty. It returns at the end
// of the stream, or after logging the first error NextPart reports.
func PrintMimeParts(r *multipart.Reader) {
	for {
		part, err := r.NextPart()
		if err == io.EOF {
			return
		}
		if err != nil {
			log.Print("Failed to parse mime part ", err)
			return
		}
		//log.Print("Header ", part.Header)
		disposition, present := part.Header["Content-Disposition"]
		if !present {
			continue
		}
		log.Print("\tContent-Disposition ", disposition)
		if name := part.FileName(); name != "" {
			log.Print("FileName ", name)
		}
		if name := part.FormName(); name != "" {
			log.Print("FormName ", name)
		}
	}
}

48
src/notmuch/notmuch.go Normal file
View File

@ -0,0 +1,48 @@
package notmuch
import (
"log"
"strconv"
"strings"
"os/exec"
)
// notmuchCmd is the executable run for every query.
const notmuchCmd = "notmuch"

// unreadThreadCountArgs are the space-separated arguments that make notmuch
// count threads tagged unread.
const unreadThreadCountArgs = "count --output=threads tag:unread"

// unreadMessageCountArgs are the space-separated arguments that make
// notmuch count messages tagged unread.
const unreadMessageCountArgs = "count --output=messages tag:unread"
// Notmuch is a handle for running notmuch queries. It carries no state.
type Notmuch struct{}

// NewNotmuch returns a new Notmuch handle. The string argument is ignored.
func NewNotmuch(_ string) *Notmuch {
	return new(Notmuch)
}
// notmuchMust runs the notmuch command with the given arguments and returns
// what it wrote to standard output. If the command cannot be run or exits
// unsuccessfully the program is terminated via log.Fatalf.
func notmuchMust(options []string) string {
	proc := exec.Command(notmuchCmd, options...)
	stdout, runErr := proc.Output()
	if runErr != nil {
		log.Fatalf("Failed to read from process %v: %s\n", proc, runErr)
	}
	return string(stdout)
}
// intFromCommand splits options on whitespace, runs notmuch with the
// resulting arguments and returns its output parsed as an integer after
// trimming surrounding whitespace. Output that is not an integer terminates
// the program via log.Fatalf.
func intFromCommand(options string) int {
	args := strings.Fields(options)
	raw := notmuchMust(args)
	n, err := strconv.Atoi(strings.TrimSpace(raw))
	if err != nil {
		log.Fatalf("Failed to parse int from %q: %s\n", raw, err)
	}
	return n
}
// UnreadMessageCount returns the integer obtained by running notmuch with
// unreadMessageCountArgs ("count --output=messages tag:unread"). The
// helpers it relies on terminate the program via log.Fatalf if the command
// fails or its output is not an integer.
func (nm *Notmuch) UnreadMessageCount() int {
return intFromCommand(unreadMessageCountArgs)
}
// UnreadThreadCount returns the integer obtained by running notmuch with
// unreadThreadCountArgs ("count --output=threads tag:unread"). The helpers
// it relies on terminate the program via log.Fatalf if the command fails or
// its output is not an integer.
func (nm *Notmuch) UnreadThreadCount() int {
return intFromCommand(unreadThreadCountArgs)
}

View File

@ -0,0 +1,21 @@
package notmuch
import (
"testing"
)
// TestUnreadMessageCount checks that the unread message count reported for
// the local notmuch database is positive.
func TestUnreadMessageCount(t *testing.T) {
	if count := NewNotmuch("").UnreadMessageCount(); count <= 0 {
		t.Errorf("Count wasn't enough, expected > 0, got %d\n", count)
	}
}
// TestUnreadThreadCount checks that the unread thread count reported for
// the local notmuch database is positive.
func TestUnreadThreadCount(t *testing.T) {
	if count := NewNotmuch("").UnreadThreadCount(); count <= 0 {
		t.Errorf("Count wasn't enough, expected > 0, got %d\n", count)
	}
}