Add helper to print message with hash as a header.
Broke out email.go's Hash() into multiple functions to make this easier, and updated the tools that used it.
This commit is contained in:
parent
4c906e9f49
commit
12199604c1
36
cmd/addhashheader/addhashheader.go
Normal file
36
cmd/addhashheader/addhashheader.go
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/mail"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/golang/glog"
|
||||||
|
|
||||||
|
"xinu.tv/email"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
defer glog.Flush()
|
||||||
|
|
||||||
|
msg, err := mail.ReadMessage(os.Stdin)
|
||||||
|
if err != nil {
|
||||||
|
glog.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
h, err := email.HashMessage(msg)
|
||||||
|
if err != nil {
|
||||||
|
glog.Fatal(err)
|
||||||
|
}
|
||||||
|
for k, vs := range msg.Header {
|
||||||
|
for _, v := range vs {
|
||||||
|
fmt.Printf("%s: %s\n", k, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Println("X-Xinu-Hash:", h)
|
||||||
|
fmt.Println()
|
||||||
|
io.Copy(os.Stdout, msg.Body)
|
||||||
|
}
|
||||||
109
cmd/addhashheader/testdata/msg.1
vendored
Normal file
109
cmd/addhashheader/testdata/msg.1
vendored
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
Subject: [go-nuts] Re: Runtime code generation for scientific computing?
|
||||||
|
From: Raul Mera <rmeraa@gmail.com>
|
||||||
|
To: golang-nuts@googlegroups.com
|
||||||
|
Date: Fri, 28 Mar 2014 16:14:50 -0700 (PDT)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
On Friday, March 28, 2014 10:31:54 PM UTC+1, egon wrote:
|
||||||
|
>
|
||||||
|
> I've been thinking how to properly approach scientific computing with Go.
|
||||||
|
>
|
||||||
|
> It seems that there are many cases which can be improved at the level of
|
||||||
|
> compiler i.e. multi-dimensional arrays, matrices, sets. There are also
|
||||||
|
> SIMD, GPGPUs, OpenCL, OpenMP, FPUs etc. basically hardware features and
|
||||||
|
> libraries that can improve the speed of calculations. So the question is
|
||||||
|
> how to properly target all of that?
|
||||||
|
>
|
||||||
|
>
|
||||||
|
Adding support for each of those special cases would make the compiler
|
||||||
|
> slower, more complex and make it harder to maintain. So adding all of that
|
||||||
|
> wouldn't probably be a good idea.
|
||||||
|
>
|
||||||
|
>
|
||||||
|
Please let's not mix hardware facilities or especial libraries, which are
|
||||||
|
indeed special cases, with multi-dimensional slices (we already have
|
||||||
|
multidimensional arrays) which are a natural way to represent matrices,
|
||||||
|
which, in turn, are not quite an "special case" but pretty much the main
|
||||||
|
thing you use for any code that deals with mathematics in some way (which
|
||||||
|
is a lot of code, not only scientific). Multidimensional slices are not
|
||||||
|
linked to a particular library, hardware or even algorithm. The proposal
|
||||||
|
currently being discussed in another thread here is a natural extension of
|
||||||
|
what exists, and is careful to keep complexity to a minimun.
|
||||||
|
|
||||||
|
I very much agree with you about GPUGPU, OpenCL, etc. Those things should
|
||||||
|
not be in the language. Also, noone in the scientific community has asked
|
||||||
|
for sets, as far as I know (sorry if I am wrong here), and 2-dimensional
|
||||||
|
slices are the same as matrices for our purposes. This means that while in
|
||||||
|
your mail it appears that a long list of language additions is needed/asked
|
||||||
|
for in order for Go to succeed in scientific programming, the community
|
||||||
|
is actually asking for only one thing,
|
||||||
|
|
||||||
|
Also, saying "multi-dimensional slices, matrices" gives a misleading
|
||||||
|
impression of several issues, when both things are actually the same for
|
||||||
|
our purposes. Since I have not seen anyone from the numerical community
|
||||||
|
asking for sets, the whole list you present as language changes wanted by
|
||||||
|
the scientific computing people is reduced to one item.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
> So what would be the alternative? One approach that would be viable is
|
||||||
|
> runtime code generation, it's complex in it's internals, but idea is
|
||||||
|
> simple. Take a string and convert it to a Go function at runtime. So
|
||||||
|
> convert a string to an AST, run tons of optimizations and convert it to
|
||||||
|
> specialized byte-code and finally run it.
|
||||||
|
>
|
||||||
|
> When we are talking about scientific computing, this means we should do
|
||||||
|
> aggressive inlining, optimizations that may take a lot of time... e.g. if
|
||||||
|
> an optimization strategy takes 4min and it improves the computation by 20%
|
||||||
|
> then with calculations running longer than 20min it's worthwhile to run
|
||||||
|
> that optimization... but it may not be a good idea for general programming.
|
||||||
|
>
|
||||||
|
> I do not like that separation between "scientific" and "general"
|
||||||
|
computing. We both want our code clean, and have to deal with large
|
||||||
|
programs. Still, I agree that if you want to use special hardware and the
|
||||||
|
like, or want the even the *last bit* of performance, no matter what, it is
|
||||||
|
reasonable that you pay the price. I suspect the current approach of
|
||||||
|
delegating those things to C is good enough, but I do not have a strong
|
||||||
|
opinion (well, my strong opinion, like yours, is "leave them out of the
|
||||||
|
language").
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
> So, maybe, the better approach for scientific computation is to provide
|
||||||
|
> the packages for DSLs, AST conversions, transformations, optimizations,
|
||||||
|
> targets and nice runtime code generation support. I know the details will
|
||||||
|
> get complicated, but maybe it's a better than adding every single thing to
|
||||||
|
> the compiler. With Go compiler eventually being written in Go, it would
|
||||||
|
> mean that we can use the same code in runtime code generation.
|
||||||
|
>
|
||||||
|
> What are your thoughts on this?
|
||||||
|
>
|
||||||
|
>
|
||||||
|
I will not speak for them, but I don't see that the numerical/scientific Go
|
||||||
|
comunity (mostly, the gonum people) has proposed or even thinks that we
|
||||||
|
should add every single thing to the compiler, but are actually against
|
||||||
|
that. That part sounds bit like a straw man to me. We want a natural way
|
||||||
|
to represent a very common data structure. We do not want fotran-like
|
||||||
|
matrix multiplication in the language, and we most certainly have not asked
|
||||||
|
for language support for the other things you mention. For several of them,
|
||||||
|
we can probably just work them out at library level, for some, like the GPU
|
||||||
|
thing, we probably will always need to delegate to C (or, actually, CUDA or
|
||||||
|
similar).
|
||||||
|
|
||||||
|
I am not particularly against your proposal, but I don't like that it is
|
||||||
|
presented as an alternative to what the scientific/numerical Go community
|
||||||
|
are asking for.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
+ egon
|
||||||
|
>
|
||||||
|
> (PS. I'm probably not the best person for implementing it, but maybe
|
||||||
|
> someone else gets inspired and runs with the idea :) )
|
||||||
|
>
|
||||||
|
|
||||||
|
--
|
||||||
|
You received this message because you are subscribed to the Google Groups "golang-nuts" group.
|
||||||
|
To unsubscribe from this group and stop receiving emails from it, send an email to golang-nuts+unsubscribe@googlegroups.com.
|
||||||
|
For more options, visit https://groups.google.com/d/optout.
|
||||||
@ -34,8 +34,6 @@ type Messages struct {
|
|||||||
Statuses []Status
|
Statuses []Status
|
||||||
}
|
}
|
||||||
|
|
||||||
var headers = []string{"to", "from", "cc", "date", "subject"}
|
|
||||||
|
|
||||||
func (m *Messages) hashMail(path string, info os.FileInfo, err error) error {
|
func (m *Messages) hashMail(path string, info os.FileInfo, err error) error {
|
||||||
glog.Infoln("Processing", path)
|
glog.Infoln("Processing", path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -58,7 +56,7 @@ func (m *Messages) hashMail(path string, info os.FileInfo, err error) error {
|
|||||||
}
|
}
|
||||||
defer r.Close()
|
defer r.Close()
|
||||||
|
|
||||||
h, err := email.Hash(r, headers)
|
h, err := email.HashReader(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("%s not an mail file", path)
|
glog.Errorf("%s not an mail file", path)
|
||||||
glog.Infof("%q", err.Error())
|
glog.Infof("%q", err.Error())
|
||||||
@ -67,7 +65,7 @@ func (m *Messages) hashMail(path string, info os.FileInfo, err error) error {
|
|||||||
md := email.NewInfo(path)
|
md := email.NewInfo(path)
|
||||||
m.Statuses = append(m.Statuses, Status{
|
m.Statuses = append(m.Statuses, Status{
|
||||||
Path: path,
|
Path: path,
|
||||||
Hash: fmt.Sprintf("%x", h.Sum(nil)),
|
Hash: h,
|
||||||
Read: md.Seen,
|
Read: md.Seen,
|
||||||
})
|
})
|
||||||
return nil
|
return nil
|
||||||
@ -139,14 +137,13 @@ func (m Messages) Reconcile(maildir string) error {
|
|||||||
}
|
}
|
||||||
defer r.Close()
|
defer r.Close()
|
||||||
|
|
||||||
h, err := email.Hash(r, headers)
|
chksum, err := email.HashReader(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("%s not an mail file", path)
|
glog.Errorf("%s not an mail file", path)
|
||||||
glog.Infof("%q", err.Error())
|
glog.Infof("%q", err.Error())
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
chksum := fmt.Sprintf("%x", h.Sum(nil))
|
|
||||||
md := email.NewInfo(path)
|
md := email.NewInfo(path)
|
||||||
s, ok := hashMap[chksum]
|
s, ok := hashMap[chksum]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
|||||||
@ -29,9 +29,6 @@ var (
|
|||||||
skipFiles = flag.String("skip", "maildirfolder,log,msgid.cache,razor-agent.log",
|
skipFiles = flag.String("skip", "maildirfolder,log,msgid.cache,razor-agent.log",
|
||||||
"comma separated files to skip")
|
"comma separated files to skip")
|
||||||
|
|
||||||
// Hashed over fields from each message.
|
|
||||||
headers = []string{"to", "from", "cc", "date", "subject", "message-id"}
|
|
||||||
|
|
||||||
total = expvar.NewInt("bytes-parsed")
|
total = expvar.NewInt("bytes-parsed")
|
||||||
cnt = expvar.NewInt("messages-parsed")
|
cnt = expvar.NewInt("messages-parsed")
|
||||||
dupCnt = expvar.NewInt("duplicates-found")
|
dupCnt = expvar.NewInt("duplicates-found")
|
||||||
@ -101,14 +98,13 @@ func Load(db *sql.DB, uid int, root string, skip *set.StringSet) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
hdr_size := bytes.Index(b, CRCR)
|
hdr_size := bytes.Index(b, CRCR)
|
||||||
h, err := email.Hash(bytes.NewReader(b), headers)
|
chksum, err := email.HashReader(bytes.NewReader(b))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("%s not an mail file", path)
|
glog.Errorf("%s not an mail file", path)
|
||||||
glog.Infof("%q", err.Error())
|
glog.Infof("%q", err.Error())
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
chksum := fmt.Sprintf("%x", h.Sum(nil))
|
|
||||||
if _, err := hstmt.Exec(chksum, path); err != nil {
|
if _, err := hstmt.Exec(chksum, path); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
64
hash.go
64
hash.go
@ -2,37 +2,61 @@ package email
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/sha1"
|
"crypto/sha1"
|
||||||
|
"fmt"
|
||||||
"hash"
|
"hash"
|
||||||
"io"
|
"io"
|
||||||
"net/mail"
|
"net/mail"
|
||||||
"os"
|
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Hash will parse r as an email, and return a hash.Hash that has been applied
|
// Hasher is a list of headers that should be considered when hashing an email
|
||||||
// to the values of the specified headers.
|
// message.
|
||||||
func Hash(r io.Reader, headers []string) (hash.Hash, error) {
|
type Hasher []string
|
||||||
// Add deterministic behavior regardless of the order the users specified.
|
|
||||||
sort.Strings(headers)
|
|
||||||
var name string
|
|
||||||
if f, ok := r.(*os.File); ok {
|
|
||||||
name = f.Name()
|
|
||||||
}
|
|
||||||
|
|
||||||
h := sha1.New()
|
// Hash will parse r as an email, and return a hash.Hash that has been applied
|
||||||
|
// to the values of the headers in h.
|
||||||
|
func (h Hasher) HashMessage(msg *mail.Message) (hash.Hash, error) {
|
||||||
|
// Add deterministic behavior regardless of the order the users specified.
|
||||||
|
if !sort.IsSorted(sort.StringSlice(h)) {
|
||||||
|
sort.Strings(h)
|
||||||
|
}
|
||||||
|
hsh := sha1.New()
|
||||||
|
for _, header := range h {
|
||||||
|
v := msg.Header.Get(header)
|
||||||
|
if v == "" {
|
||||||
|
glog.V(2).Infoln("Empty", header, "header")
|
||||||
|
}
|
||||||
|
io.WriteString(hsh, v)
|
||||||
|
}
|
||||||
|
return hsh, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h Hasher) HashReader(r io.Reader) (hash.Hash, error) {
|
||||||
msg, err := mail.ReadMessage(r)
|
msg, err := mail.ReadMessage(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
return h.HashMessage(msg)
|
||||||
for _, header := range headers {
|
}
|
||||||
v := msg.Header.Get(header)
|
|
||||||
if v == "" {
|
var std = Hasher([]string{"to", "from", "cc", "date", "subject", "message-id"})
|
||||||
glog.V(2).Infoln(name, "Empty", header, "header")
|
|
||||||
}
|
// HashReader will parse r as an email, and return the hash as a hexadecimal string
|
||||||
io.WriteString(h, v)
|
// using a default set of headers.
|
||||||
}
|
func HashReader(r io.Reader) (string, error) {
|
||||||
return h, nil
|
h, err := std.HashReader(r)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%x", h.Sum(nil)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func HashMessage(msg *mail.Message) (string, error) {
|
||||||
|
h, err := std.HashMessage(msg)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%x", h.Sum(nil)), nil
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user