package main import ( "bufio" "bytes" "flag" "fmt" "io/ioutil" "net/http" _ "net/http/pprof" "os" "path/filepath" "regexp" "strings" "time" "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "golang.org/x/crypto/ssh" ) var ( addr = flag.String("addr", "localhost:9999", "HTTP listen address for prometheus /metrics") hosts = flag.String("hosts", os.Getenv("USER")+"@localhost:22", "user@host:port log into and scrape") refreshInterval = flag.Duration("refresh", 5*time.Minute, "refresh interval time") ) const snapshotListCmd = "/sbin/zfs list -t snapshot -H -o name -s name" var ( // Example: @auto-20171001.1400-2w snapshotPattern = regexp.MustCompile(`^[^@]+@auto-(\d{8}\.\d{4})-\d+[mwy]$`) snapshotFormat = "20060102.1504" fetchRequestDurationMetric = prometheus.NewGauge(prometheus.GaugeOpts{ Name: "ssh_fetch_duration_seconds", Help: "Time to fetch and parse snapshot age over SSH", }) snapshotAgesMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "zfs_snapshot_age_seconds", Help: "Duration in seconds for most recent snapshot for `filesystem`", }, []string{"host", "filesystem"}, ) snapshotTimestampMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "zfs_snapshot_timestamp_seconds", Help: "Most recent snapshot timestamp for `filesystem` UNIX epoch seconds", }, []string{"host", "filesystem"}, ) snapshotCountsMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "zfs_snapshot_count", Help: "Count of snapshots per-filesystem", }, []string{"host", "filesystem", "type"}, ) ) func init() { prometheus.MustRegister(fetchRequestDurationMetric) prometheus.MustRegister(snapshotAgesMetric) prometheus.MustRegister(snapshotTimestampMetric) prometheus.MustRegister(snapshotCountsMetrics) } func newPublicKey() ([]ssh.AuthMethod, error) { var signers []ssh.AuthMethod for _, path := range []string{ filepath.Join(os.Getenv("HOME"), ".ssh", "id_dsa"), filepath.Join(os.Getenv("HOME"), ".ssh", "id_rsa"), } { // A public key may be used to authenticate against the remote // server by using an unencrypted PEM-encoded private key file. // // If you have an encrypted private key, the crypto/x509 package // can be used to decrypt it. key, err := ioutil.ReadFile(path) if err != nil { if os.IsNotExist(err) { continue } return nil, fmt.Errorf("unable to read private key %q: %v", path, err) } // Create the Signer for this private key. signer, err := ssh.ParsePrivateKey(key) if err != nil { return nil, fmt.Errorf("unable to parse private key %q: %v", path, err) } signers = append(signers, ssh.PublicKeys(signer)) } if len(signers) == 0 { return nil, fmt.Errorf("no public keys configured") } return signers, nil } func updateMetrics(host string, c *ssh.Client) error { now := time.Now() defer func() { delta := time.Since(now) fetchRequestDurationMetric.Set(delta.Seconds()) glog.V(2).Infof("[%s] Update took %s", host, delta) }() // Each ClientConn can support multiple interactive sessions, // represented by a Session. s, err := c.NewSession() if err != nil { return fmt.Errorf("[%s] error creating new session: %v", host, err) } defer s.Close() glog.V(2).Infof("[%s] Running %q", host, snapshotListCmd) b, err := s.CombinedOutput(snapshotListCmd) if err != nil { return fmt.Errorf("[%s] error running %q: %v", host, snapshotListCmd, err) } scanner := bufio.NewScanner(bytes.NewReader(b)) snapshotAges := make(map[string]time.Time) snapshotCountsByFilesystem := make(map[string]int) freenasSnapshotCountsByFilesystem := make(map[string]int) for scanner.Scan() { l := scanner.Text() m := snapshotPattern.FindStringSubmatch(l) filesystem := l if idx := strings.Index(l, "@"); idx != -1 { filesystem = l[:idx] } snapshotCountsByFilesystem[filesystem]++ if len(m) == 2 { t, err := time.Parse(snapshotFormat, m[1]) if err != nil { glog.Errorf("[%s] Malformed time in snapshot %q: %v", host, m[2], err) continue } freenasSnapshotCountsByFilesystem[filesystem]++ snapshotTime := snapshotAges[filesystem] if snapshotTime.Before(t) { snapshotAges[filesystem] = t } } } if err := scanner.Err(); err != nil { return fmt.Errorf("[%s] failed to scan response: %v", host, err) } for filesystem, c := range snapshotCountsByFilesystem { snapshotCountsMetrics.WithLabelValues(host, filesystem, "all").Set(float64(c)) } for filesystem, c := range freenasSnapshotCountsByFilesystem { snapshotCountsMetrics.WithLabelValues(host, filesystem, "freenas").Set(float64(c)) } for filesystem, snapshotTime := range snapshotAges { snapshotAgesMetric.WithLabelValues(host, filesystem).Set(now.Sub(snapshotTime).Seconds()) snapshotTimestampMetric.WithLabelValues(host, filesystem).Set(float64(snapshotTime.Unix())) } return nil } func main() { flag.Parse() defer glog.Flush() ams, err := newPublicKey() if err != nil { glog.Exitf("Error fetching public keys: %v", err) } for _, userHost := range strings.Split(*hosts, ",") { u := os.Getenv("USER") h := userHost if idx := strings.Index(userHost, "@"); idx != -1 { u = userHost[:idx] h = userHost[idx+1:] } go func(user, host string) { glog.Infof("Dialing %s@%s", user, host) config := &ssh.ClientConfig{ User: user, Auth: ams, // TODO(wathiede); use FixedHostKey? HostKeyCallback: ssh.InsecureIgnoreHostKey(), Timeout: 5 * time.Second, } var c *ssh.Client for { if c == nil { var err error c, err = ssh.Dial("tcp", host, config) if err != nil { glog.Errorf("Error dialing %q: %v", host, err) } } if c != nil { if err := updateMetrics(host, c); err != nil { glog.Errorf("Failed to update metrics: %v", err) c.Close() c = nil } } time.Sleep(*refreshInterval) } }(u, h) } // Expose the registered metrics via HTTP. http.Handle("/metrics", promhttp.Handler()) glog.Exitf("Failed to ListenAndServe: %v", http.ListenAndServe(*addr, nil)) }