diff --git a/zfs_replication_exporter.go b/zfs_replication_exporter.go index 7907502..dc6bccc 100644 --- a/zfs_replication_exporter.go +++ b/zfs_replication_exporter.go @@ -22,9 +22,8 @@ import ( var ( addr = flag.String("addr", "localhost:9999", "HTTP listen address for prometheus /metrics") - host = flag.String("host", "localhost:22", "host:port log into and scrape") + hosts = flag.String("hosts", os.Getenv("USER")+"@localhost:22", "user@host:port log into and scrape") refreshInterval = flag.Duration("refresh", 5*time.Minute, "refresh interval time") - user = flag.String("user", os.Getenv("USER"), "ssh user") ) const snapshotListCmd = "/sbin/zfs list -t snapshot -H -o name -s name" @@ -42,13 +41,13 @@ var ( Name: "zfs_snapshot_age_seconds", Help: "Duration in seconds for most recent snapshot for `filesystem`", }, - []string{"filesystem"}, + []string{"host", "filesystem"}, ) snapshotCountsMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "zfs_snapshot_count", Help: "Count of snapshots per-filesystem", }, - []string{"filesystem", "type"}, + []string{"host", "filesystem", "type"}, ) ) @@ -92,25 +91,25 @@ func newPublicKey() ([]ssh.AuthMethod, error) { return signers, nil } -func updateMetrics(c *ssh.Client) error { +func updateMetrics(host string, c *ssh.Client) error { now := time.Now() defer func() { delta := time.Since(now) fetchRequestDurationMetric.Set(delta.Seconds()) - glog.V(2).Infof("Update took %s", delta) + glog.V(2).Infof("[%s] Update took %s", host, delta) }() // Each ClientConn can support multiple interactive sessions, // represented by a Session. s, err := c.NewSession() if err != nil { - return fmt.Errorf("error creating new session: %v", err) + return fmt.Errorf("[%s] error creating new session: %v", host, err) } defer s.Close() - glog.V(2).Infof("Running %q", snapshotListCmd) + glog.V(2).Infof("[%s] Running %q", host, snapshotListCmd) b, err := s.CombinedOutput(snapshotListCmd) if err != nil { - return fmt.Errorf("error running %q: %v", snapshotListCmd, err) + return fmt.Errorf("[%s] error running %q: %v", host, snapshotListCmd, err) } scanner := bufio.NewScanner(bytes.NewReader(b)) snapshotAges := make(map[string]time.Time) @@ -128,7 +127,7 @@ func updateMetrics(c *ssh.Client) error { if len(m) == 2 { t, err := time.Parse(snapshotFormat, m[1]) if err != nil { - glog.Errorf("Malformed time in snapshot %q: %v", m[2], err) + glog.Errorf("[%s] Malformed time in snapshot %q: %v", host, m[2], err) continue } freenasSnapshotCountsByFilesystem[filesystem]++ @@ -139,17 +138,17 @@ func updateMetrics(c *ssh.Client) error { } } if err := scanner.Err(); err != nil { - return fmt.Errorf("failed to scan response: %v", err) + return fmt.Errorf("[%s] failed to scan response: %v", host, err) } for filesystem, c := range snapshotCountsByFilesystem { - snapshotCountsMetrics.WithLabelValues(filesystem, "all").Set(float64(c)) + snapshotCountsMetrics.WithLabelValues(host, filesystem, "all").Set(float64(c)) } for filesystem, c := range freenasSnapshotCountsByFilesystem { - snapshotCountsMetrics.WithLabelValues(filesystem, "freenas").Set(float64(c)) + snapshotCountsMetrics.WithLabelValues(host, filesystem, "freenas").Set(float64(c)) } for filesystem, snapshotTime := range snapshotAges { - snapshotAgesMetric.WithLabelValues(filesystem).Set(now.Sub(snapshotTime).Seconds()) + snapshotAgesMetric.WithLabelValues(host, filesystem).Set(now.Sub(snapshotTime).Seconds()) } return nil } @@ -163,36 +162,40 @@ func main() { glog.Exitf("Error fetching public keys: %v", err) } - // An SSH client is represented with a ClientConn. - // - // To authenticate with the remote server you must pass at least one - // implementation of ClientAuth via the Auth field in ClientConfig. - config := &ssh.ClientConfig{ - User: *user, - Auth: ams, - // TODO(wathiede); use FixedHostKey? - HostKeyCallback: ssh.InsecureIgnoreHostKey(), - Timeout: 5 * time.Second, - } - - go func() { - for { - var c *ssh.Client - if c == nil { - var err error - c, err = ssh.Dial("tcp", *host, config) - if err != nil { - glog.Errorf("Error dialing %q: %v", *host, err) - } - } - if err := updateMetrics(c); err != nil { - glog.Errorf("Failed to update metrics: %v", err) - c.Close() - c = nil - } - time.Sleep(*refreshInterval) + for _, userHost := range strings.Split(*hosts, ",") { + u := os.Getenv("USER") + h := userHost + if idx := strings.Index(userHost, "@"); idx != -1 { + u = userHost[:idx] + h = userHost[idx+1:] } - }() + go func(user, host string) { + glog.Infof("Dialing %s@%s", user, host) + config := &ssh.ClientConfig{ + User: user, + Auth: ams, + // TODO(wathiede); use FixedHostKey? + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + Timeout: 5 * time.Second, + } + for { + var c *ssh.Client + if c == nil { + var err error + c, err = ssh.Dial("tcp", host, config) + if err != nil { + glog.Errorf("Error dialing %q: %v", host, err) + } + } + if err := updateMetrics(host, c); err != nil { + glog.Errorf("Failed to update metrics: %v", err) + c.Close() + c = nil + } + time.Sleep(*refreshInterval) + } + }(u, h) + } // Expose the registered metrics via HTTP. http.Handle("/metrics", promhttp.Handler())