Allow multiple hosts to be scraped from a single instance.

This commit is contained in:
Bill Thiede 2017-10-10 20:58:05 -07:00
parent b93f1e01f8
commit 0a8d33ce8d

View File

@ -22,9 +22,8 @@ import (
var ( var (
addr = flag.String("addr", "localhost:9999", "HTTP listen address for prometheus /metrics") addr = flag.String("addr", "localhost:9999", "HTTP listen address for prometheus /metrics")
host = flag.String("host", "localhost:22", "host:port log into and scrape") hosts = flag.String("hosts", os.Getenv("USER")+"@localhost:22", "user@host:port log into and scrape")
refreshInterval = flag.Duration("refresh", 5*time.Minute, "refresh interval time") refreshInterval = flag.Duration("refresh", 5*time.Minute, "refresh interval time")
user = flag.String("user", os.Getenv("USER"), "ssh user")
) )
const snapshotListCmd = "/sbin/zfs list -t snapshot -H -o name -s name" const snapshotListCmd = "/sbin/zfs list -t snapshot -H -o name -s name"
@ -42,13 +41,13 @@ var (
Name: "zfs_snapshot_age_seconds", Name: "zfs_snapshot_age_seconds",
Help: "Duration in seconds for most recent snapshot for `filesystem`", Help: "Duration in seconds for most recent snapshot for `filesystem`",
}, },
[]string{"filesystem"}, []string{"host", "filesystem"},
) )
snapshotCountsMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{ snapshotCountsMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "zfs_snapshot_count", Name: "zfs_snapshot_count",
Help: "Count of snapshots per-filesystem", Help: "Count of snapshots per-filesystem",
}, },
[]string{"filesystem", "type"}, []string{"host", "filesystem", "type"},
) )
) )
@ -92,25 +91,25 @@ func newPublicKey() ([]ssh.AuthMethod, error) {
return signers, nil return signers, nil
} }
func updateMetrics(c *ssh.Client) error { func updateMetrics(host string, c *ssh.Client) error {
now := time.Now() now := time.Now()
defer func() { defer func() {
delta := time.Since(now) delta := time.Since(now)
fetchRequestDurationMetric.Set(delta.Seconds()) fetchRequestDurationMetric.Set(delta.Seconds())
glog.V(2).Infof("Update took %s", delta) glog.V(2).Infof("[%s] Update took %s", host, delta)
}() }()
// Each ClientConn can support multiple interactive sessions, // Each ClientConn can support multiple interactive sessions,
// represented by a Session. // represented by a Session.
s, err := c.NewSession() s, err := c.NewSession()
if err != nil { if err != nil {
return fmt.Errorf("error creating new session: %v", err) return fmt.Errorf("[%s] error creating new session: %v", host, err)
} }
defer s.Close() defer s.Close()
glog.V(2).Infof("Running %q", snapshotListCmd) glog.V(2).Infof("[%s] Running %q", host, snapshotListCmd)
b, err := s.CombinedOutput(snapshotListCmd) b, err := s.CombinedOutput(snapshotListCmd)
if err != nil { if err != nil {
return fmt.Errorf("error running %q: %v", snapshotListCmd, err) return fmt.Errorf("[%s] error running %q: %v", host, snapshotListCmd, err)
} }
scanner := bufio.NewScanner(bytes.NewReader(b)) scanner := bufio.NewScanner(bytes.NewReader(b))
snapshotAges := make(map[string]time.Time) snapshotAges := make(map[string]time.Time)
@ -128,7 +127,7 @@ func updateMetrics(c *ssh.Client) error {
if len(m) == 2 { if len(m) == 2 {
t, err := time.Parse(snapshotFormat, m[1]) t, err := time.Parse(snapshotFormat, m[1])
if err != nil { if err != nil {
glog.Errorf("Malformed time in snapshot %q: %v", m[2], err) glog.Errorf("[%s] Malformed time in snapshot %q: %v", host, m[2], err)
continue continue
} }
freenasSnapshotCountsByFilesystem[filesystem]++ freenasSnapshotCountsByFilesystem[filesystem]++
@ -139,17 +138,17 @@ func updateMetrics(c *ssh.Client) error {
} }
} }
if err := scanner.Err(); err != nil { if err := scanner.Err(); err != nil {
return fmt.Errorf("failed to scan response: %v", err) return fmt.Errorf("[%s] failed to scan response: %v", host, err)
} }
for filesystem, c := range snapshotCountsByFilesystem { for filesystem, c := range snapshotCountsByFilesystem {
snapshotCountsMetrics.WithLabelValues(filesystem, "all").Set(float64(c)) snapshotCountsMetrics.WithLabelValues(host, filesystem, "all").Set(float64(c))
} }
for filesystem, c := range freenasSnapshotCountsByFilesystem { for filesystem, c := range freenasSnapshotCountsByFilesystem {
snapshotCountsMetrics.WithLabelValues(filesystem, "freenas").Set(float64(c)) snapshotCountsMetrics.WithLabelValues(host, filesystem, "freenas").Set(float64(c))
} }
for filesystem, snapshotTime := range snapshotAges { for filesystem, snapshotTime := range snapshotAges {
snapshotAgesMetric.WithLabelValues(filesystem).Set(now.Sub(snapshotTime).Seconds()) snapshotAgesMetric.WithLabelValues(host, filesystem).Set(now.Sub(snapshotTime).Seconds())
} }
return nil return nil
} }
@ -163,36 +162,40 @@ func main() {
glog.Exitf("Error fetching public keys: %v", err) glog.Exitf("Error fetching public keys: %v", err)
} }
// An SSH client is represented with a ClientConn. for _, userHost := range strings.Split(*hosts, ",") {
// u := os.Getenv("USER")
// To authenticate with the remote server you must pass at least one h := userHost
// implementation of ClientAuth via the Auth field in ClientConfig. if idx := strings.Index(userHost, "@"); idx != -1 {
config := &ssh.ClientConfig{ u = userHost[:idx]
User: *user, h = userHost[idx+1:]
Auth: ams,
// TODO(wathiede); use FixedHostKey?
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
Timeout: 5 * time.Second,
}
go func() {
for {
var c *ssh.Client
if c == nil {
var err error
c, err = ssh.Dial("tcp", *host, config)
if err != nil {
glog.Errorf("Error dialing %q: %v", *host, err)
}
}
if err := updateMetrics(c); err != nil {
glog.Errorf("Failed to update metrics: %v", err)
c.Close()
c = nil
}
time.Sleep(*refreshInterval)
} }
}() go func(user, host string) {
glog.Infof("Dialing %s@%s", user, host)
config := &ssh.ClientConfig{
User: user,
Auth: ams,
// TODO(wathiede); use FixedHostKey?
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
Timeout: 5 * time.Second,
}
for {
var c *ssh.Client
if c == nil {
var err error
c, err = ssh.Dial("tcp", host, config)
if err != nil {
glog.Errorf("Error dialing %q: %v", host, err)
}
}
if err := updateMetrics(host, c); err != nil {
glog.Errorf("Failed to update metrics: %v", err)
c.Close()
c = nil
}
time.Sleep(*refreshInterval)
}
}(u, h)
}
// Expose the registered metrics via HTTP. // Expose the registered metrics via HTTP.
http.Handle("/metrics", promhttp.Handler()) http.Handle("/metrics", promhttp.Handler())