214 lines
6.0 KiB
Go
214 lines
6.0 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/golang/glog"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
|
|
"golang.org/x/crypto/ssh"
|
|
)
|
|
|
|
// Command-line flags selecting where metrics are served and which hosts are
// scraped.
var (
	// addr is the listen address of the HTTP server that exposes /metrics.
	addr = flag.String("addr", "localhost:9999", "HTTP listen address for prometheus /metrics")
	// hosts is a comma-separated list of SSH targets. main splits the value on
	// "," and uses $USER for any entry that has no "user@" prefix.
	hosts = flag.String("hosts", os.Getenv("USER")+"@localhost:22", "comma-separated list of [user@]host:port targets to log into and scrape")
	// refreshInterval is how long each per-host loop sleeps between scrapes.
	refreshInterval = flag.Duration("refresh", 5*time.Minute, "refresh interval time")
)
|
|
|
|
// snapshotListCmd is the command updateMetrics runs on each remote host over
// SSH. updateMetrics reads its output line by line and treats the text before
// the first "@" on each line as the filesystem name.
// NOTE(review): the flags presumably request headerless (-H), name-only
// (-o name) output sorted by name (-s name) — confirm against zfs-list(8).
const snapshotListCmd = "/sbin/zfs list -t snapshot -H -o name -s name"
|
|
|
|
// Example: @auto-20171001.1400-2w
|
|
var (
|
|
snapshotPattern = regexp.MustCompile(`^[^@]+@auto-(\d{8}\.\d{4})-\d+[mwy]$`)
|
|
snapshotFormat = "20060102.1504"
|
|
|
|
fetchRequestDurationMetric = prometheus.NewGauge(prometheus.GaugeOpts{
|
|
Name: "ssh_fetch_duration_seconds",
|
|
Help: "Time to fetch and parse snapshot age over SSH",
|
|
})
|
|
snapshotAgesMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "zfs_snapshot_age_seconds",
|
|
Help: "Duration in seconds for most recent snapshot for `filesystem`",
|
|
},
|
|
[]string{"host", "filesystem"},
|
|
)
|
|
snapshotTimestampMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "zfs_snapshot_timestamp_seconds",
|
|
Help: "Most recent snapshot timestamp for `filesystem` UNIX epoch seconds",
|
|
},
|
|
[]string{"host", "filesystem"},
|
|
)
|
|
snapshotCountsMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "zfs_snapshot_count",
|
|
Help: "Count of snapshots per-filesystem",
|
|
},
|
|
[]string{"host", "filesystem", "type"},
|
|
)
|
|
)
|
|
|
|
func init() {
|
|
prometheus.MustRegister(fetchRequestDurationMetric)
|
|
prometheus.MustRegister(snapshotAgesMetric)
|
|
prometheus.MustRegister(snapshotTimestampMetric)
|
|
prometheus.MustRegister(snapshotCountsMetrics)
|
|
}
|
|
|
|
func newPublicKey() ([]ssh.AuthMethod, error) {
|
|
var signers []ssh.AuthMethod
|
|
|
|
for _, path := range []string{
|
|
filepath.Join(os.Getenv("HOME"), ".ssh", "id_dsa"),
|
|
filepath.Join(os.Getenv("HOME"), ".ssh", "id_rsa"),
|
|
} {
|
|
|
|
// A public key may be used to authenticate against the remote
|
|
// server by using an unencrypted PEM-encoded private key file.
|
|
//
|
|
// If you have an encrypted private key, the crypto/x509 package
|
|
// can be used to decrypt it.
|
|
key, err := ioutil.ReadFile(path)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
return nil, fmt.Errorf("unable to read private key %q: %v", path, err)
|
|
}
|
|
|
|
// Create the Signer for this private key.
|
|
signer, err := ssh.ParsePrivateKey(key)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to parse private key %q: %v", path, err)
|
|
}
|
|
signers = append(signers, ssh.PublicKeys(signer))
|
|
}
|
|
if len(signers) == 0 {
|
|
return nil, fmt.Errorf("no public keys configured")
|
|
}
|
|
return signers, nil
|
|
}
|
|
|
|
func updateMetrics(host string, c *ssh.Client) error {
|
|
now := time.Now()
|
|
defer func() {
|
|
delta := time.Since(now)
|
|
fetchRequestDurationMetric.Set(delta.Seconds())
|
|
glog.V(2).Infof("[%s] Update took %s", host, delta)
|
|
}()
|
|
// Each ClientConn can support multiple interactive sessions,
|
|
// represented by a Session.
|
|
s, err := c.NewSession()
|
|
if err != nil {
|
|
return fmt.Errorf("[%s] error creating new session: %v", host, err)
|
|
}
|
|
defer s.Close()
|
|
|
|
glog.V(2).Infof("[%s] Running %q", host, snapshotListCmd)
|
|
b, err := s.CombinedOutput(snapshotListCmd)
|
|
if err != nil {
|
|
return fmt.Errorf("[%s] error running %q: %v", host, snapshotListCmd, err)
|
|
}
|
|
scanner := bufio.NewScanner(bytes.NewReader(b))
|
|
snapshotAges := make(map[string]time.Time)
|
|
snapshotCountsByFilesystem := make(map[string]int)
|
|
freenasSnapshotCountsByFilesystem := make(map[string]int)
|
|
for scanner.Scan() {
|
|
l := scanner.Text()
|
|
m := snapshotPattern.FindStringSubmatch(l)
|
|
|
|
filesystem := l
|
|
if idx := strings.Index(l, "@"); idx != -1 {
|
|
filesystem = l[:idx]
|
|
}
|
|
snapshotCountsByFilesystem[filesystem]++
|
|
if len(m) == 2 {
|
|
t, err := time.Parse(snapshotFormat, m[1])
|
|
if err != nil {
|
|
glog.Errorf("[%s] Malformed time in snapshot %q: %v", host, m[2], err)
|
|
continue
|
|
}
|
|
freenasSnapshotCountsByFilesystem[filesystem]++
|
|
snapshotTime := snapshotAges[filesystem]
|
|
if snapshotTime.Before(t) {
|
|
snapshotAges[filesystem] = t
|
|
}
|
|
}
|
|
}
|
|
if err := scanner.Err(); err != nil {
|
|
return fmt.Errorf("[%s] failed to scan response: %v", host, err)
|
|
}
|
|
|
|
for filesystem, c := range snapshotCountsByFilesystem {
|
|
snapshotCountsMetrics.WithLabelValues(host, filesystem, "all").Set(float64(c))
|
|
}
|
|
for filesystem, c := range freenasSnapshotCountsByFilesystem {
|
|
snapshotCountsMetrics.WithLabelValues(host, filesystem, "freenas").Set(float64(c))
|
|
}
|
|
for filesystem, snapshotTime := range snapshotAges {
|
|
snapshotAgesMetric.WithLabelValues(host, filesystem).Set(now.Sub(snapshotTime).Seconds())
|
|
snapshotTimestampMetric.WithLabelValues(host, filesystem).Set(float64(snapshotTime.Unix()))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
defer glog.Flush()
|
|
|
|
ams, err := newPublicKey()
|
|
if err != nil {
|
|
glog.Exitf("Error fetching public keys: %v", err)
|
|
}
|
|
|
|
for _, userHost := range strings.Split(*hosts, ",") {
|
|
u := os.Getenv("USER")
|
|
h := userHost
|
|
if idx := strings.Index(userHost, "@"); idx != -1 {
|
|
u = userHost[:idx]
|
|
h = userHost[idx+1:]
|
|
}
|
|
go func(user, host string) {
|
|
glog.Infof("Dialing %s@%s", user, host)
|
|
config := &ssh.ClientConfig{
|
|
User: user,
|
|
Auth: ams,
|
|
// TODO(wathiede); use FixedHostKey?
|
|
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
|
Timeout: 5 * time.Second,
|
|
}
|
|
for {
|
|
var c *ssh.Client
|
|
if c == nil {
|
|
var err error
|
|
c, err = ssh.Dial("tcp", host, config)
|
|
if err != nil {
|
|
glog.Errorf("Error dialing %q: %v", host, err)
|
|
}
|
|
}
|
|
if c != nil {
|
|
if err := updateMetrics(host, c); err != nil {
|
|
glog.Errorf("Failed to update metrics: %v", err)
|
|
c.Close()
|
|
c = nil
|
|
}
|
|
}
|
|
time.Sleep(*refreshInterval)
|
|
}
|
|
}(u, h)
|
|
}
|
|
|
|
// Expose the registered metrics via HTTP.
|
|
http.Handle("/metrics", promhttp.Handler())
|
|
glog.Exitf("Failed to ListenAndServe: %v", http.ListenAndServe(*addr, nil))
|
|
}
|