// Source: zfs_replication_exporter/zfs_replication_exporter.go
// (Go, 173 lines, 4.5 KiB as originally published)

package main
import (
"bufio"
"bytes"
"flag"
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"regexp"
"time"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/crypto/ssh"
)
// Command-line flags. Each is registered with the default flag set at package
// initialization and holds its default value until flag.Parse is called.

// addr is the host:port the HTTP server listens on to serve /metrics.
var addr = flag.String("addr", "localhost:9999", "HTTP listen address for prometheus /metrics")

// host is the host:port of the SSH server that is dialed.
var host = flag.String("host", "localhost:22", "host:port to scp from")

// refreshInterval is how long the refresh loop sleeps between two updates.
var refreshInterval = flag.Duration("refresh", 5*time.Minute, "refresh interval time")

// user is the SSH login name; it defaults to the USER environment variable.
var user = flag.String("user", os.Getenv("USER"), "ssh user")
// snapshotListCmd is the command run on the remote host. It asks zfs for
// snapshot names only (-o name), sorted by name (-s name), with no header
// line (-H), so the output is one snapshot name per line.
const snapshotListCmd = "/sbin/zfs list -t snapshot -H -o name -s name"

// snapshotPattern matches snapshot names whose part after the '@' looks like
// "auto-20171001.1400-2w". Capture group 1 is everything before the '@' and
// capture group 2 is the timestamp, laid out as described by snapshotFormat.
// Only names ending in -<digits> followed by m, w or y match (presumably a
// retention period; confirm against the tool that creates the snapshots).
var snapshotPattern = regexp.MustCompile(`^([^@]+)@auto-(\d{8}\.\d{4})-\d+[mwy]$`)

// snapshotFormat is the time.Parse layout of the timestamp captured by
// snapshotPattern: YYYYMMDD.HHMM.
var snapshotFormat = "20060102.1504"

// fetchRequestDurationMetric records how long the most recent refresh took,
// in seconds.
var fetchRequestDurationMetric = prometheus.NewGauge(
	prometheus.GaugeOpts{
		Name: "ssh_fetch_duration_seconds",
		Help: "Time to fetch and parse snapshot age over SSH",
	},
)

// snapshotAgesMetric records, per "filesystem" label, the age in seconds of
// the newest matching snapshot seen during the most recent refresh.
var snapshotAgesMetric = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "zfs_snapshot_age_seconds",
		Help: "Duration in seconds for most recent snapshot for `filesystem`",
	},
	[]string{"filesystem"},
)
// init registers both exporter metrics with the default Prometheus registry.
// MustRegister panics if a registration fails.
func init() {
	prometheus.MustRegister(
		fetchRequestDurationMetric,
		snapshotAgesMetric,
	)
}
// newPublicKey builds the SSH authentication methods from the private keys
// found at $HOME/.ssh/id_dsa and $HOME/.ssh/id_rsa.
//
// The key files must be unencrypted and PEM-encoded. A key file that does not
// exist is skipped; any other read failure, or a key that cannot be parsed, is
// returned as an error. An error is also returned when no key could be loaded.
//
// All loaded keys are returned as ONE "publickey" AuthMethod. The ssh client
// only uses the first AuthMethod of each authentication method type, so
// returning one AuthMethod per key would leave every key but the first unused.
func newPublicKey() ([]ssh.AuthMethod, error) {
	home := os.Getenv("HOME")

	var signers []ssh.Signer
	for _, name := range []string{"id_dsa", "id_rsa"} {
		path := filepath.Join(home, ".ssh", name)

		key, err := ioutil.ReadFile(path)
		if err != nil {
			if os.IsNotExist(err) {
				// Having only one of the two key types is normal.
				continue
			}
			return nil, fmt.Errorf("unable to read private key %q: %v", path, err)
		}

		// Create the Signer for this private key.
		signer, err := ssh.ParsePrivateKey(key)
		if err != nil {
			return nil, fmt.Errorf("unable to parse private key %q: %v", path, err)
		}
		signers = append(signers, signer)
	}

	if len(signers) == 0 {
		return nil, fmt.Errorf("no public keys configured")
	}
	return []ssh.AuthMethod{ssh.PublicKeys(signers...)}, nil
}
// updateMetrics runs snapshotListCmd on the remote host over c, finds the
// newest matching snapshot per filesystem and sets snapshotAgesMetric to its
// age in seconds. The wall-clock duration of the call is always recorded in
// fetchRequestDurationMetric, whether or not the update succeeds.
//
// It returns an error if the session cannot be created, the remote command
// fails, or the output cannot be scanned. On command failure the error
// includes a bounded prefix of the command's combined stdout/stderr so the
// remote failure reason is visible in the logs.
func updateMetrics(c *ssh.Client) error {
	now := time.Now()
	defer func() {
		delta := time.Since(now)
		fetchRequestDurationMetric.Set(delta.Seconds())
		glog.V(2).Infof("Update took %s", delta)
	}()

	// Each client connection can carry multiple sessions; one session runs
	// exactly one command.
	s, err := c.NewSession()
	if err != nil {
		return fmt.Errorf("error creating new session: %v", err)
	}
	defer s.Close()

	glog.V(2).Infof("Running %q", snapshotListCmd)
	b, err := s.CombinedOutput(snapshotListCmd)
	if err != nil {
		// Cap the quoted output so a failure after a long listing does not
		// flood the log.
		const maxErrOutput = 512
		out := bytes.TrimSpace(b)
		if len(out) > maxErrOutput {
			out = out[:maxErrOutput]
		}
		return fmt.Errorf("error running %q: %v (output: %q)", snapshotListCmd, err, out)
	}

	latest, err := latestSnapshotTimes(b)
	if err != nil {
		return err
	}
	for filesystem, t := range latest {
		snapshotAgesMetric.WithLabelValues(filesystem).Set(now.Sub(t).Seconds())
	}
	return nil
}

// latestSnapshotTimes scans the newline-separated snapshot names in out and
// returns, for each name prefix captured by snapshotPattern, the newest
// timestamp seen. Lines that do not match snapshotPattern are ignored; lines
// whose timestamp fails to parse are logged and skipped.
//
// NOTE(review): time.Parse interprets the timestamp as UTC. If the snapshot
// names are stamped in the remote host's local time, the reported age is off
// by that host's UTC offset — confirm.
func latestSnapshotTimes(out []byte) (map[string]time.Time, error) {
	latest := map[string]time.Time{}

	scanner := bufio.NewScanner(bytes.NewReader(out))
	for scanner.Scan() {
		m := snapshotPattern.FindStringSubmatch(scanner.Text())
		if len(m) != 3 {
			continue
		}
		t, err := time.Parse(snapshotFormat, m[2])
		if err != nil {
			glog.Errorf("Malformed time in snapshot %q: %v", m[2], err)
			continue
		}
		// A missing map entry is the zero time, which is before any parsed
		// timestamp, so the first snapshot of a filesystem always wins.
		if latest[m[1]].Before(t) {
			latest[m[1]] = t
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to scan response: %v", err)
	}
	return latest, nil
}
// main parses the flags, loads the SSH keys, connects to the remote host and
// then serves the Prometheus metrics over HTTP while a background goroutine
// refreshes them every refreshInterval.
//
// Startup fails fast: missing keys or a failed initial dial exit the process.
// After startup, a failed refresh closes the SSH connection and the next
// refresh redials, so the exporter recovers from a dropped connection without
// having to be restarted.
func main() {
	flag.Parse()
	defer glog.Flush()

	ams, err := newPublicKey()
	if err != nil {
		glog.Exitf("Error fetching public keys: %v", err)
	}

	// To authenticate with the remote server at least one AuthMethod must be
	// passed via the Auth field.
	config := &ssh.ClientConfig{
		User: *user,
		Auth: ams,
		// TODO(wathiede); use FixedHostKey?
		// InsecureIgnoreHostKey accepts any host key, so the identity of the
		// remote host is not verified.
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
	}

	c, err := ssh.Dial("tcp", *host, config)
	if err != nil {
		glog.Exitf("Error dialing %q: %v", *host, err)
	}

	// The refresh loop owns the connection from here on; main never touches
	// it again, so no locking is needed.
	go func(c *ssh.Client) {
		for ; ; time.Sleep(*refreshInterval) {
			if c == nil {
				var err error
				if c, err = ssh.Dial("tcp", *host, config); err != nil {
					glog.Errorf("Error dialing %q: %v", *host, err)
					continue
				}
			}
			if err := updateMetrics(c); err != nil {
				glog.Errorf("Failed to update metrics: %v", err)
				// The error does not say whether the connection itself is
				// broken, so drop it unconditionally and redial on the next
				// refresh. The cost is one extra dial per failed refresh.
				if cerr := c.Close(); cerr != nil {
					glog.V(2).Infof("Error closing connection to %q: %v", *host, cerr)
				}
				c = nil
			}
		}
	}(c)

	// Expose the registered metrics via HTTP.
	http.Handle("/metrics", promhttp.Handler())
	glog.Exitf("Failed to ListenAndServe: %v", http.ListenAndServe(*addr, nil))
}