package main import ( "bufio" "bytes" "flag" "fmt" "html/template" "io/ioutil" "net/http" _ "net/http/pprof" "os" "path/filepath" "regexp" "strings" "sync" "time" "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "golang.org/x/crypto/ssh" ) var ( addr = flag.String("addr", "localhost:9999", "HTTP listen address for prometheus /metrics") hosts = flag.String("hosts", os.Getenv("USER")+"@localhost:22", "user@host:port log into and scrape") refreshInterval = flag.Duration("refresh", 5*time.Minute, "refresh interval time") ) const snapshotListCmd = "zfs list -t snapshot -H -o name -s name" var ( // FreeNAS example: // @auto-20171001.1400-2w freenasSnapshotPattern = regexp.MustCompile(`^[^@]+@auto-(\d{8}\.\d{4})-\d+[mwy]$`) // Linux examples: // @zfs-auto-snap_frequent-2023-01-15-18h00U // @zfs-auto-snap_frequent-2023-01-15-18h15U // @zfs-auto-snap_frequent-2023-01-15-18h30U // @zfs-auto-snap_frequent-2023-01-15-18h45U // @zfs-auto-snap_daily-2023-01-14-08h00U // @zfs-auto-snap_hourly-2023-01-14-19h00U // @zfs-auto-snap_weekly-2022-12-19-08h00U // @zfs-auto-snap_monthly-2022-12-01-08h00U linuxSnapshotPattern = regexp.MustCompile(`^[^@]+@zfs-auto-snap_(?:frequent|daily|hourly|weekly|monthly)-(\d{4}-\d{2}-\d{2}-\d{2}h\d{2}U)$`) // @mom-20230812163001 autosnapshotPattern = regexp.MustCompile(`^[^@]+@[^-]+-(\d{14})$`) freenasSnapshotFormat = "20060102.1504" linuxSnapshotFormat = "2006-01-02-15h04U" autosnapshotFormat = "20060102150405" fetchRequestDurationMetric = prometheus.NewGauge(prometheus.GaugeOpts{ Name: "ssh_fetch_duration_seconds", Help: "Time to fetch and parse snapshot age over SSH", }) snapshotTimestampMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "zfs_snapshot_timestamp_seconds", Help: "Most recent snapshot timestamp for `filesystem` UNIX epoch seconds", }, []string{"host", "filesystem"}, ) snapshotCountsMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "zfs_snapshot_count", Help: "Count of snapshots per-filesystem", }, []string{"host", "filesystem", "type"}, ) ) func init() { prometheus.MustRegister(fetchRequestDurationMetric) prometheus.MustRegister(snapshotTimestampMetric) prometheus.MustRegister(snapshotCountsMetrics) } func newPublicKey() ([]ssh.AuthMethod, error) { var signers []ssh.AuthMethod possiblePaths := []string{ filepath.Join(os.Getenv("HOME"), ".ssh", "id_dsa"), filepath.Join(os.Getenv("HOME"), ".ssh", "id_rsa"), } if p := os.Getenv("SSH_PRIVATE_KEY"); p != "" { possiblePaths = append([]string{p}, possiblePaths...) } for _, path := range possiblePaths { // A public key may be used to authenticate against the remote // server by using an unencrypted PEM-encoded private key file. // // If you have an encrypted private key, the crypto/x509 package // can be used to decrypt it. key, err := ioutil.ReadFile(path) if err != nil { if os.IsNotExist(err) { continue } return nil, fmt.Errorf("unable to read private key %q: %v", path, err) } glog.Infof("Using private key %q", path) // Create the Signer for this private key. signer, err := ssh.ParsePrivateKey(key) if err != nil { return nil, fmt.Errorf("unable to parse private key %q: %v", path, err) } signers = append(signers, ssh.PublicKeys(signer)) } if len(signers) == 0 { return nil, fmt.Errorf("no public keys configured") } return signers, nil } type filesystemName string type filesystemStat struct { // Timestamp of most recent snapshot Timestamp time.Time // Counts is the number of snapshots Counts int // FreenasCounts is the number of snapshots matching freenas' auto-snapshot // names. FreenasCounts int // LinuxCounts is the number of snapshots matching linux' auto-snapshot // names. LinuxCounts int // AutosnapshotCounts is the number of snapshots matching zfs-autosnapshot's // auto-snapshot names. AutosnapshotCounts int } type snapshotStats map[filesystemName]*filesystemStat func fetchSnapshotStats(host string, c *ssh.Client) (snapshotStats, error) { now := time.Now() defer func() { delta := time.Since(now) fetchRequestDurationMetric.Set(delta.Seconds()) glog.V(2).Infof("[%s] Update took %s", host, delta) }() // Each ClientConn can support multiple interactive sessions, // represented by a Session. s, err := c.NewSession() if err != nil { return nil, fmt.Errorf("[%s] error creating new session: %v", host, err) } defer s.Close() glog.V(2).Infof("[%s] Running %q", host, snapshotListCmd) b, err := s.CombinedOutput(snapshotListCmd) if err != nil { return nil, fmt.Errorf("[%s] error running %q: %v", host, snapshotListCmd, err) } scanner := bufio.NewScanner(bytes.NewReader(b)) stats := snapshotStats(make(map[filesystemName]*filesystemStat)) for scanner.Scan() { l := scanner.Text() filesystem := l if idx := strings.Index(l, "@"); idx != -1 { filesystem = l[:idx] } name := filesystemName(filesystem) if _, ok := stats[name]; !ok { stats[name] = &filesystemStat{} } stats[name].Counts++ var foundSnapshot bool if m := freenasSnapshotPattern.FindStringSubmatch(l); len(m) == 2 { foundSnapshot = true t, err := time.Parse(freenasSnapshotFormat, m[1]) if err != nil { glog.Errorf("[%s] Malformed time in freenas snapshot %q: %v", host, m[1], err) continue } glog.V(3).Infof("filesystem: %s timestamp %v", l, t) stats[name].FreenasCounts++ snapshotTime := stats[name].Timestamp glog.V(3).Infof("snapshotTime.Before(t) = %v snapshotTime: %v t: %v", snapshotTime.Before(t), snapshotTime, t) if snapshotTime.Before(t) { stats[name].Timestamp = t } } if m := linuxSnapshotPattern.FindStringSubmatch(l); len(m) == 2 { foundSnapshot = true t, err := time.Parse(linuxSnapshotFormat, m[1]) if err != nil { glog.Errorf("[%s] Malformed time in linux snapshot %q: %v", host, m[1], err) continue } glog.V(3).Infof("filesystem: %s timestamp %v", l, t) stats[name].LinuxCounts++ snapshotTime := stats[name].Timestamp glog.V(3).Infof("snapshotTime.Before(t) = %v snapshotTime: %v t: %v", snapshotTime.Before(t), snapshotTime, t) if snapshotTime.Before(t) { stats[name].Timestamp = t } } if m := autosnapshotPattern.FindStringSubmatch(l); len(m) == 2 { foundSnapshot = true t, err := time.Parse(autosnapshotFormat, m[1]) if err != nil { glog.Errorf("[%s] Malformed time in autosnapshot %q: %v", host, m[1], err) continue } glog.V(3).Infof("filesystem: %s timestamp %v", l, t) stats[name].AutosnapshotCounts++ snapshotTime := stats[name].Timestamp glog.V(3).Infof("snapshotTime.Before(t) = %v snapshotTime: %v t: %v", snapshotTime.Before(t), snapshotTime, t) if snapshotTime.Before(t) { stats[name].Timestamp = t } } if !foundSnapshot { glog.V(3).Infof("[%s] Skipping snapshot with non-conforming timestamp %q", host, l) } } if err := scanner.Err(); err != nil { return nil, fmt.Errorf("[%s] failed to scan response: %v", host, err) } return stats, nil } func updateMetrics(host string, stats snapshotStats) { for filesystem, stat := range stats { snapshotCountsMetrics.WithLabelValues(host, string(filesystem), "all").Set(float64(stat.Counts)) snapshotCountsMetrics.WithLabelValues(host, string(filesystem), "freenas").Set(float64(stat.FreenasCounts)) snapshotCountsMetrics.WithLabelValues(host, string(filesystem), "linux").Set(float64(stat.LinuxCounts)) snapshotCountsMetrics.WithLabelValues(host, string(filesystem), "autosnapshot").Set(float64(stat.AutosnapshotCounts)) snapshotTimestampMetric.WithLabelValues(host, string(filesystem)).Set(float64(stat.Timestamp.Unix())) } } type hostsSnapshotStats struct { sync.Mutex host2Stats map[string]snapshotStats } var indexTmpl = template.Must(template.New("index").Parse(`
Metrics at /metrics {{with .}} {{range $host, $snapStats := .}}
| Filesystem | Counts | FreeNAS Snapshots | Linux Snapshots | Autosnapshot Snapshots | Most Recent |
|---|---|---|---|---|---|
| {{$name}} | {{$fsStat.Counts}} | {{$fsStat.FreenasCounts}} | {{$fsStat.LinuxCounts}} | {{$fsStat.AutosnapshotCounts}} | {{if or $fsStat.LinuxCounts $fsStat.FreenasCounts $fsStat.AutosnapshotCounts }}{{$fsStat.Timestamp}}{{end}} |
Please refresh later.
{{end}}