Drop precomputed ages. Show stats on front page.
This commit is contained in:
parent
af06cb6585
commit
15b40a2866
@ -5,6 +5,7 @@ import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"html/template"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
_ "net/http/pprof"
|
||||
@ -12,6 +13,7 @@ import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
@ -38,12 +40,6 @@ var (
|
||||
Name: "ssh_fetch_duration_seconds",
|
||||
Help: "Time to fetch and parse snapshot age over SSH",
|
||||
})
|
||||
snapshotAgesMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "zfs_snapshot_age_seconds",
|
||||
Help: "Duration in seconds for most recent snapshot for `filesystem`",
|
||||
},
|
||||
[]string{"host", "filesystem"},
|
||||
)
|
||||
snapshotTimestampMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "zfs_snapshot_timestamp_seconds",
|
||||
Help: "Most recent snapshot timestamp for `filesystem` UNIX epoch seconds",
|
||||
@ -60,7 +56,6 @@ var (
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(fetchRequestDurationMetric)
|
||||
prometheus.MustRegister(snapshotAgesMetric)
|
||||
prometheus.MustRegister(snapshotTimestampMetric)
|
||||
prometheus.MustRegister(snapshotCountsMetrics)
|
||||
}
|
||||
@ -99,7 +94,19 @@ func newPublicKey() ([]ssh.AuthMethod, error) {
|
||||
return signers, nil
|
||||
}
|
||||
|
||||
func updateMetrics(host string, c *ssh.Client) error {
|
||||
// filesystemName is the name of a filesystem, used as the key for
// per-filesystem snapshot statistics.
type filesystemName string
|
||||
// filesystemStat holds the snapshot statistics collected for one filesystem.
type filesystemStat struct {
	// Timestamp is the time of the most recent snapshot. It is the zero
	// time.Time until a snapshot time has been recorded.
	Timestamp time.Time

	// Counts is the number of snapshots.
	Counts int

	// FreenasCounts is the number of snapshots whose names match FreeNAS'
	// auto-snapshot naming scheme.
	FreenasCounts int
}
|
||||
// snapshotStats maps each filesystem name to the statistics collected for
// that filesystem. Values are pointers so entries can be updated in place.
type snapshotStats map[filesystemName]*filesystemStat
|
||||
|
||||
func fetchSnapshotStats(host string, c *ssh.Client) (snapshotStats, error) {
|
||||
now := time.Now()
|
||||
defer func() {
|
||||
delta := time.Since(now)
|
||||
@ -110,19 +117,17 @@ func updateMetrics(host string, c *ssh.Client) error {
|
||||
// represented by a Session.
|
||||
s, err := c.NewSession()
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] error creating new session: %v", host, err)
|
||||
return nil, fmt.Errorf("[%s] error creating new session: %v", host, err)
|
||||
}
|
||||
defer s.Close()
|
||||
|
||||
glog.V(2).Infof("[%s] Running %q", host, snapshotListCmd)
|
||||
b, err := s.CombinedOutput(snapshotListCmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[%s] error running %q: %v", host, snapshotListCmd, err)
|
||||
return nil, fmt.Errorf("[%s] error running %q: %v", host, snapshotListCmd, err)
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
snapshotAges := make(map[string]time.Time)
|
||||
snapshotCountsByFilesystem := make(map[string]int)
|
||||
freenasSnapshotCountsByFilesystem := make(map[string]int)
|
||||
stats := snapshotStats(make(map[filesystemName]*filesystemStat))
|
||||
for scanner.Scan() {
|
||||
l := scanner.Text()
|
||||
m := snapshotPattern.FindStringSubmatch(l)
|
||||
@ -131,35 +136,89 @@ func updateMetrics(host string, c *ssh.Client) error {
|
||||
if idx := strings.Index(l, "@"); idx != -1 {
|
||||
filesystem = l[:idx]
|
||||
}
|
||||
snapshotCountsByFilesystem[filesystem]++
|
||||
name := filesystemName(filesystem)
|
||||
if _, ok := stats[name]; !ok {
|
||||
stats[name] = &filesystemStat{}
|
||||
}
|
||||
stats[name].Counts++
|
||||
if len(m) == 2 {
|
||||
t, err := time.Parse(snapshotFormat, m[1])
|
||||
if err != nil {
|
||||
glog.Errorf("[%s] Malformed time in snapshot %q: %v", host, m[2], err)
|
||||
glog.Errorf("[%s] Malformed time in snapshot %q: %v", host, m[1], err)
|
||||
continue
|
||||
}
|
||||
freenasSnapshotCountsByFilesystem[filesystem]++
|
||||
snapshotTime := snapshotAges[filesystem]
|
||||
glog.V(3).Infof("filesystem: %s timestamp %v", l, t)
|
||||
stats[name].FreenasCounts++
|
||||
snapshotTime := stats[name].Timestamp
|
||||
glog.V(3).Infof("snapshotTime.Before(t) = %v snapshotTime: %v t: %v", snapshotTime.Before(t), snapshotTime, t)
|
||||
if snapshotTime.Before(t) {
|
||||
snapshotAges[filesystem] = t
|
||||
stats[name].Timestamp = t
|
||||
}
|
||||
} else {
|
||||
glog.V(3).Infof("[%s] Skipping snapshot with non-conforming timestamp %q", host, l)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return fmt.Errorf("[%s] failed to scan response: %v", host, err)
|
||||
return nil, fmt.Errorf("[%s] failed to scan response: %v", host, err)
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
for filesystem, c := range snapshotCountsByFilesystem {
|
||||
snapshotCountsMetrics.WithLabelValues(host, filesystem, "all").Set(float64(c))
|
||||
func updateMetrics(host string, stats snapshotStats) {
|
||||
for filesystem, stat := range stats {
|
||||
snapshotCountsMetrics.WithLabelValues(host, string(filesystem), "all").Set(float64(stat.Counts))
|
||||
snapshotCountsMetrics.WithLabelValues(host, string(filesystem), "freenas").Set(float64(stat.FreenasCounts))
|
||||
snapshotTimestampMetric.WithLabelValues(host, string(filesystem)).Set(float64(stat.Timestamp.Unix()))
|
||||
}
|
||||
for filesystem, c := range freenasSnapshotCountsByFilesystem {
|
||||
snapshotCountsMetrics.WithLabelValues(host, filesystem, "freenas").Set(float64(c))
|
||||
}
|
||||
|
||||
type hostsSnapshotStats struct {
|
||||
sync.Mutex
|
||||
host2Stats map[string]snapshotStats
|
||||
}
|
||||
|
||||
// indexTmpl is the HTML template for the debugging page. It is parsed once
// when the package is initialized; template.Must panics if the text does not
// parse.
//
// The template ranges over the data passed to Execute, binding $host and
// $snapStats, and renders one table per host. Within each host it ranges
// again, binding $name and $fsStat, and renders one row per filesystem with
// its Counts, FreenasCounts and, only when FreenasCounts is non-zero, its
// Timestamp.
var indexTmpl = template.Must(template.New("index").Parse(`<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
|
||||
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
|
||||
<title>ZFS replication exporter</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>Debugging info for ZFS replication exporter</h1>
|
||||
{{range $host, $snapStats := .}}
|
||||
<h2>{{$host}}</h2>
|
||||
<table class="table">
|
||||
<tr>
|
||||
<th>Filesystem</th>
|
||||
<th>Counts</th>
|
||||
<th>FreeNAS Snapshots</th>
|
||||
<th>Most Recent</th>
|
||||
</tr>
|
||||
{{range $name, $fsStat := .}}
|
||||
<tr>
|
||||
<td>{{$name}}</td>
|
||||
<td>{{$fsStat.Counts}}</td>
|
||||
<td>{{$fsStat.FreenasCounts}}</td>
|
||||
<td>{{if $fsStat.FreenasCounts }}{{$fsStat.Timestamp}}{{end}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</table>
|
||||
{{end}}
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
`))
|
||||
|
||||
func (hss *hostsSnapshotStats) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
hss.Lock()
|
||||
defer hss.Unlock()
|
||||
if err := indexTmpl.Execute(w, hss.host2Stats); err != nil {
|
||||
glog.Errorf("Failed to render index: %v", err)
|
||||
}
|
||||
for filesystem, snapshotTime := range snapshotAges {
|
||||
snapshotAgesMetric.WithLabelValues(host, filesystem).Set(now.Sub(snapshotTime).Seconds())
|
||||
snapshotTimestampMetric.WithLabelValues(host, filesystem).Set(float64(snapshotTime.Unix()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
@ -171,6 +230,10 @@ func main() {
|
||||
glog.Exitf("Error fetching public keys: %v", err)
|
||||
}
|
||||
|
||||
hss := &hostsSnapshotStats{
|
||||
host2Stats: make(map[string]snapshotStats),
|
||||
}
|
||||
|
||||
for _, userHost := range strings.Split(*hosts, ",") {
|
||||
u := os.Getenv("USER")
|
||||
h := userHost
|
||||
@ -197,11 +260,16 @@ func main() {
|
||||
}
|
||||
}
|
||||
if c != nil {
|
||||
if err := updateMetrics(host, c); err != nil {
|
||||
stats, err := fetchSnapshotStats(host, c)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to update metrics: %v", err)
|
||||
c.Close()
|
||||
c = nil
|
||||
}
|
||||
hss.Lock()
|
||||
hss.host2Stats[host] = stats
|
||||
hss.Unlock()
|
||||
updateMetrics(host, stats)
|
||||
}
|
||||
time.Sleep(*refreshInterval)
|
||||
}
|
||||
@ -209,6 +277,7 @@ func main() {
|
||||
}
|
||||
|
||||
// Expose the registered metrics via HTTP.
|
||||
http.Handle("/", hss)
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
glog.Exitf("Failed to ListenAndServe: %v", http.ListenAndServe(*addr, nil))
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user