diff --git a/zfs_replication_exporter.go b/zfs_replication_exporter.go
index 3558961..7907502 100644
--- a/zfs_replication_exporter.go
+++ b/zfs_replication_exporter.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"strings"
 	"time"
 
 	"github.com/golang/glog"
@@ -30,25 +31,32 @@ const snapshotListCmd = "/sbin/zfs list -t snapshot -H -o name -s name"
 
 // Example: @auto-20171001.1400-2w
 var (
-	snapshotPattern = regexp.MustCompile(`^([^@]+)@auto-(\d{8}\.\d{4})-\d+[mwy]$`)
+	snapshotPattern = regexp.MustCompile(`^[^@]+@auto-(\d{8}\.\d{4})-\d+[mwy]$`)
 	snapshotFormat  = "20060102.1504"
 
 	fetchRequestDurationMetric = prometheus.NewGauge(prometheus.GaugeOpts{
 		Name: "ssh_fetch_duration_seconds",
 		Help: "Time to fetch and parse snapshot age over SSH",
 	})
-	snapshotAgesMetric = prometheus.NewGaugeVec(
-		prometheus.GaugeOpts{
-			Name: "zfs_snapshot_age_seconds",
-			Help: "Duration in seconds for most recent snapshot for `filesystem`",
-		},
+	snapshotAgesMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "zfs_snapshot_age_seconds",
+		Help: "Duration in seconds for most recent snapshot for `filesystem`",
+	},
 		[]string{"filesystem"},
 	)
+	// snapshotCountsMetrics counts snapshots per filesystem; "type" is "all" for every listed line and "freenas" for names matching snapshotPattern.
+	snapshotCountsMetrics = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "zfs_snapshot_count",
+		Help: "Count of snapshots per-filesystem",
+	},
+		[]string{"filesystem", "type"},
+	)
 )
 
 func init() {
 	prometheus.MustRegister(fetchRequestDurationMetric)
 	prometheus.MustRegister(snapshotAgesMetric)
+	prometheus.MustRegister(snapshotCountsMetrics)
 }
 
 func newPublicKey() ([]ssh.AuthMethod, error) {
@@ -106,20 +114,30 @@ func updateMetrics(c *ssh.Client) error {
 		return fmt.Errorf("error running %q: %v", snapshotListCmd, err)
 	}
 	scanner := bufio.NewScanner(bytes.NewReader(b))
-	snapshotAges := map[string]time.Time{}
+	snapshotAges := make(map[string]time.Time)
+	snapshotCountsByFilesystem := make(map[string]int)
+	freenasSnapshotCountsByFilesystem := make(map[string]int)
 	for scanner.Scan() {
 		l := scanner.Text()
 		m := snapshotPattern.FindStringSubmatch(l)
-		if len(m) == 3 {
-			snapshot := m[1]
-			t, err := time.Parse(snapshotFormat, m[2])
+
+		// The filesystem is the text before the first "@"; a line without "@" is counted under the whole line.
+		filesystem := l
+		if idx := strings.Index(l, "@"); idx != -1 {
+			filesystem = l[:idx]
+		}
+		snapshotCountsByFilesystem[filesystem]++
+		if len(m) == 2 {
+			t, err := time.Parse(snapshotFormat, m[1])
 			if err != nil {
-				glog.Errorf("Malformed time in snapshot %q: %v", m[2], err)
+				// m is the full match plus one capture group, so the timestamp is m[1]; m[2] would be out of range.
+				glog.Errorf("Malformed time in snapshot %q: %v", m[1], err)
 				continue
 			}
-			snapshotTime := snapshotAges[snapshot]
+			freenasSnapshotCountsByFilesystem[filesystem]++
+			snapshotTime := snapshotAges[filesystem]
 			if snapshotTime.Before(t) {
-				snapshotAges[snapshot] = t
+				snapshotAges[filesystem] = t
 			}
 		}
 	}
@@ -127,8 +145,14 @@ func updateMetrics(c *ssh.Client) error {
 		return fmt.Errorf("failed to scan response: %v", err)
 	}
 
-	for snapshot, snapshotTime := range snapshotAges {
-		snapshotAgesMetric.WithLabelValues(snapshot).Set(now.Sub(snapshotTime).Seconds())
+	for filesystem, count := range snapshotCountsByFilesystem {
+		snapshotCountsMetrics.WithLabelValues(filesystem, "all").Set(float64(count))
+	}
+	for filesystem, count := range freenasSnapshotCountsByFilesystem {
+		snapshotCountsMetrics.WithLabelValues(filesystem, "freenas").Set(float64(count))
+	}
+	for filesystem, snapshotTime := range snapshotAges {
+		snapshotAgesMetric.WithLabelValues(filesystem).Set(now.Sub(snapshotTime).Seconds())
 	}
 	return nil
 }