From 1a6a9190f37fb5f5f9ede7301f09e2db8d539321 Mon Sep 17 00:00:00 2001 From: Simon Marsh Date: Sat, 25 May 2019 21:53:07 +0100 Subject: [PATCH] Add stime metric and improve commenting --- dns.go | 51 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/dns.go b/dns.go index 375cc27..3e55e72 100644 --- a/dns.go +++ b/dns.go @@ -13,27 +13,36 @@ import ( log "github.com/sirupsen/logrus" "math" "strconv" + "time" ) ////////////////////////////////////////////////////////////////////////// +// data structures + +// for holding the metrics type DNSMetrics struct { soa *prometheus.GaugeVec rtt *prometheus.GaugeVec valid *prometheus.GaugeVec + stime *prometheus.GaugeVec label_map []prometheus.Labels } +// for specifying DNS servers + type DNSServer struct { role string name string ip uint8 addr string - soa uint + soa uint64 } +// hardcoded :( list of DNS servers to query + var dns_servers = []*DNSServer{ - &DNSServer{"master", "master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0}, + &DNSServer{"master", "j.master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0}, &DNSServer{"delegation", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0}, &DNSServer{"delegation", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0}, &DNSServer{"delegation", "j.delegation-servers.dn42", 4, "172.20.1.18:53", 0}, @@ -56,6 +65,7 @@ var dns_servers = []*DNSServer{ } ////////////////////////////////////////////////////////////////////////// +// initialisation function to register metrics func (m *DNSMetrics) Register() { @@ -77,6 +87,12 @@ func (m *DNSMetrics) Register() { }, []string{"role", "name", "ip", "addr"}) prometheus.MustRegister(m.valid) + m.stime = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "dn42_dns_stime", + Help: "Returns the time between now and the SOA serial number", + }, []string{"role", "name", "ip", "addr"}) + prometheus.MustRegister(m.stime) + // pre-populate the labels m.label_map = make([]prometheus.Labels, len(dns_servers)) @@ -92,28 +108,53 @@ func (m *DNSMetrics) Register() { } ////////////////////////////////////////////////////////////////////////// +// collect metrics for all DNS servers func (m *DNSMetrics) Collect() { - for ix, server := range dns_servers { - soa, rtt := server.Query() - server.soa = uint(soa) + now := uint64(time.Now().Unix()) + // go through each server in turn + for ix, server := range dns_servers { + + // query it + soa, rtt := server.Query() + server.soa = uint64(soa) + + // SOA and RTT are direct metrics returned from the query m.soa.With(m.label_map[ix]).Set(soa) m.rtt.With(m.label_map[ix]).Set(rtt) + // check if the returned SOA matches j.master.delegation-servers.dn42 var valid uint = 0 if server.soa == 0 { + // didn't get a result, server issue valid = 2 } else { if server.soa != dns_servers[0].soa { + // SOA didn't match valid = 1 } else { valid = 0 } } + // before setting whether the server is valid, calculate the stime + // (difference in time between now and the SOA, to allow checking that + // it is not stale) + // it's possible that the SOA could be in the future if there is a + // clock mismatch between monitor and DNS server, in which case this + // is flagged as a server error + + if server.soa > now { + // server error + valid = 2 + } else { + m.stime.With(m.label_map[ix]).Set(float64(now - server.soa)) + } + + // finally set the valid status m.valid.With(m.label_map[ix]).Set(float64(valid)) }