////////////////////////////////////////////////////////////////////////// // DNS Metrics ////////////////////////////////////////////////////////////////////////// package main ////////////////////////////////////////////////////////////////////////// import ( "encoding/json" "errors" // "fmt" dns "github.com/miekg/dns" "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" "io/ioutil" "math" "net/http" "strconv" "strings" "time" ) ////////////////////////////////////////////////////////////////////////// // data structures // for holding the DNS metrics type DNSMetrics struct { soa *prometheus.GaugeVec rtt *prometheus.GaugeVec valid *prometheus.GaugeVec stime *prometheus.GaugeVec label_map []prometheus.Labels } // for specifying DNS servers type DNSServer struct { role string owner string name string ip uint8 addr string soa uint64 } // hardcoded :( list of DNS servers to query var dns_servers = []*DNSServer{ &DNSServer{"master", "jrb0001", "j.master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0}, &DNSServer{"delegation", "burble", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0}, &DNSServer{"delegation", "burble", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0}, &DNSServer{"delegation", "jrb0001", "j.delegation-servers.dn42", 4, "172.20.1.18:53", 0}, &DNSServer{"delegation", "jrb0001", "j.delegation-servers.dn42", 6, "[fd42:5d71:219:0:1::42]:53", 0}, &DNSServer{"delegation", "yamakaja", "y.delegation-servers.dn42", 4, "172.20.20.66:53", 0}, &DNSServer{"delegation", "yamakaja", "y.delegation-servers.dn42", 6, "[fd42:c01d:beef::3]:53", 0}, &DNSServer{"recursive", "yamakaja", "a.recursive-servers.dn42", 4, "172.20.0.53:53", 0}, &DNSServer{"recursive", "yamakaja", "a.recursive-servers.dn42", 6, "[fd42:d42:d42:54::1]:53", 0}, &DNSServer{"recursive", "burble", "b.recursive-servers.dn42", 4, "172.20.129.2:53", 0}, &DNSServer{"recursive", "burble", "b.recursive-servers.dn42", 6, "[fd42:4242:2601:ac53::53]:53", 0}, &DNSServer{"recursive", "jrb0001", "j.recursive-servers.dn42", 4, "172.20.1.19:53", 0}, &DNSServer{"recursive", "jrb0001", "j.recursive-servers.dn42", 6, "[fd42:5d71:219:0:1::43]:53", 0}, &DNSServer{"recursive", "yamakaja", "y.recursive-servers.dn42", 4, "172.20.20.65:53", 0}, &DNSServer{"recursive", "yamakaja", "y.recursive-servers.dn42", 6, "[fd42:c01d:beef::2]:53", 0}, &DNSServer{"burble.dn42", "burble", "fr-rbx1", 6, "[fd42:4242:2601:36::ac:53]:53", 0}, &DNSServer{"burble.dn42", "burble", "us-dal3", 6, "[fd42:4242:2601:2a::ac:53]:53", 0}, &DNSServer{"burble.dn42", "burble", "sg-sin2", 6, "[fd42:4242:2601:37::ac:53]:53", 0}, &DNSServer{"burble.dn42", "burble", "ca-bhs2", 6, "[fd42:4242:2601:2d::ac:53]:53", 0}, &DNSServer{"burble.dn42", "burble", "lt-vil1", 6, "[fd42:4242:2601:3d::ac:53]:53", 0}, } // data structures for querying the current commit metric type DNSCommitMetrics struct { match *prometheus.GaugeVec lastUpdate time.Time mismatch bool since time.Time labelInvalid prometheus.Labels labelUpdate prometheus.Labels } type DNSExplorerCommit struct { Commit string } type DNSMasterCommit struct { Type string `json:"type"` URI string `json:"uri"` Branch string `json:"branch"` Commit string `json:"commit"` } ////////////////////////////////////////////////////////////////////////// // initialisation function to register metrics func (m *DNSMetrics) Register() { m.soa = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "dn42_dns_soa", Help: "SOA for .dn42 domain", }, []string{"role", "owner", "name", "ip", "addr"}) prometheus.MustRegister(m.soa) m.rtt = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "dn42_dns_rtt", Help: "RTT when collecting SOA for .dn42 domain", }, []string{"role", "owner", "name", "ip", "addr"}) prometheus.MustRegister(m.rtt) m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "dn42_dns_valid", Help: "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response", }, []string{"role", "owner", "name", "ip", "addr"}) prometheus.MustRegister(m.valid) m.stime = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "dn42_dns_stime", Help: "Returns the time between now and the SOA serial number", }, []string{"role", "owner", "name", "ip", "addr"}) prometheus.MustRegister(m.stime) // pre-populate the labels m.label_map = make([]prometheus.Labels, len(dns_servers)) for ix, server := range dns_servers { m.label_map[ix] = prometheus.Labels{ "role": server.role, "owner": server.owner, "name": server.name, "ip": strconv.Itoa(int(server.ip)), "addr": server.addr, } } } ////////////////////////////////////////////////////////////////////////// // collect metrics for all DNS servers func (m *DNSMetrics) Collect() { now := uint64(time.Now().Unix()) // icky icky icky masters := make([]uint64, 2) masters[0] = dns_servers[0].soa masters[1] = dns_servers[6].soa // go through each server in turn for ix, server := range dns_servers { // query it soa, rtt := server.Query() server.soa = uint64(soa) // SOA and RTT are direct metrics returned from the query m.soa.With(m.label_map[ix]).Set(soa) m.rtt.With(m.label_map[ix]).Set(rtt) // check if the returned SOA matches j.master.delegation-servers.dn42 var valid uint = 0 if server.soa == 0 { // didn't get a result, server issue valid = 2 } else { // check if the SOA matches any defined master SOA // assume not valid = 1 for _, soa := range masters { if server.soa == soa { // match was found valid = 0 break } } } // before setting whether the server is valid, calculate the stime // (difference in time between now and the SOA, to allow checking that // it is not stale) // it's possible that the SOA could be in the future if there is a // clock mismatch between monitor and DNS server, in which case this // is flagged as a server error if server.soa > now { // server error valid = 2 } else { m.stime.With(m.label_map[ix]).Set(float64(now - server.soa)) } // finally set the valid status m.valid.With(m.label_map[ix]).Set(float64(valid)) } } ////////////////////////////////////////////////////////////////////////// func (s *DNSServer) Query() (float64, float64) { msg := new(dns.Msg) msg.Id = dns.Id() msg.RecursionDesired = (s.role == "recursive") msg.Question = []dns.Question{{"dn42.", dns.TypeSOA, dns.ClassINET}} client := new(dns.Client) resp, rtt, err := client.Exchange(msg, s.addr) if err != nil || len(resp.Answer) != 1 { log.WithFields(log.Fields{ "error": err, "resp": resp, "server": s, }).Warn("Unable to query DNS server") return 0, 0 } if soa, ok := resp.Answer[0].(*dns.SOA); ok { return float64(soa.Serial), math.Round(rtt.Seconds() * 1000) } return 0, 0 } ////////////////////////////////////////////////////////////////////////// func (m *DNSCommitMetrics) Register() { m.match = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "dn42_dns_commit", Help: "Time since commits last matched (or -1 for error)", }, []string{"metric"}) prometheus.MustRegister(m.match) m.labelInvalid = prometheus.Labels{ "metric": "invalid", } m.labelUpdate = prometheus.Labels{ "metric": "update", } } func (m *DNSCommitMetrics) Collect() { now := time.Now() interval := now.Sub(m.lastUpdate) // only check if it's more than 60 mins since the last successful match if (interval.Hours() >= 1.0) || m.mismatch { // fetch the current commit from the explorer ec, err := m.FetchExplorerCommit() if err != nil { // couldn't fetch the explorer commit value, set the metric to bad m.match.With(m.labelUpdate).Set(-1.0) m.match.With(m.labelInvalid).Set(-1.0) m.mismatch = true return } mc, err := m.FetchMasterCommit() if err != nil { // couldn't fetch master commit value, set the metric to bad m.match.With(m.labelUpdate).Set(-2.0) m.match.With(m.labelInvalid).Set(-1.0) m.mismatch = true return } m.lastUpdate = now m.match.With(m.labelUpdate).Set(0.0) if ec == mc { // if the commits match, null the metric m.match.With(m.labelInvalid).Set(0.0) m.mismatch = false } else { // was this the first mismatch ? if !m.mismatch { m.since = now m.mismatch = true } since := now.Sub(m.since).Seconds() log.WithFields(log.Fields{ "master": mc, "explorer": ec, "since": since, }).Warn("DNS Commit Mismatch") // set the metric to be the interval since last good match m.match.With(m.labelInvalid).Set(float64(since)) } } else { // update time since last check m.match.With(m.labelUpdate).Set(float64(interval.Seconds())) } } ////////////////////////////////////////////////////////////////////////// func (m *DNSCommitMetrics) FetchExplorerCommit() (string, error) { response, err := http.Get("http://collector.dn42:8043/api/registry/.meta") if err != nil { log.WithFields(log.Fields{ "error": err, }).Warn("Unable to query registry explorer") return "", err } data, err := ioutil.ReadAll(response.Body) if err != nil { log.WithFields(log.Fields{ "error": err, }).Warn("Unable read explorer response") return "", err } var ec DNSExplorerCommit if err := json.Unmarshal(data, &ec); err != nil { log.WithFields(log.Fields{ "error": err, }).Warn("Unable to parse explorer JSON") return "", err } return strings.ToLower(ec.Commit), nil } func (m *DNSCommitMetrics) FetchMasterCommit() (string, error) { response, err := http.Get("http://[fd42:180:3de0:10:5054:ff:fe87:ea39]:8080/api/git-db-state") if err != nil { log.WithFields(log.Fields{ "error": err, }).Warn("Unable to query registry master") return "", err } data, err := ioutil.ReadAll(response.Body) if err != nil { log.WithFields(log.Fields{ "error": err, }).Warn("Unable read master response") return "", err } var mc []DNSMasterCommit if err := json.Unmarshal(data, &mc); err != nil { log.WithFields(log.Fields{ "error": err, }).Warn("Unable to parse master JSON") return "", err } for _, c := range mc { if c.URI == "https://git.dn42.us/dn42/registry.git" { return strings.ToLower(c.Commit), nil } } log.WithFields(log.Fields{ "MasterCommit": mc, }).Warn("Unable to find registry commit from master") return "", errors.New("Unable to find registry commit from master") } ////////////////////////////////////////////////////////////////////////// // end of code