diff --git a/dns.go b/dns.go index 323117f..eefe95b 100644 --- a/dns.go +++ b/dns.go @@ -9,7 +9,7 @@ package main import ( "encoding/json" "errors" - // "fmt" + // "fmt" dns "github.com/miekg/dns" "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" @@ -34,7 +34,7 @@ type DNSMetrics struct { label_map []prometheus.Labels } -// for specifying DNS servers +// structure for specifying DNS servers type DNSServer struct { role string @@ -42,45 +42,15 @@ type DNSServer struct { name string ip uint8 addr string - soa uint64 + soa uint32 } -// hardcoded :( list of DNS servers to query +// structure for returning relevant DNS data -var dns_servers = []*DNSServer{ - &DNSServer{"master", "jrb0001", "j.master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0}, - &DNSServer{"delegation", "burble", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0}, - &DNSServer{"delegation", "burble", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0}, - &DNSServer{"delegation", "jrb0001", "j.delegation-servers.dn42", 4, "172.20.1.254:53", 0}, - &DNSServer{"delegation", "jrb0001", "j.delegation-servers.dn42", 6, "[fd42:5d71:219:1:a526:d935:281e:22d6]:53", 0}, - &DNSServer{"delegation", "yamakaja", "y.delegation-servers.dn42", 4, "172.20.20.66:53", 0}, - &DNSServer{"delegation", "yamakaja", "y.delegation-servers.dn42", 6, "[fd42:c01d:beef::3]:53", 0}, - &DNSServer{"recursive", "yamakaja", "a.recursive-servers.dn42", 4, "172.20.0.53:53", 0}, - &DNSServer{"recursive", "yamakaja", "a.recursive-servers.dn42", 6, "[fd42:d42:d42:54::1]:53", 0}, - &DNSServer{"recursive", "burble", "b.recursive-servers.dn42", 4, "172.20.129.2:53", 0}, - &DNSServer{"recursive", "burble", "b.recursive-servers.dn42", 6, "[fd42:4242:2601:ac53::53]:53", 0}, - &DNSServer{"recursive", "jrb0001", "j.recursive-servers.dn42", 4, "172.20.1.255:53", 0}, - &DNSServer{"recursive", "jrb0001", "j.recursive-servers.dn42", 6, 
"[fd42:5d71:219:1:69c2:2b0e:17e8:c215]:53", 0}, - &DNSServer{"recursive", "yamakaja", "y.recursive-servers.dn42", 4, "172.20.20.65:53", 0}, - &DNSServer{"recursive", "yamakaja", "y.recursive-servers.dn42", 6, "[fd42:c01d:beef::2]:53", 0}, - &DNSServer{"burble.dn42", "burble", "fr-rbx1", 6, "[fd42:4242:2601:36::ac:53]:53", 0}, - &DNSServer{"burble.dn42", "burble", "us-dal3", 6, "[fd42:4242:2601:2a::ac:53]:53", 0}, - &DNSServer{"burble.dn42", "burble", "sg-sin2", 6, "[fd42:4242:2601:37::ac:53]:53", 0}, - &DNSServer{"burble.dn42", "burble", "ca-bhs2", 6, "[fd42:4242:2601:2d::ac:53]:53", 0}, - &DNSServer{"burble.dn42", "burble", "lt-vil1", 6, "[fd42:4242:2601:3d::ac:53]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "nl-1", 6, "[fd42:5d71:219::1]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "de-1", 6, "[fd42:5d71:219::2]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "gb-1", 6, "[fd42:5d71:219::3]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "fr-1", 6, "[fd42:5d71:219::4]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "us-1", 6, "[fd42:5d71:219::5]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "au-1", 6, "[fd42:5d71:219::6]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "jp-1", 6, "[fd42:5d71:219::7]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "sg-1", 6, "[fd42:5d71:219::8]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "ca-1", 6, "[fd42:5d71:219::9]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "us-2", 6, "[fd42:5d71:219::a]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "us-3", 6, "[fd42:5d71:219::b]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "us-4", 6, "[fd42:5d71:219::c]:53", 0}, - &DNSServer{"jrb0001", "jrb0001", "us-5", 6, "[fd42:5d71:219::d]:53", 0}, +type DNSResult struct { + serial uint32 + rtt float64 + nsid string } // data structures for querying the current commit metric @@ -105,6 +75,84 @@ type DNSMasterCommit struct { Commit string `json:"commit"` } +////////////////////////////////////////////////////////////////////////// + +// hardcoded :( list of DNS servers to query + 
+var dns_servers = []*DNSServer{ + + // master + + &DNSServer{"master", "jrb0001", "j.master.delegation-servers.dn42", 6, + "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0}, + + // anycast + + &DNSServer{"recursive", "yamakaja", "a.recursive-servers.dn42", 4, + "172.20.0.53:53", 0}, + &DNSServer{"recursive", "yamakaja", "a.recursive-servers.dn42", 6, + "[fd42:d42:d42:54::1]:53", 0}, + + // burble + + &DNSServer{"delegation", "burble", "b.delegation-servers.dn42", 4, + "172.20.129.1:53", 0}, + &DNSServer{"delegation", "burble", "b.delegation-servers.dn42", 6, + "[fd42:4242:2601:ac53::1]:53", 0}, + &DNSServer{"recursive", "burble", "b.recursive-servers.dn42", 4, + "172.20.129.2:53", 0}, + &DNSServer{"recursive", "burble", "b.recursive-servers.dn42", 6, + "[fd42:4242:2601:ac53::53]:53", 0}, + + &DNSServer{"burble.dn42", "burble", "fr-rbx1", 6, "[fd42:4242:2601:36::ac:53]:53", 0}, + &DNSServer{"burble.dn42", "burble", "us-dal3", 6, "[fd42:4242:2601:2a::ac:53]:53", 0}, + &DNSServer{"burble.dn42", "burble", "sg-sin2", 6, "[fd42:4242:2601:37::ac:53]:53", 0}, + &DNSServer{"burble.dn42", "burble", "ca-bhs2", 6, "[fd42:4242:2601:2d::ac:53]:53", 0}, + &DNSServer{"burble.dn42", "burble", "lt-vil1", 6, "[fd42:4242:2601:3d::ac:53]:53", 0}, + + // jrb0001 + + &DNSServer{"delegation", "jrb0001", "j.delegation-servers.dn42", 4, + "172.20.1.254:53", 0}, + &DNSServer{"delegation", "jrb0001", "j.delegation-servers.dn42", 6, + "[fd42:5d71:219:1:a526:d935:281e:22d6]:53", 0}, + &DNSServer{"recursive", "jrb0001", "j.recursive-servers.dn42", 4, + "172.20.1.255:53", 0}, + &DNSServer{"recursive", "jrb0001", "j.recursive-servers.dn42", 6, + "[fd42:5d71:219:1:69c2:2b0e:17e8:c215]:53", 0}, + + &DNSServer{"jrb0001", "jrb0001", "nl-1", 6, "[fd42:5d71:219::1]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "de-1", 6, "[fd42:5d71:219::2]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "gb-1", 6, "[fd42:5d71:219::3]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "fr-1", 6, "[fd42:5d71:219::4]:53", 0}, + 
&DNSServer{"jrb0001", "jrb0001", "us-1", 6, "[fd42:5d71:219::5]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "au-1", 6, "[fd42:5d71:219::6]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "jp-1", 6, "[fd42:5d71:219::7]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "sg-1", 6, "[fd42:5d71:219::8]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "ca-1", 6, "[fd42:5d71:219::9]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "us-2", 6, "[fd42:5d71:219::a]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "us-3", 6, "[fd42:5d71:219::b]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "us-4", 6, "[fd42:5d71:219::c]:53", 0}, + &DNSServer{"jrb0001", "jrb0001", "us-5", 6, "[fd42:5d71:219::d]:53", 0}, + + // yamakaja + + &DNSServer{"delegation", "yamakaja", "y.delegation-servers.dn42", 4, + "172.20.20.66:53", 0}, + &DNSServer{"delegation", "yamakaja", "y.delegation-servers.dn42", 6, + "[fd42:c01d:beef::3]:53", 0}, + &DNSServer{"recursive", "yamakaja", "y.recursive-servers.dn42", 4, + "172.20.20.65:53", 0}, + &DNSServer{"recursive", "yamakaja", "y.recursive-servers.dn42", 6, + "[fd42:c01d:beef::2]:53", 0}, +} + +////////////////////////////////////////////////////////////////////////// +// +// DNS Server Metrics +// +////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// // initialisation function to register metrics @@ -156,39 +204,72 @@ func (m *DNSMetrics) Collect() { now := uint64(time.Now().Unix()) - // icky icky icky - masters := make([]uint64, 2) - masters[0] = dns_servers[0].soa - masters[1] = dns_servers[6].soa - - // go through each server in turn - for ix, server := range dns_servers { - - // query it - soa, rtt := server.Query() - server.soa = uint64(soa) - - // SOA and RTT are direct metrics returned from the query - m.soa.With(m.label_map[ix]).Set(soa) - m.rtt.With(m.label_map[ix]).Set(rtt) - - // check if the returned SOA matches j.master.delegation-servers.dn42 - var valid uint = 0 - - if 
server.soa == 0 { - // didn't get a result, server issue - valid = 2 + // add master servers to a list to compare SOA + masters := make([]uint32, 2) + for _, server := range dns_servers { + if server.role == "master" { + masters = append(masters, server.soa) } else { + // icky icky icky - manually add yamakaja's server temporarily + if server.role == "delegation" && + server.owner == "yamakaja" && + server.ip == 6 { + masters = append(masters, server.soa) + } + } + } + + // hold the results in an array where each entry corresponds + // to the dns_servers array + results := make([]*DNSResult, len(dns_servers)) + + // query each server up to 3 times to try and get a result + for count := 0; count < 3; count++ { + + for ix := 0; ix < len(results); ix++ { + if results[ix] == nil { + // no result yet, query the server + results[ix] = dns_servers[ix].Query() + } + } + + } + + // now go through each result and update the metrics + for ix, r := range results { + + var valid uint = 2 + + // check if there was a valid result + if r != nil { + + // update the server SOA + dns_servers[ix].soa = r.serial + + // SOA and RTT are direct metrics returned from the query + m.soa.With(m.label_map[ix]).Set(float64(r.serial)) + m.rtt.With(m.label_map[ix]).Set(r.rtt) + + // if the server has a high rtt ( > 500ms), then log an info message + if r.rtt > 500 { + log.WithFields(log.Fields{ + "result": r, + "server": dns_servers[ix].name, + "ipv": dns_servers[ix].ip, + }).Info("DNS Server high RTT") + } + // check if the SOA matches any defined master SOA // assume not valid = 1 - // automatically invalid if out of date - if (now - server.soa) < (3600 * 25) { - // otherwise step through each 'master' and check the SOA matches somewhere + // automatically invalid if out of date (older than 25 hours) + if (now - uint64(r.serial)) < (3600 * 25) { + // otherwise step through each 'master' and + // check if the SOA matches somewhere for _, soa := range masters { - if server.soa == soa { + if 
r.serial == soa { // match was found valid = 0 break @@ -196,56 +277,116 @@ func (m *DNSMetrics) Collect() { } } - } + // before setting whether the server is valid, calculate the stime + // (difference in time between now and the SOA, to allow checking that + // it is not stale) + // it's possible that the SOA could be in the future if there is a + // clock mismatch between monitor and DNS server, in which case this + // is flagged as a server error - // before setting whether the server is valid, calculate the stime - // (difference in time between now and the SOA, to allow checking that - // it is not stale) - // it's possible that the SOA could be in the future if there is a - // clock mismatch between monitor and DNS server, in which case this - // is flagged as a server error + if uint64(r.serial) > now { + // server error + valid = 2 + } else { + m.stime.With(m.label_map[ix]).Set(float64(now - uint64(r.serial))) + } - if server.soa > now { - // server error - valid = 2 - } else { - m.stime.With(m.label_map[ix]).Set(float64(now - server.soa)) } // finally set the valid status m.valid.With(m.label_map[ix]).Set(float64(valid)) + } } ////////////////////////////////////////////////////////////////////////// +// construct the DNS query and send to a server -func (s *DNSServer) Query() (float64, float64) { +func (s *DNSServer) Query() *DNSResult { + // create a new query (recursion is only requested from recursive servers) msg := new(dns.Msg) msg.Id = dns.Id() msg.RecursionDesired = (s.role == "recursive") + + // query the dn42 root zone SOA msg.Question = []dns.Question{{"dn42.", dns.TypeSOA, dns.ClassINET}} + // add EDNS0 options to also request the name server identifier (NSID) + // pretty much copied verbatim from the library docs + opts := new(dns.OPT) + opts.Hdr.Name = "." 
+ opts.Hdr.Rrtype = dns.TypeOPT + + // create the NSID option + ns_opt := new(dns.EDNS0_NSID) + ns_opt.Code = dns.EDNS0NSID + ns_opt.Nsid = "" + + // add the NSID option to the opts RR + opts.Option = append(opts.Option, ns_opt) + + // then add the opts RR to the query + msg.Extra = []dns.RR{opts} + + // create a new DNS client client := new(dns.Client) + + // and finally query the server resp, rtt, err := client.Exchange(msg, s.addr) if err != nil || len(resp.Answer) != 1 { log.WithFields(log.Fields{ "error": err, "resp": resp, "server": s, - }).Warn("Unable to query DNS server") + }).Warn("Failed to query DNS server") - return 0, 0 + return nil } - if soa, ok := resp.Answer[0].(*dns.SOA); ok { - return float64(soa.Serial), math.Round(rtt.Seconds() * 1000) - } + // was an SOA returned ? + if soa, ok := resp.Answer[0].(*dns.SOA); !ok { - return 0, 0 + log.WithFields(log.Fields{ + "resp": resp, + "server": s, + }).Warn("DNS response was not an SOA") + + return nil + + } else { + // got an SOA result + + result := &DNSResult{ + serial: soa.Serial, + rtt: math.Round(rtt.Seconds() * 1000), + } + + // did we also get an NSID result ? 
+ if opts := resp.IsEdns0(); opts != nil { + // response contains an EDNS0 record + + // check for an NSID entry + for _, s := range opts.Option { + switch e := s.(type) { + case *dns.EDNS0_NSID: + result.nsid = e.Nsid + } + } + } + + return result + } } +////////////////////////////////////////////////////////////////////////// +// +// DNS Commit Metric +// +// Check the master commit against the current registry commit +// (via the explorer) to check that the master is being updated +// ////////////////////////////////////////////////////////////////////////// func (m *DNSCommitMetrics) Register() { @@ -330,6 +471,7 @@ func (m *DNSCommitMetrics) Collect() { } ////////////////////////////////////////////////////////////////////////// +// fetch the current commit from the explorer func (m *DNSCommitMetrics) FetchExplorerCommit() (string, error) { @@ -361,9 +503,13 @@ func (m *DNSCommitMetrics) FetchExplorerCommit() (string, error) { } +////////////////////////////////////////////////////////////////////////// +// fetch the current commit from the master + func (m *DNSCommitMetrics) FetchMasterCommit() (string, error) { - response, err := http.Get("http://[fd42:180:3de0:10:5054:ff:fe87:ea39]:8080/api/git-db-state") + response, err := + http.Get("http://[fd42:180:3de0:10:5054:ff:fe87:ea39]:8080/api/git-db-state") if err != nil { log.WithFields(log.Fields{ "error": err,