From 7b7d9965b9069078544f87dc96c97b6b66a8294f Mon Sep 17 00:00:00 2001 From: Simon Marsh Date: Sat, 29 Jun 2019 10:52:40 +0100 Subject: [PATCH] Refactor SOA checking, parallel DNS queries --- contrib/#dn42promsrv.service# | 29 ---------- dns.go | 101 +++++++++++++++++++++++++--------- 2 files changed, 76 insertions(+), 54 deletions(-) delete mode 100644 contrib/#dn42promsrv.service# diff --git a/contrib/#dn42promsrv.service# b/contrib/#dn42promsrv.service# deleted file mode 100644 index 0349925..0000000 --- a/contrib/#dn42promsrv.service# +++ /dev/null @@ -1,29 +0,0 @@ -########################################################################## -# dn42promsrv example systemd service file -########################################################################## - -[Unit] -Description=DN42 Prometheus Stats Server -After=network.target - -[Install] -WantedBy=multi-user.target - -[Service] -User=promsrv -Group=promsrv -Type=simple -Restart=on-failure -# service hardening -ProtectSystem=strict -NoNewPrivileges=yes -ProtectControlGroups=yes -PrivateTmp=yes -PrivateDevices=yes -DevicePolicy=closed -MemoryDenyWriteExecute=yes -# -ExecStart=/usr/local/bin/dn42promsrv - -######################################################################### -# end of file diff --git a/dns.go b/dns.go index 0516437..9daf3d2 100644 --- a/dns.go +++ b/dns.go @@ -18,6 +18,7 @@ import ( "net/http" "strconv" "strings" + "sync" "time" ) @@ -174,7 +175,8 @@ func (m *DNSMetrics) Register() { m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "dn42_dns_valid", - Help: "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response", + Help: "0 = response and latest serial, 1 = response and matching serial, " + + "2 = response but incorrect serial, 3 = server error", }, []string{"role", "owner", "name", "ip", "addr"}) prometheus.MustRegister(m.valid) @@ -204,44 +206,70 @@ func (m *DNSMetrics) Register() { func (m *DNSMetrics) Collect() { - now := uint64(time.Now().Unix()) - - // search the masters for the highest SOA - var latest_soa uint32 = 0 - - for _, server := range dns_servers { - if server.role == "master" { - if server.soa > latest_soa { - latest_soa = server.soa - } - } - } - - // hold the results in an array where each entry corresponds - // to the dns_servers array + // hold the collect results in an array where each + // entry corresponds to the dns_servers array results := make([]*DNSResult, len(dns_servers)) // query each server up to 3 times to try and get a result for count := 0; count < 3; count++ { + // run each tranche of queries in parallel + var wg sync.WaitGroup + for ix := 0; ix < len(results); ix++ { if results[ix] == nil { // no result yet, query the server - results[ix] = dns_servers[ix].Query() + + wg.Add(1) + go func(i int, s *sync.WaitGroup) { + results[i] = dns_servers[i].Query() + s.Done() + }(ix, &wg) } + } + // all done + wg.Wait() + } + now := uint64(time.Now().Unix()) + + // create a list of master servers SOA and + // record which one has the latest version + var latest_soa uint32 = 0 + masters := make([]uint32, 2) + + for ix, server := range dns_servers { + if server.role == "master" { + soa := results[ix].serial + masters = append(masters, soa) + if soa > latest_soa { + latest_soa = soa + } + } + } + + // fmt.Printf("latest_soa %d\n", latest_soa) + // now go through each result and update the metrics + // + // valid = 0 - Serial number matches latest master + // valid = 1 - Serial number matches one master (but not the latest) + // valid = 2 - Serial number doesn't match anything + // valid = 3 - An error occured + // + for ix, r := range results { - var valid uint = 2 + // assume that an error occured unless told otherwise + var valid uint = 3 // check if there was a valid result if r != nil { - // update the server SOA + // update the server SOA from the result dns_servers[ix].soa = r.serial // SOA and RTT are direct metrics returned from the query @@ -257,11 +285,32 @@ func (m *DNSMetrics) Collect() { }).Info("DNS Server high RTT") } - // check if the SOA matches the lastest master SOA - if r.serial == latest_soa { - valid = 0 - } else { - valid = 1 + // check if the SOA is valid + + // it's an error if the SOA is more than 25 hours old + if (now - uint64(r.serial)) < (3600 * 25) { + + // fmt.Printf("Checking serial: %s = %d\n", dns_servers[ix].name, r.serial) + + if r.serial == latest_soa { + // the SOA matches the current latest SOA + valid = 0 + + } else { + + // assume no match found + valid = 2 + + // step through each master to see if the SOA matches somewhere + for _, soa := range masters { + if r.serial == soa { + // found one + valid = 1 + break + } + } + } + } // before setting whether the server is valid, calculate the stime @@ -273,7 +322,7 @@ func (m *DNSMetrics) Collect() { if uint64(r.serial) > now { // server error - valid = 2 + valid = 3 } else { m.stime.With(m.label_map[ix]).Set(float64(now - uint64(r.serial))) } @@ -324,6 +373,8 @@ func (s *DNSServer) Query() *DNSResult { client := new(dns.Client) client.Timeout, _ = time.ParseDuration("4s") + // fmt.Printf("Querying: %s\n", s.name) + // and finally query the server resp, rtt, err := client.Exchange(msg, s.addr) if err != nil || len(resp.Answer) != 1 {