Refactor SOA checking, parallel DNS queries
This commit is contained in:
parent
cb7a98b42b
commit
7b7d9965b9
@ -1,29 +0,0 @@
|
||||
##########################################################################
|
||||
# dn42promsrv example systemd service file
|
||||
##########################################################################
|
||||
|
||||
[Unit]
|
||||
Description=DN42 Prometheus Stats Server
|
||||
After=network.target
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
[Service]
|
||||
User=promsrv
|
||||
Group=promsrv
|
||||
Type=simple
|
||||
Restart=on-failure
|
||||
# service hardening
|
||||
ProtectSystem=strict
|
||||
NoNewPrivileges=yes
|
||||
ProtectControlGroups=yes
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
DevicePolicy=closed
|
||||
MemoryDenyWriteExecute=yes
|
||||
#
|
||||
ExecStart=/usr/local/bin/dn42promsrv
|
||||
|
||||
#########################################################################
|
||||
# end of file
|
101
dns.go
101
dns.go
@ -18,6 +18,7 @@ import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
@ -174,7 +175,8 @@ func (m *DNSMetrics) Register() {
|
||||
|
||||
m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "dn42_dns_valid",
|
||||
Help: "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response",
|
||||
Help: "0 = response and latest serial, 1 = response and matching serial, " +
|
||||
"2 = response but incorrect serial, 3 = server error",
|
||||
}, []string{"role", "owner", "name", "ip", "addr"})
|
||||
prometheus.MustRegister(m.valid)
|
||||
|
||||
@ -204,44 +206,70 @@ func (m *DNSMetrics) Register() {
|
||||
|
||||
func (m *DNSMetrics) Collect() {
|
||||
|
||||
now := uint64(time.Now().Unix())
|
||||
|
||||
// search the masters for the highest SOA
|
||||
var latest_soa uint32 = 0
|
||||
|
||||
for _, server := range dns_servers {
|
||||
if server.role == "master" {
|
||||
if server.soa > latest_soa {
|
||||
latest_soa = server.soa
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// hold the results in an array where each entry corresponds
|
||||
// to the dns_servers array
|
||||
// hold the collect results in an array where each
|
||||
// entry corresponds to the dns_servers array
|
||||
results := make([]*DNSResult, len(dns_servers))
|
||||
|
||||
// query each server up to 3 times to try and get a result
|
||||
for count := 0; count < 3; count++ {
|
||||
|
||||
// run each tranche of queries in parallel
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for ix := 0; ix < len(results); ix++ {
|
||||
if results[ix] == nil {
|
||||
// no result yet, query the server
|
||||
results[ix] = dns_servers[ix].Query()
|
||||
|
||||
wg.Add(1)
|
||||
go func(i int, s *sync.WaitGroup) {
|
||||
results[i] = dns_servers[i].Query()
|
||||
s.Done()
|
||||
}(ix, &wg)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// all done
|
||||
wg.Wait()
|
||||
|
||||
}
|
||||
|
||||
now := uint64(time.Now().Unix())
|
||||
|
||||
// create a list of master servers SOA and
|
||||
// record which one has the latest version
|
||||
var latest_soa uint32 = 0
|
||||
masters := make([]uint32, 2)
|
||||
|
||||
for ix, server := range dns_servers {
|
||||
if server.role == "master" {
|
||||
soa := results[ix].serial
|
||||
masters = append(masters, soa)
|
||||
if soa > latest_soa {
|
||||
latest_soa = soa
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fmt.Printf("latest_soa %d\n", latest_soa)
|
||||
|
||||
// now go through each result and update the metrics
|
||||
//
|
||||
// valid = 0 - Serial number matches latest master
|
||||
// valid = 1 - Serial number matches one master (but not the latest)
|
||||
// valid = 2 - Serial number doesn't match anything
|
||||
// valid = 3 - An error occurred
|
||||
//
|
||||
|
||||
for ix, r := range results {
|
||||
|
||||
var valid uint = 2
|
||||
// assume that an error occurred unless told otherwise
|
||||
var valid uint = 3
|
||||
|
||||
// check if there was a valid result
|
||||
if r != nil {
|
||||
|
||||
// update the server SOA
|
||||
// update the server SOA from the result
|
||||
dns_servers[ix].soa = r.serial
|
||||
|
||||
// SOA and RTT are direct metrics returned from the query
|
||||
@ -257,11 +285,32 @@ func (m *DNSMetrics) Collect() {
|
||||
}).Info("DNS Server high RTT")
|
||||
}
|
||||
|
||||
// check if the SOA matches the latest master SOA
|
||||
if r.serial == latest_soa {
|
||||
valid = 0
|
||||
} else {
|
||||
valid = 1
|
||||
// check if the SOA is valid
|
||||
|
||||
// it's an error if the SOA is more than 25 hours old
|
||||
if (now - uint64(r.serial)) < (3600 * 25) {
|
||||
|
||||
// fmt.Printf("Checking serial: %s = %d\n", dns_servers[ix].name, r.serial)
|
||||
|
||||
if r.serial == latest_soa {
|
||||
// the SOA matches the current latest SOA
|
||||
valid = 0
|
||||
|
||||
} else {
|
||||
|
||||
// assume no match found
|
||||
valid = 2
|
||||
|
||||
// step through each master to see if the SOA matches somewhere
|
||||
for _, soa := range masters {
|
||||
if r.serial == soa {
|
||||
// found one
|
||||
valid = 1
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// before setting whether the server is valid, calculate the stime
|
||||
@ -273,7 +322,7 @@ func (m *DNSMetrics) Collect() {
|
||||
|
||||
if uint64(r.serial) > now {
|
||||
// server error
|
||||
valid = 2
|
||||
valid = 3
|
||||
} else {
|
||||
m.stime.With(m.label_map[ix]).Set(float64(now - uint64(r.serial)))
|
||||
}
|
||||
@ -324,6 +373,8 @@ func (s *DNSServer) Query() *DNSResult {
|
||||
client := new(dns.Client)
|
||||
client.Timeout, _ = time.ParseDuration("4s")
|
||||
|
||||
// fmt.Printf("Querying: %s\n", s.name)
|
||||
|
||||
// and finally query the server
|
||||
resp, rtt, err := client.Exchange(msg, s.addr)
|
||||
if err != nil || len(resp.Answer) != 1 {
|
||||
|
Loading…
x
Reference in New Issue
Block a user