Refactor SOA checking, parallel DNS queries
This commit is contained in:
parent
cb7a98b42b
commit
7b7d9965b9
@ -1,29 +0,0 @@
|
|||||||
##########################################################################
|
|
||||||
# dn42promsrv example systemd service file
|
|
||||||
##########################################################################
|
|
||||||
|
|
||||||
[Unit]
|
|
||||||
Description=DN42 Prometheus Stats Server
|
|
||||||
After=network.target
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
User=promsrv
|
|
||||||
Group=promsrv
|
|
||||||
Type=simple
|
|
||||||
Restart=on-failure
|
|
||||||
# service hardening
|
|
||||||
ProtectSystem=strict
|
|
||||||
NoNewPrivileges=yes
|
|
||||||
ProtectControlGroups=yes
|
|
||||||
PrivateTmp=yes
|
|
||||||
PrivateDevices=yes
|
|
||||||
DevicePolicy=closed
|
|
||||||
MemoryDenyWriteExecute=yes
|
|
||||||
#
|
|
||||||
ExecStart=/usr/local/bin/dn42promsrv
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
# end of file
|
|
101
dns.go
101
dns.go
@ -18,6 +18,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -174,7 +175,8 @@ func (m *DNSMetrics) Register() {
|
|||||||
|
|
||||||
m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
Name: "dn42_dns_valid",
|
Name: "dn42_dns_valid",
|
||||||
Help: "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response",
|
Help: "0 = response and latest serial, 1 = response and matching serial, " +
|
||||||
|
"2 = response but incorrect serial, 3 = server error",
|
||||||
}, []string{"role", "owner", "name", "ip", "addr"})
|
}, []string{"role", "owner", "name", "ip", "addr"})
|
||||||
prometheus.MustRegister(m.valid)
|
prometheus.MustRegister(m.valid)
|
||||||
|
|
||||||
@ -204,44 +206,70 @@ func (m *DNSMetrics) Register() {
|
|||||||
|
|
||||||
func (m *DNSMetrics) Collect() {
|
func (m *DNSMetrics) Collect() {
|
||||||
|
|
||||||
now := uint64(time.Now().Unix())
|
// hold the collect results in an array where each
|
||||||
|
// entry corresponds to the dns_servers array
|
||||||
// search the masters for the highest SOA
|
|
||||||
var latest_soa uint32 = 0
|
|
||||||
|
|
||||||
for _, server := range dns_servers {
|
|
||||||
if server.role == "master" {
|
|
||||||
if server.soa > latest_soa {
|
|
||||||
latest_soa = server.soa
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// hold the results in an array where each entry corresponds
|
|
||||||
// to the dns_servers array
|
|
||||||
results := make([]*DNSResult, len(dns_servers))
|
results := make([]*DNSResult, len(dns_servers))
|
||||||
|
|
||||||
// query each server up to 3 times to try and get a result
|
// query each server up to 3 times to try and get a result
|
||||||
for count := 0; count < 3; count++ {
|
for count := 0; count < 3; count++ {
|
||||||
|
|
||||||
|
// run each tranche of queries in parallel
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
for ix := 0; ix < len(results); ix++ {
|
for ix := 0; ix < len(results); ix++ {
|
||||||
if results[ix] == nil {
|
if results[ix] == nil {
|
||||||
// no result yet, query the server
|
// no result yet, query the server
|
||||||
results[ix] = dns_servers[ix].Query()
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func(i int, s *sync.WaitGroup) {
|
||||||
|
results[i] = dns_servers[i].Query()
|
||||||
|
s.Done()
|
||||||
|
}(ix, &wg)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// all done
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
now := uint64(time.Now().Unix())
|
||||||
|
|
||||||
|
// create a list of master servers SOA and
|
||||||
|
// record which one has the latest version
|
||||||
|
var latest_soa uint32 = 0
|
||||||
|
masters := make([]uint32, 2)
|
||||||
|
|
||||||
|
for ix, server := range dns_servers {
|
||||||
|
if server.role == "master" {
|
||||||
|
soa := results[ix].serial
|
||||||
|
masters = append(masters, soa)
|
||||||
|
if soa > latest_soa {
|
||||||
|
latest_soa = soa
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fmt.Printf("latest_soa %d\n", latest_soa)
|
||||||
|
|
||||||
// now go through each result and update the metrics
|
// now go through each result and update the metrics
|
||||||
|
//
|
||||||
|
// valid = 0 - Serial number matches latest master
|
||||||
|
// valid = 1 - Serial number matches one master (but not the latest)
|
||||||
|
// valid = 2 - Serial number doesn't match anything
|
||||||
|
// valid = 3 - An error occurred
|
||||||
|
//
|
||||||
|
|
||||||
for ix, r := range results {
|
for ix, r := range results {
|
||||||
|
|
||||||
var valid uint = 2
|
// assume that an error occurred unless told otherwise
|
||||||
|
var valid uint = 3
|
||||||
|
|
||||||
// check if there was a valid result
|
// check if there was a valid result
|
||||||
if r != nil {
|
if r != nil {
|
||||||
|
|
||||||
// update the server SOA
|
// update the server SOA from the result
|
||||||
dns_servers[ix].soa = r.serial
|
dns_servers[ix].soa = r.serial
|
||||||
|
|
||||||
// SOA and RTT are direct metrics returned from the query
|
// SOA and RTT are direct metrics returned from the query
|
||||||
@ -257,11 +285,32 @@ func (m *DNSMetrics) Collect() {
|
|||||||
}).Info("DNS Server high RTT")
|
}).Info("DNS Server high RTT")
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if the SOA matches the latest master SOA
|
// check if the SOA is valid
|
||||||
if r.serial == latest_soa {
|
|
||||||
valid = 0
|
// it's an error if the SOA is more than 25 hours old
|
||||||
} else {
|
if (now - uint64(r.serial)) < (3600 * 25) {
|
||||||
valid = 1
|
|
||||||
|
// fmt.Printf("Checking serial: %s = %d\n", dns_servers[ix].name, r.serial)
|
||||||
|
|
||||||
|
if r.serial == latest_soa {
|
||||||
|
// the SOA matches the current latest SOA
|
||||||
|
valid = 0
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// assume no match found
|
||||||
|
valid = 2
|
||||||
|
|
||||||
|
// step through each master to see if the SOA matches somewhere
|
||||||
|
for _, soa := range masters {
|
||||||
|
if r.serial == soa {
|
||||||
|
// found one
|
||||||
|
valid = 1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// before setting whether the server is valid, calculate the stime
|
// before setting whether the server is valid, calculate the stime
|
||||||
@ -273,7 +322,7 @@ func (m *DNSMetrics) Collect() {
|
|||||||
|
|
||||||
if uint64(r.serial) > now {
|
if uint64(r.serial) > now {
|
||||||
// server error
|
// server error
|
||||||
valid = 2
|
valid = 3
|
||||||
} else {
|
} else {
|
||||||
m.stime.With(m.label_map[ix]).Set(float64(now - uint64(r.serial)))
|
m.stime.With(m.label_map[ix]).Set(float64(now - uint64(r.serial)))
|
||||||
}
|
}
|
||||||
@ -324,6 +373,8 @@ func (s *DNSServer) Query() *DNSResult {
|
|||||||
client := new(dns.Client)
|
client := new(dns.Client)
|
||||||
client.Timeout, _ = time.ParseDuration("4s")
|
client.Timeout, _ = time.ParseDuration("4s")
|
||||||
|
|
||||||
|
// fmt.Printf("Querying: %s\n", s.name)
|
||||||
|
|
||||||
// and finally query the server
|
// and finally query the server
|
||||||
resp, rtt, err := client.Exchange(msg, s.addr)
|
resp, rtt, err := client.Exchange(msg, s.addr)
|
||||||
if err != nil || len(resp.Answer) != 1 {
|
if err != nil || len(resp.Answer) != 1 {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user