//////////////////////////////////////////////////////////////////////////
// DNS Metrics
//////////////////////////////////////////////////////////////////////////

package main
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
// "fmt"
|
|
dns "github.com/miekg/dns"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
log "github.com/sirupsen/logrus"
|
|
"io/ioutil"
|
|
"math"
|
|
"net/http"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// data structures
|
|
|
|
// for holding the DNS metrics
|
|
|
|
type DNSMetrics struct {
|
|
soa *prometheus.GaugeVec
|
|
rtt *prometheus.GaugeVec
|
|
valid *prometheus.GaugeVec
|
|
stime *prometheus.GaugeVec
|
|
label_map []prometheus.Labels
|
|
}
|
|
|
|
// DNSServer describes a single DNS server to be queried.
//
// Values of this type are built with positional composite literals, so the
// order of the fields must not be changed.
type DNSServer struct {
	// role, owner and name are exported as the metric labels of the same
	// name; a role of "recursive" also makes Query request recursion
	role, owner, name string

	// ip is exported as the "ip" label (the server list uses 4 and 6)
	ip uint8

	// addr is the host:port the query is sent to, and the "addr" label
	addr string

	// soa is the serial obtained by the most recent collection,
	// or 0 if the query did not produce one
	soa uint64
}
|
|
|
|
// hardcoded :( list of DNS servers to query
|
|
|
|
var dns_servers = []*DNSServer{
|
|
&DNSServer{"master", "jrb0001", "j.master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0},
|
|
&DNSServer{"delegation", "burble", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0},
|
|
&DNSServer{"delegation", "burble", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0},
|
|
&DNSServer{"delegation", "jrb0001", "j.delegation-servers.dn42", 4, "172.20.1.18:53", 0},
|
|
&DNSServer{"delegation", "jrb0001", "j.delegation-servers.dn42", 6, "[fd42:5d71:219:0:1::42]:53", 0},
|
|
&DNSServer{"delegation", "yamakaja", "y.delegation-servers.dn42", 4, "172.20.20.66:53", 0},
|
|
&DNSServer{"delegation", "yamakaja", "y.delegation-servers.dn42", 6, "[fd42:c01d:beef::3]:53", 0},
|
|
&DNSServer{"recursive", "yamakaja", "a.recursive-servers.dn42", 4, "172.20.0.53:53", 0},
|
|
&DNSServer{"recursive", "yamakaja", "a.recursive-servers.dn42", 6, "[fd42:d42:d42:54::1]:53", 0},
|
|
&DNSServer{"recursive", "burble", "b.recursive-servers.dn42", 4, "172.20.129.2:53", 0},
|
|
&DNSServer{"recursive", "burble", "b.recursive-servers.dn42", 6, "[fd42:4242:2601:ac53::53]:53", 0},
|
|
&DNSServer{"recursive", "jrb0001", "j.recursive-servers.dn42", 4, "172.20.1.19:53", 0},
|
|
&DNSServer{"recursive", "jrb0001", "j.recursive-servers.dn42", 6, "[fd42:5d71:219:0:1::43]:53", 0},
|
|
&DNSServer{"recursive", "yamakaja", "y.recursive-servers.dn42", 4, "172.20.20.65:53", 0},
|
|
&DNSServer{"recursive", "yamakaja", "y.recursive-servers.dn42", 6, "[fd42:c01d:beef::2]:53", 0},
|
|
&DNSServer{"burble.dn42", "burble", "fr-rbx1", 6, "[fd42:4242:2601:36::ac:53]:53", 0},
|
|
&DNSServer{"burble.dn42", "burble", "us-dal3", 6, "[fd42:4242:2601:2a::ac:53]:53", 0},
|
|
&DNSServer{"burble.dn42", "burble", "sg-sin2", 6, "[fd42:4242:2601:37::ac:53]:53", 0},
|
|
&DNSServer{"burble.dn42", "burble", "ca-bhs2", 6, "[fd42:4242:2601:2d::ac:53]:53", 0},
|
|
&DNSServer{"burble.dn42", "burble", "lt-vil1", 6, "[fd42:4242:2601:3d::ac:53]:53", 0},
|
|
}
|
|
|
|
// data structures for querying the current commit metric
|
|
|
|
type DNSCommitMetrics struct {
|
|
match *prometheus.GaugeVec
|
|
lastUpdate time.Time
|
|
mismatch bool
|
|
since time.Time
|
|
labelInvalid prometheus.Labels
|
|
labelUpdate prometheus.Labels
|
|
}
|
|
|
|
type DNSExplorerCommit struct {
|
|
Commit string
|
|
}
|
|
|
|
type DNSMasterCommit struct {
|
|
Type string `json:"type"`
|
|
URI string `json:"uri"`
|
|
Branch string `json:"branch"`
|
|
Commit string `json:"commit"`
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// initialisation function to register metrics
|
|
|
|
func (m *DNSMetrics) Register() {
|
|
|
|
m.soa = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "dn42_dns_soa",
|
|
Help: "SOA for .dn42 domain",
|
|
}, []string{"role", "owner", "name", "ip", "addr"})
|
|
prometheus.MustRegister(m.soa)
|
|
|
|
m.rtt = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "dn42_dns_rtt",
|
|
Help: "RTT when collecting SOA for .dn42 domain",
|
|
}, []string{"role", "owner", "name", "ip", "addr"})
|
|
prometheus.MustRegister(m.rtt)
|
|
|
|
m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "dn42_dns_valid",
|
|
Help: "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response",
|
|
}, []string{"role", "owner", "name", "ip", "addr"})
|
|
prometheus.MustRegister(m.valid)
|
|
|
|
m.stime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "dn42_dns_stime",
|
|
Help: "Returns the time between now and the SOA serial number",
|
|
}, []string{"role", "owner", "name", "ip", "addr"})
|
|
prometheus.MustRegister(m.stime)
|
|
|
|
// pre-populate the labels
|
|
m.label_map = make([]prometheus.Labels, len(dns_servers))
|
|
|
|
for ix, server := range dns_servers {
|
|
m.label_map[ix] = prometheus.Labels{
|
|
"role": server.role,
|
|
"owner": server.owner,
|
|
"name": server.name,
|
|
"ip": strconv.Itoa(int(server.ip)),
|
|
"addr": server.addr,
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// collect metrics for all DNS servers
|
|
|
|
func (m *DNSMetrics) Collect() {
|
|
|
|
now := uint64(time.Now().Unix())
|
|
|
|
// go through each server in turn
|
|
for ix, server := range dns_servers {
|
|
|
|
// query it
|
|
soa, rtt := server.Query()
|
|
server.soa = uint64(soa)
|
|
|
|
// SOA and RTT are direct metrics returned from the query
|
|
m.soa.With(m.label_map[ix]).Set(soa)
|
|
m.rtt.With(m.label_map[ix]).Set(rtt)
|
|
|
|
// check if the returned SOA matches j.master.delegation-servers.dn42
|
|
var valid uint = 0
|
|
|
|
if server.soa == 0 {
|
|
// didn't get a result, server issue
|
|
valid = 2
|
|
} else {
|
|
if server.soa != dns_servers[0].soa {
|
|
// SOA didn't match
|
|
valid = 1
|
|
} else {
|
|
valid = 0
|
|
}
|
|
}
|
|
|
|
// before setting whether the server is valid, calculate the stime
|
|
// (difference in time between now and the SOA, to allow checking that
|
|
// it is not stale)
|
|
// it's possible that the SOA could be in the future if there is a
|
|
// clock mismatch between monitor and DNS server, in which case this
|
|
// is flagged as a server error
|
|
|
|
if server.soa > now {
|
|
// server error
|
|
valid = 2
|
|
} else {
|
|
m.stime.With(m.label_map[ix]).Set(float64(now - server.soa))
|
|
}
|
|
|
|
// finally set the valid status
|
|
m.valid.With(m.label_map[ix]).Set(float64(valid))
|
|
}
|
|
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
func (s *DNSServer) Query() (float64, float64) {
|
|
|
|
msg := new(dns.Msg)
|
|
msg.Id = dns.Id()
|
|
msg.RecursionDesired = (s.role == "recursive")
|
|
msg.Question = []dns.Question{{"dn42.", dns.TypeSOA, dns.ClassINET}}
|
|
|
|
client := new(dns.Client)
|
|
resp, rtt, err := client.Exchange(msg, s.addr)
|
|
if err != nil || len(resp.Answer) != 1 {
|
|
log.WithFields(log.Fields{
|
|
"error": err,
|
|
"resp": resp,
|
|
"server": s,
|
|
}).Warn("Unable to query DNS server")
|
|
|
|
return 0, 0
|
|
}
|
|
|
|
if soa, ok := resp.Answer[0].(*dns.SOA); ok {
|
|
return float64(soa.Serial), math.Round(rtt.Seconds() * 1000)
|
|
}
|
|
|
|
return 0, 0
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
func (m *DNSCommitMetrics) Register() {
|
|
m.match = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "dn42_dns_commit",
|
|
Help: "Time since commits last matched (or -1 for error)",
|
|
}, []string{"metric"})
|
|
prometheus.MustRegister(m.match)
|
|
|
|
m.labelInvalid = prometheus.Labels{
|
|
"metric": "invalid",
|
|
}
|
|
|
|
m.labelUpdate = prometheus.Labels{
|
|
"metric": "update",
|
|
}
|
|
|
|
}
|
|
|
|
func (m *DNSCommitMetrics) Collect() {
|
|
|
|
now := time.Now()
|
|
interval := now.Sub(m.lastUpdate)
|
|
|
|
// only check if it's more than 60 mins since the last successful match
|
|
if (interval.Hours() >= 1.0) || m.mismatch {
|
|
|
|
// fetch the current commit from the explorer
|
|
ec, err := m.FetchExplorerCommit()
|
|
if err != nil {
|
|
// couldn't fetch the explorer commit value, set the metric to bad
|
|
m.match.With(m.labelUpdate).Set(-1.0)
|
|
m.match.With(m.labelInvalid).Set(-1.0)
|
|
m.mismatch = true
|
|
return
|
|
}
|
|
|
|
mc, err := m.FetchMasterCommit()
|
|
if err != nil {
|
|
// couldn't fetch master commit value, set the metric to bad
|
|
m.match.With(m.labelUpdate).Set(-2.0)
|
|
m.match.With(m.labelInvalid).Set(-1.0)
|
|
m.mismatch = true
|
|
return
|
|
}
|
|
|
|
m.lastUpdate = now
|
|
m.match.With(m.labelUpdate).Set(0.0)
|
|
|
|
if ec == mc {
|
|
|
|
// if the commits match, null the metric
|
|
m.match.With(m.labelInvalid).Set(0.0)
|
|
m.mismatch = false
|
|
|
|
} else {
|
|
|
|
// was this the first mismatch ?
|
|
if !m.mismatch {
|
|
m.since = now
|
|
m.mismatch = true
|
|
}
|
|
|
|
// set the metric to be the interval since last good match
|
|
m.match.With(m.labelInvalid).Set(float64(now.Sub(m.since).Seconds()))
|
|
}
|
|
|
|
} else {
|
|
|
|
// update time since last check
|
|
m.match.With(m.labelUpdate).Set(float64(interval.Seconds()))
|
|
}
|
|
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
func (m *DNSCommitMetrics) FetchExplorerCommit() (string, error) {
|
|
|
|
response, err := http.Get("http://collector.dn42:8043/api/registry/.meta")
|
|
if err != nil {
|
|
log.WithFields(log.Fields{
|
|
"error": err,
|
|
}).Warn("Unable to query registry explorer")
|
|
return "", err
|
|
}
|
|
|
|
data, err := ioutil.ReadAll(response.Body)
|
|
if err != nil {
|
|
log.WithFields(log.Fields{
|
|
"error": err,
|
|
}).Warn("Unable read explorer response")
|
|
return "", err
|
|
}
|
|
|
|
var ec DNSExplorerCommit
|
|
if err := json.Unmarshal(data, &ec); err != nil {
|
|
log.WithFields(log.Fields{
|
|
"error": err,
|
|
}).Warn("Unable to parse explorer JSON")
|
|
return "", err
|
|
}
|
|
|
|
return strings.ToLower(ec.Commit), nil
|
|
|
|
}
|
|
|
|
func (m *DNSCommitMetrics) FetchMasterCommit() (string, error) {
|
|
|
|
response, err := http.Get("http://[2a0c:3800:1:1011:5054:ff:fe87:ea39]:8080/api/git-db-state")
|
|
if err != nil {
|
|
log.WithFields(log.Fields{
|
|
"error": err,
|
|
}).Warn("Unable to query registry master")
|
|
return "", err
|
|
}
|
|
|
|
data, err := ioutil.ReadAll(response.Body)
|
|
if err != nil {
|
|
log.WithFields(log.Fields{
|
|
"error": err,
|
|
}).Warn("Unable read master response")
|
|
return "", err
|
|
}
|
|
|
|
var mc []DNSMasterCommit
|
|
if err := json.Unmarshal(data, &mc); err != nil {
|
|
log.WithFields(log.Fields{
|
|
"error": err,
|
|
}).Warn("Unable to parse master JSON")
|
|
return "", err
|
|
}
|
|
|
|
for _, c := range mc {
|
|
if c.URI == "https://git.dn42.us/dn42/registry.git" {
|
|
return strings.ToLower(c.Commit), nil
|
|
}
|
|
}
|
|
|
|
log.WithFields(log.Fields{
|
|
"MasterCommit": mc,
|
|
}).Warn("Unable to find registry commit from master")
|
|
return "", errors.New("Unable to find registry commit from master")
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// end of code
|