dn42promsrv/dns.go
2019-05-27 12:37:44 +01:00

367 lines
10 KiB
Go

//////////////////////////////////////////////////////////////////////////
// DNS Metrics
//////////////////////////////////////////////////////////////////////////
package main
//////////////////////////////////////////////////////////////////////////
import (
"encoding/json"
"errors"
// "fmt"
dns "github.com/miekg/dns"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
"io/ioutil"
"math"
"net/http"
"strconv"
"strings"
"time"
)
//////////////////////////////////////////////////////////////////////////
// data structures
// DNSMetrics holds the Prometheus gauge vectors published for each
// monitored DNS server, plus the label sets used to address them.
type DNSMetrics struct {
// soa is the dn42_dns_soa gauge: the SOA serial returned by the server
soa *prometheus.GaugeVec
// rtt is the dn42_dns_rtt gauge: round-trip time of the SOA query,
// rounded to whole milliseconds by Query
rtt *prometheus.GaugeVec
// valid is the dn42_dns_valid gauge: 0 = serial matches dns_servers[0],
// 1 = serial differs, 2 = no usable response or serial in the future
valid *prometheus.GaugeVec
// stime is the dn42_dns_stime gauge: current Unix time minus the SOA serial
stime *prometheus.GaugeVec
// label_map holds one label set per entry of dns_servers, at the same index
label_map []prometheus.Labels
}
// DNSServer describes one DNS server to monitor, together with the SOA
// serial obtained from it by the most recent collection.
type DNSServer struct {
	// role, owner and name are exported as metric labels; Query requests
	// recursion only when role is "recursive".
	role, owner, name string
	// ip is the IP version of addr (4 or 6), exported as the "ip" label.
	ip uint8
	// addr is the host:port the SOA query is sent to.
	addr string
	// soa is the serial stored by the last collection; 0 means the query
	// did not produce a usable answer.
	soa uint64
}
// dns_servers is the hardcoded :( list of DNS servers to query.
//
// The master must stay at index 0: every server's serial is compared
// against dns_servers[0] during collection, and the servers are queried
// in list order.
var dns_servers = []*DNSServer{
	// master
	{"master", "jrb0001", "j.master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0},

	// delegation servers
	{"delegation", "burble", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0},
	{"delegation", "burble", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0},
	{"delegation", "jrb0001", "j.delegation-servers.dn42", 4, "172.20.1.18:53", 0},
	{"delegation", "jrb0001", "j.delegation-servers.dn42", 6, "[fd42:5d71:219:0:1::42]:53", 0},
	{"delegation", "yamakaja", "y.delegation-servers.dn42", 4, "172.20.20.66:53", 0},
	{"delegation", "yamakaja", "y.delegation-servers.dn42", 6, "[fd42:c01d:beef::3]:53", 0},

	// recursive servers
	{"recursive", "yamakaja", "a.recursive-servers.dn42", 4, "172.20.0.53:53", 0},
	{"recursive", "yamakaja", "a.recursive-servers.dn42", 6, "[fd42:d42:d42:54::1]:53", 0},
	{"recursive", "burble", "b.recursive-servers.dn42", 4, "172.20.129.2:53", 0},
	{"recursive", "burble", "b.recursive-servers.dn42", 6, "[fd42:4242:2601:ac53::53]:53", 0},
	{"recursive", "jrb0001", "j.recursive-servers.dn42", 4, "172.20.1.19:53", 0},
	{"recursive", "jrb0001", "j.recursive-servers.dn42", 6, "[fd42:5d71:219:0:1::43]:53", 0},
	{"recursive", "yamakaja", "y.recursive-servers.dn42", 4, "172.20.20.65:53", 0},
	{"recursive", "yamakaja", "y.recursive-servers.dn42", 6, "[fd42:c01d:beef::2]:53", 0},

	// burble.dn42 servers
	{"burble.dn42", "burble", "fr-rbx1", 6, "[fd42:4242:2601:36::ac:53]:53", 0},
	{"burble.dn42", "burble", "us-dal3", 6, "[fd42:4242:2601:2a::ac:53]:53", 0},
	{"burble.dn42", "burble", "sg-sin2", 6, "[fd42:4242:2601:37::ac:53]:53", 0},
	{"burble.dn42", "burble", "ca-bhs2", 6, "[fd42:4242:2601:2d::ac:53]:53", 0},
	{"burble.dn42", "burble", "lt-vil1", 6, "[fd42:4242:2601:3d::ac:53]:53", 0},
}
// DNSCommitMetrics holds the gauge and bookkeeping state used to compare
// the registry commit reported by the explorer with the one on the master.
type DNSCommitMetrics struct {
// match is the dn42_dns_commit gauge vector, keyed by the "metric" label
match *prometheus.GaugeVec
// lastUpdate is when both commits were last fetched successfully
lastUpdate time.Time
// mismatch is set after a failed fetch or a commit mismatch; while it is
// set, Collect re-checks on every call instead of waiting an hour
mismatch bool
// since is the start of the current mismatch period, as recorded by Collect
since time.Time
// labelInvalid selects the gauge with metric="invalid"
labelInvalid prometheus.Labels
// labelUpdate selects the gauge with metric="update"
labelUpdate prometheus.Labels
}
// DNSExplorerCommit is the part of the registry explorer's JSON response
// that FetchExplorerCommit decodes.
type DNSExplorerCommit struct {
// Commit has no json tag, so encoding/json matches the incoming key
// against the field name (case-insensitively)
Commit string
}
// DNSMasterCommit is one element of the JSON array that FetchMasterCommit
// decodes from the master's git-db-state endpoint.
type DNSMasterCommit struct {
Type string `json:"type"`
// URI is compared against the registry repository URL to pick the entry
URI string `json:"uri"`
Branch string `json:"branch"`
// Commit is the value returned (lower-cased) for the matching entry
Commit string `json:"commit"`
}
//////////////////////////////////////////////////////////////////////////
// Register creates the four per-server DNS gauges, registers each with the
// default Prometheus registry (MustRegister panics on failure) and
// pre-computes one label set per entry of dns_servers.
func (m *DNSMetrics) Register() {
	// every gauge shares the same label names, so describe them as a table
	// and create + register them one after another, in this order
	gauges := []struct {
		dst        **prometheus.GaugeVec
		name, help string
	}{
		{&m.soa, "dn42_dns_soa", "SOA for .dn42 domain"},
		{&m.rtt, "dn42_dns_rtt", "RTT when collecting SOA for .dn42 domain"},
		{&m.valid, "dn42_dns_valid", "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response"},
		{&m.stime, "dn42_dns_stime", "Returns the time between now and the SOA serial number"},
	}
	for _, g := range gauges {
		*g.dst = prometheus.NewGaugeVec(
			prometheus.GaugeOpts{Name: g.name, Help: g.help},
			[]string{"role", "owner", "name", "ip", "addr"},
		)
		prometheus.MustRegister(*g.dst)
	}

	// pre-populate the labels, index-aligned with dns_servers
	m.label_map = make([]prometheus.Labels, 0, len(dns_servers))
	for _, srv := range dns_servers {
		m.label_map = append(m.label_map, prometheus.Labels{
			"role":  srv.role,
			"owner": srv.owner,
			"name":  srv.name,
			"ip":    strconv.Itoa(int(srv.ip)),
			"addr":  srv.addr,
		})
	}
}
//////////////////////////////////////////////////////////////////////////
// Collect queries every server in dns_servers, in order, and updates the
// soa, rtt, stime and valid gauges for each one.
//
// The serial of dns_servers[0] is the reference value, so that entry has to
// be queried first for the comparison to use this round's serial.
func (m *DNSMetrics) Collect() {
	now := uint64(time.Now().Unix())

	for ix, server := range dns_servers {
		// Query reports (0, 0) when no usable answer was received
		serial, rtt := server.Query()
		server.soa = uint64(serial)

		labels := m.label_map[ix]

		// serial and RTT are published exactly as returned by the query
		m.soa.With(labels).Set(serial)
		m.rtt.With(labels).Set(rtt)

		// 0 = serial matches the reference, 1 = mismatch, 2 = server issue
		var status uint
		switch {
		case server.soa == 0:
			// didn't get a result
			status = 2
		case server.soa != dns_servers[0].soa:
			// serial differs from the reference server
			status = 1
		}

		// stime is the gap between now and the serial, which lets staleness
		// be checked. A serial in the future (e.g. clock mismatch between
		// monitor and DNS server) is flagged as a server error and stime is
		// left untouched.
		if server.soa > now {
			status = 2
		} else {
			m.stime.With(labels).Set(float64(now - server.soa))
		}

		// finally publish the status
		m.valid.With(labels).Set(float64(status))
	}
}
//////////////////////////////////////////////////////////////////////////
// Query sends an SOA query for "dn42." to the server and returns the serial
// from the answer and the round-trip time rounded to whole milliseconds.
//
// Recursion is requested only for servers whose role is "recursive". When
// the exchange fails or the reply does not hold exactly one answer, a
// warning is logged and (0, 0) is returned; (0, 0) is also returned, without
// logging, when the single answer is not an SOA record.
func (s *DNSServer) Query() (float64, float64) {
	query := &dns.Msg{}
	query.Id = dns.Id()
	query.RecursionDesired = s.role == "recursive"
	query.Question = []dns.Question{
		{Name: "dn42.", Qtype: dns.TypeSOA, Qclass: dns.ClassINET},
	}

	resolver := &dns.Client{}
	reply, elapsed, err := resolver.Exchange(query, s.addr)
	// err is tested first, so reply is only inspected after a successful exchange
	if err != nil || len(reply.Answer) != 1 {
		log.WithFields(log.Fields{
			"error":  err,
			"resp":   reply,
			"server": s,
		}).Warn("Unable to query DNS server")
		return 0, 0
	}

	record, isSOA := reply.Answer[0].(*dns.SOA)
	if !isSOA {
		return 0, 0
	}
	return float64(record.Serial), math.Round(elapsed.Seconds() * 1000)
}
//////////////////////////////////////////////////////////////////////////
// Register creates the dn42_dns_commit gauge vector, registers it with the
// default Prometheus registry (MustRegister panics on failure) and prepares
// the two label sets used to address its "invalid" and "update" series.
func (m *DNSCommitMetrics) Register() {
	opts := prometheus.GaugeOpts{
		Name: "dn42_dns_commit",
		Help: "Time since commits last matched (or -1 for error)",
	}
	m.match = prometheus.NewGaugeVec(opts, []string{"metric"})
	prometheus.MustRegister(m.match)

	m.labelInvalid = prometheus.Labels{"metric": "invalid"}
	m.labelUpdate = prometheus.Labels{"metric": "update"}
}
// Collect updates the dn42_dns_commit gauges.
//
// The explorer and master commits are fetched and compared when at least an
// hour has passed since the last successful comparison, or on every call
// while the previous attempt failed or found a mismatch. Otherwise only the
// "update" series is refreshed.
//
// Series values:
//   - "update": 0 after a successful comparison, -1 if the explorer fetch
//     failed, -2 if the master fetch failed, otherwise the seconds since
//     the last successful comparison.
//   - "invalid": 0 when the commits match, -1 on a fetch failure, otherwise
//     the seconds since the current bad period began.
func (m *DNSCommitMetrics) Collect() {
	now := time.Now()
	interval := now.Sub(m.lastUpdate)

	// nothing to re-check yet: only report the age of the last comparison
	if interval.Hours() < 1.0 && !m.mismatch {
		m.match.With(m.labelUpdate).Set(interval.Seconds())
		return
	}

	// markBad flags the state as not known-good and records when that began.
	// The timestamp is taken on every transition into the bad state, fetch
	// failures included. Without that, a mismatch that follows a failed
	// fetch would be measured from a zero or stale m.since.
	markBad := func() {
		if !m.mismatch {
			m.since = now
			m.mismatch = true
		}
	}

	// fetch the current commit from the explorer
	ec, err := m.FetchExplorerCommit()
	if err != nil {
		// couldn't fetch the explorer commit value, set the metric to bad
		m.match.With(m.labelUpdate).Set(-1.0)
		m.match.With(m.labelInvalid).Set(-1.0)
		markBad()
		return
	}

	mc, err := m.FetchMasterCommit()
	if err != nil {
		// couldn't fetch the master commit value, set the metric to bad
		m.match.With(m.labelUpdate).Set(-2.0)
		m.match.With(m.labelInvalid).Set(-1.0)
		markBad()
		return
	}

	m.lastUpdate = now
	m.match.With(m.labelUpdate).Set(0.0)

	if ec == mc {
		// the commits match, null the metric and leave the bad state
		m.match.With(m.labelInvalid).Set(0.0)
		m.mismatch = false
		return
	}

	// commits differ: report how long the bad period has lasted so far
	markBad()
	m.match.With(m.labelInvalid).Set(now.Sub(m.since).Seconds())
}
//////////////////////////////////////////////////////////////////////////
// FetchExplorerCommit requests the registry metadata from the explorer and
// returns the commit it reports, lower-cased so it can be compared with the
// master's value.
//
// A transport failure, a non-2xx status, a read failure or a JSON parse
// failure is logged as a warning and returned as the error, with an empty
// string as the commit.
func (m *DNSCommitMetrics) FetchExplorerCommit() (string, error) {
	// NOTE(review): http.Get uses http.DefaultClient, which has no timeout;
	// consider a client with an explicit Timeout so a stalled explorer
	// cannot block collection.
	response, err := http.Get("http://collector.dn42:8043/api/registry/.meta")
	if err != nil {
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Unable to query registry explorer")
		return "", err
	}
	// release the connection on every return path
	defer response.Body.Close()

	// an error page must not be parsed as if it were registry metadata
	if response.StatusCode < 200 || response.StatusCode > 299 {
		err := errors.New("registry explorer returned " + response.Status)
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Unable to query registry explorer")
		return "", err
	}

	data, err := ioutil.ReadAll(response.Body)
	if err != nil {
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Unable read explorer response")
		return "", err
	}

	var ec DNSExplorerCommit
	if err := json.Unmarshal(data, &ec); err != nil {
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Unable to parse explorer JSON")
		return "", err
	}

	return strings.ToLower(ec.Commit), nil
}
// FetchMasterCommit requests the git database state from the master and
// returns the commit of the entry whose URI is the dn42 registry
// repository, lower-cased so it can be compared with the explorer's value.
//
// A transport failure, a non-2xx status, a read failure, a JSON parse
// failure or a reply without a registry entry is logged as a warning and
// returned as the error, with an empty string as the commit.
func (m *DNSCommitMetrics) FetchMasterCommit() (string, error) {
	// NOTE(review): http.Get uses http.DefaultClient, which has no timeout;
	// consider a client with an explicit Timeout so a stalled master cannot
	// block collection.
	response, err := http.Get("http://[2a0c:3800:1:1011:5054:ff:fe87:ea39]:8080/api/git-db-state")
	if err != nil {
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Unable to query registry master")
		return "", err
	}
	// release the connection on every return path
	defer response.Body.Close()

	// an error page must not be parsed as if it were the repository list
	if response.StatusCode < 200 || response.StatusCode > 299 {
		err := errors.New("registry master returned " + response.Status)
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Unable to query registry master")
		return "", err
	}

	data, err := ioutil.ReadAll(response.Body)
	if err != nil {
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Unable read master response")
		return "", err
	}

	var mc []DNSMasterCommit
	if err := json.Unmarshal(data, &mc); err != nil {
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Unable to parse master JSON")
		return "", err
	}

	// the master reports several repositories; pick the registry one
	for _, c := range mc {
		if c.URI == "https://git.dn42.us/dn42/registry.git" {
			return strings.ToLower(c.Commit), nil
		}
	}

	log.WithFields(log.Fields{
		"MasterCommit": mc,
	}).Warn("Unable to find registry commit from master")
	return "", errors.New("Unable to find registry commit from master")
}
//////////////////////////////////////////////////////////////////////////
// end of code