Initial commit to provide DNS stats

This commit is contained in:
Simon Marsh 2019-05-10 20:29:37 +01:00
commit 34384f7565
Signed by: burble
GPG Key ID: 7B9FE8780CFB6593
4 changed files with 376 additions and 0 deletions

9
README.md Normal file
View File

@ -0,0 +1,9 @@
# dn42promsrv
A small Go server that provides custom DN42-related statistics to Prometheus.
Current stats include:
* DNS availability and SOA Serial checks
*

View File

@ -0,0 +1,29 @@
##########################################################################
# dn42promsrv example systemd service file
##########################################################################
[Unit]
Description=DN42 Prometheus Stats Server
After=network.target
[Install]
WantedBy=multi-user.target
[Service]
User=promsrv
Group=promsrv
Type=simple
Restart=on-failure
# service hardening
ProtectSystem=strict
NoNewPrivileges=yes
ProtectControlGroups=yes
PrivateTmp=yes
PrivateDevices=yes
DevicePolicy=closed
MemoryDenyWriteExecute=yes
#
ExecStart=/usr/local/bin/dn42promsrv
#########################################################################
# end of file

192
dn42promsrv.go Normal file
View File

@ -0,0 +1,192 @@
//////////////////////////////////////////////////////////////////////////
// DN42 Prometheus Stats Server
//////////////////////////////////////////////////////////////////////////
package main
//////////////////////////////////////////////////////////////////////////
import (
	"context"
	"net/http"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	"github.com/gorilla/mux"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	log "github.com/sirupsen/logrus"
	flag "github.com/spf13/pflag"
)
//////////////////////////////////////////////////////////////////////////
// Metric is the contract implemented by each statistics source.
// main calls Register once on every metric returned by initMetrics,
// and collectMetrics calls Collect on every tick of the refresh interval.
type Metric interface {
// Register is invoked once at startup, before collection begins.
Register()
// Collect is invoked once per refresh interval.
Collect()
}
//////////////////////////////////////////////////////////////////////////
// setLogLevel applies the logrus level named by levelStr.
// If the name cannot be parsed the level falls back to Info and the
// parse failure is logged as an error.
func setLogLevel(levelStr string) {
	level, err := log.ParseLevel(levelStr)
	if err == nil {
		// the requested level is valid, use it as-is
		log.SetLevel(level)
		return
	}

	// unknown level name: select a sensible default first so that the
	// error message below is not filtered out
	log.SetLevel(log.InfoLevel)
	log.WithFields(log.Fields{
		"loglevel": levelStr,
		"error":    err,
	}).Error("Failed to set requested log level")
}
//////////////////////////////////////////////////////////////////////////
// collectMetrics runs the periodic collection loop until stop is signalled.
//
//	stop     - closing (or sending on) this channel ends the loop
//	notify   - incremented on entry and released when the function returns
//	interval - time between collection passes
//	metrics  - Collect() is called on every entry, in order, once per pass
//
// The first pass only happens after one full interval has elapsed.
//
// NOTE(review): notify.Add(1) executes inside this function, so a caller
// that starts it with `go` and reaches notify.Wait() before this point has
// run would not block - confirm whether that window matters.
func collectMetrics(stop chan bool,
	notify *sync.WaitGroup, interval time.Duration,
	metrics []Metric) {

	notify.Add(1)
	defer notify.Done()

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	log.WithFields(log.Fields{
		"Interval": interval,
	}).Info("Starting data collection")

	for {
		select {
		case <-ticker.C:
			// interval elapsed, refresh every metric
			for ix := range metrics {
				metrics[ix].Collect()
			}

		case <-stop:
			// asked to stop updating
			return
		}
	}
}
//////////////////////////////////////////////////////////////////////////
// initMetrics builds the list of metric sources served by this process.
// At present that is a single DNSMetrics instance.
func initMetrics() []Metric {
	return []Metric{
		&DNSMetrics{},
	}
}
//////////////////////////////////////////////////////////////////////////
// main parses the command line, starts periodic metric collection and
// serves the prometheus handler on /metrics until SIGINT or SIGTERM is
// received, then shuts everything down gracefully.
//
// Command line options:
//
//	-l, --LogLevel     logrus level name (default "Info")
//	-b, --BindAddress  HTTP listen address (default ":8001")
//	-i, --Refresh      collection interval as a Go duration (default "1m")
func main() {

	// set a default log level, so that logging can be used immediately
	// the level will be overridden later on once the command line
	// options are loaded
	log.SetLevel(log.InfoLevel)
	log.Info("DN42 Stats Server Starting")

	// declare cmd line options
	var (
		logLevel        = flag.StringP("LogLevel", "l", "Info", "Log level")
		bindAddress     = flag.StringP("BindAddress", "b", ":8001", "Server bind address")
		refreshInterval = flag.StringP("Refresh", "i", "1m", "Refresh interval")
	)
	flag.Parse()

	// now initialise logging properly based on the cmd line options
	setLogLevel(*logLevel)

	// parse the refreshInterval
	interval, err := time.ParseDuration(*refreshInterval)
	if err != nil {
		log.WithFields(log.Fields{
			"error":    err,
			"interval": *refreshInterval,
		}).Fatal("Unable to parse refresh interval")
	}

	// time.NewTicker panics on a non-positive duration, so reject values
	// such as "0s" or "-1m" here with a proper error message instead
	if interval <= 0 {
		log.WithFields(log.Fields{
			"interval": *refreshInterval,
		}).Fatal("Refresh interval must be greater than zero")
	}

	// initialise and register metrics
	metrics := initMetrics()
	for _, metric := range metrics {
		metric.Register()
	}

	// start metric collection
	notifyComplete := &sync.WaitGroup{}
	stopCollection := make(chan bool)
	go collectMetrics(stopCollection, notifyComplete, interval, metrics)

	// initialise router and install prom handler
	router := mux.NewRouter()
	router.Handle("/metrics", promhttp.Handler())

	// initialise http server
	server := &http.Server{
		Addr:         *bindAddress,
		WriteTimeout: time.Second * 15,
		ReadTimeout:  time.Second * 15,
		IdleTimeout:  time.Second * 60,
		Handler:      router,
	}

	// run the server in a non-blocking goroutine
	log.WithFields(log.Fields{
		"BindAddress": *bindAddress,
	}).Info("Starting server")

	go func() {
		// ListenAndServe always returns a non-nil error; ErrServerClosed
		// only means Shutdown was called and must not be treated as fatal
		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.WithFields(log.Fields{
				"error":       err,
				"BindAddress": *bindAddress,
			}).Fatal("Unable to start server")
		}
	}()

	// graceful shutdown via SIGINT (^C) or SIGTERM (the signal sent by
	// service managers such as systemd when stopping a unit)
	csig := make(chan os.Signal, 1)
	signal.Notify(csig, os.Interrupt, syscall.SIGTERM)

	// and block
	<-csig

	log.Info("Server shutting down")

	// deadline for server to shutdown; the duration must carry a unit,
	// a bare 10 would mean 10 nanoseconds
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// shutdown stats collection and the server
	close(stopCollection)
	notifyComplete.Wait()

	if err := server.Shutdown(ctx); err != nil {
		log.WithFields(log.Fields{
			"error": err,
		}).Warn("Server did not shut down cleanly")
	}

	// nothing left to do; returning from main exits with status 0 and,
	// unlike os.Exit, still runs the deferred cancel
	log.Info("Shutdown complete, all done")
}
//////////////////////////////////////////////////////////////////////////
// end of code

146
dns.go Normal file
View File

@ -0,0 +1,146 @@
//////////////////////////////////////////////////////////////////////////
// DNS Metrics
//////////////////////////////////////////////////////////////////////////
package main
//////////////////////////////////////////////////////////////////////////
import (
// "fmt"
dns "github.com/miekg/dns"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
"math"
"strconv"
)
//////////////////////////////////////////////////////////////////////////
// DNSMetrics publishes the results of the DNS checks as prometheus gauges.
// All fields are populated by Register and read by Collect.
type DNSMetrics struct {
// gauge "dn42_dns_soa": the serial value returned by Query for each server
soa *prometheus.GaugeVec
// gauge "dn42_dns_rtt": the round trip value returned by Query for each server
rtt *prometheus.GaugeVec
// gauge "dn42_dns_valid": 0, 1 or 2 as computed in Collect
valid *prometheus.GaugeVec
// one label set per entry of dns_servers, in the same index order
label_map []prometheus.Labels
}
// DNSServer describes one DNS server to be polled.
// The entries in dns_servers are initialised positionally, so the order
// of the fields below must not be changed without updating that table.
type DNSServer struct {
// exported as the "role" label; Query sets the recursion desired flag
// only when this equals "recursive"
role string
// exported as the "name" label
name string
// exported as the "ip" label (the table in dns_servers uses 4 and 6)
ip uint8
// address passed to the DNS client in Query, also the "addr" label
addr string
// serial stored by Collect after the most recent query, 0 if it failed
soa uint
}
// dns_servers is the fixed table of DNS servers that are polled.
//
// Fields are positional: role, name, ip, addr, soa.
//
// The first entry is the reference server: Collect compares the serial of
// every server against dns_servers[0].soa, so the master must remain at
// index 0.
var dns_servers = []*DNSServer{
	// master
	{"master", "master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0},

	// delegation servers
	{"delegation", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0},
	{"delegation", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0},
	{"delegation", "j.delegation-servers.dn42", 4, "172.20.1.18:53", 0},
	{"delegation", "j.delegation-servers.dn42", 6, "[fd42:5d71:219:0:1::42]:53", 0},
	{"delegation", "y.delegation-servers.dn42", 4, "172.20.20.66:53", 0},
	{"delegation", "y.delegation-servers.dn42", 6, "[fd42:c01d:beef::3]:53", 0},

	// recursive servers
	{"recursive", "a.recursive-servers.dn42", 4, "172.20.0.53:53", 0},
	{"recursive", "a.recursive-servers.dn42", 6, "[fd42:d42:d42:54::1]:53", 0},
	{"recursive", "b.recursive-servers.dn42", 4, "172.20.129.2:53", 0},
	{"recursive", "b.recursive-servers.dn42", 6, "[fd42:4242:2601:ac53::53]:53", 0},
	{"recursive", "j.recursive-servers.dn42", 4, "172.20.1.19:53", 0},
	{"recursive", "j.recursive-servers.dn42", 6, "[fd42:5d71:219:0:1::43]:53", 0},
	{"recursive", "y.recursive-servers.dn42", 4, "172.20.20.65:53", 0},
	{"recursive", "y.recursive-servers.dn42", 6, "[fd42:c01d:beef::2]:53", 0},
}
//////////////////////////////////////////////////////////////////////////
// Register creates the three DNS gauge vectors (dn42_dns_soa, dn42_dns_rtt
// and dn42_dns_valid), registers each with prometheus via MustRegister and
// builds one label set per entry of dns_servers, stored in the same index
// order so that Collect can look labels up by server index.
func (m *DNSMetrics) Register() {
	// every gauge is keyed by the same four labels
	labelNames := []string{"role", "name", "ip", "addr"}

	// create and immediately register a gauge vector
	newGauge := func(name, help string) *prometheus.GaugeVec {
		vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
			Name: name,
			Help: help,
		}, labelNames)
		prometheus.MustRegister(vec)
		return vec
	}

	m.soa = newGauge("dn42_dns_soa",
		"SOA for .dn42 domain")
	m.rtt = newGauge("dn42_dns_rtt",
		"RTT when collecting SOA for .dn42 domain")
	m.valid = newGauge("dn42_dns_valid",
		"0 = response and correct serial, 1 = response but incorrect serial, 2 = no response")

	// pre-populate the labels
	labels := make([]prometheus.Labels, 0, len(dns_servers))
	for _, server := range dns_servers {
		labels = append(labels, prometheus.Labels{
			"role": server.role,
			"name": server.name,
			"ip":   strconv.Itoa(int(server.ip)),
			"addr": server.addr,
		})
	}
	m.label_map = labels
}
//////////////////////////////////////////////////////////////////////////
// Collect queries every server in dns_servers in order and updates the
// three gauges for each of them.
//
// The validity gauge is set to:
//
//	2 - the stored serial is 0 (Query returns 0 when it has no usable answer)
//	1 - the serial differs from that of dns_servers[0]
//	0 - the serial matches that of dns_servers[0]
//
// dns_servers[0] is queried first, so the other servers are compared
// against the value obtained for it during this same pass.
func (m *DNSMetrics) Collect() {
	for ix, server := range dns_servers {
		serial, rtt := server.Query()
		server.soa = uint(serial)

		labels := m.label_map[ix]
		m.soa.With(labels).Set(serial)
		m.rtt.With(labels).Set(rtt)

		var state float64
		switch {
		case server.soa == 0:
			state = 2
		case server.soa != dns_servers[0].soa:
			state = 1
		default:
			state = 0
		}
		m.valid.With(labels).Set(state)
	}
}
//////////////////////////////////////////////////////////////////////////
// Query sends a single SOA question for "dn42." to s.addr.
// Recursion is requested only when the server's role is "recursive".
//
// Returns the SOA serial and the round trip time in milliseconds (rounded
// to the nearest whole number), both as float64.
// Returns (0, 0) when the exchange fails, when the response does not
// contain exactly one answer, or when that answer is not an SOA record;
// the first two cases are logged as a warning.
func (s *DNSServer) Query() (float64, float64) {
	var query dns.Msg
	query.Id = dns.Id()
	query.RecursionDesired = s.role == "recursive"
	query.Question = []dns.Question{
		{Name: "dn42.", Qtype: dns.TypeSOA, Qclass: dns.ClassINET},
	}

	client := &dns.Client{}
	resp, rtt, err := client.Exchange(&query, s.addr)
	if err != nil || len(resp.Answer) != 1 {
		log.WithFields(log.Fields{
			"error":  err,
			"resp":   resp,
			"server": s,
		}).Warn("Unable to query DNS server")
		return 0, 0
	}

	soa, ok := resp.Answer[0].(*dns.SOA)
	if !ok {
		// the single answer was some other record type
		return 0, 0
	}

	return float64(soa.Serial), math.Round(rtt.Seconds() * 1000)
}
//////////////////////////////////////////////////////////////////////////
// end of code