Initial commit to provide DNS stats
This commit is contained in:
commit
34384f7565
9
README.md
Normal file
9
README.md
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# dn42promsrv
|
||||||
|
|
||||||
|
A small Go server that provides custom DN42-related statistics to Prometheus.
|
||||||
|
|
||||||
|
Current stats include:
|
||||||
|
|
||||||
|
* DNS availability and SOA Serial checks
|
||||||
|
*
|
||||||
|
|
29
contrib/dn42promsrv.service
Normal file
29
contrib/dn42promsrv.service
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
##########################################################################
|
||||||
|
# dn42promsrv example systemd service file
|
||||||
|
##########################################################################
|
||||||
|
|
||||||
|
[Unit]
|
||||||
|
Description=DN42 Prometheus Stats Server
|
||||||
|
After=network.target
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
User=promsrv
|
||||||
|
Group=promsrv
|
||||||
|
Type=simple
|
||||||
|
Restart=on-failure
|
||||||
|
# service hardening
|
||||||
|
ProtectSystem=strict
|
||||||
|
NoNewPrivileges=yes
|
||||||
|
ProtectControlGroups=yes
|
||||||
|
PrivateTmp=yes
|
||||||
|
PrivateDevices=yes
|
||||||
|
DevicePolicy=closed
|
||||||
|
MemoryDenyWriteExecute=yes
|
||||||
|
#
|
||||||
|
ExecStart=/usr/local/bin/dn42promsrv
|
||||||
|
|
||||||
|
#########################################################################
|
||||||
|
# end of file
|
192
dn42promsrv.go
Normal file
192
dn42promsrv.go
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// DN42 Prometheus Stats Server
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
flag "github.com/spf13/pflag"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Metric is the contract every statistics provider in this server
// implements.
type Metric interface {
	// Register is called once at startup, before any collection runs.
	Register()

	// Collect is called on every tick of the refresh interval.
	Collect()
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Set the log level
|
||||||
|
|
||||||
|
func setLogLevel(levelStr string) {
|
||||||
|
|
||||||
|
if level, err := log.ParseLevel(levelStr); err != nil {
|
||||||
|
// failed to set the level
|
||||||
|
|
||||||
|
// set a sensible default and, of course, log the error
|
||||||
|
log.SetLevel(log.InfoLevel)
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"loglevel": levelStr,
|
||||||
|
"error": err,
|
||||||
|
}).Error("Failed to set requested log level")
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// set the requested level
|
||||||
|
log.SetLevel(level)
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// collect metrics
|
||||||
|
|
||||||
|
func collectMetrics(stop chan bool,
|
||||||
|
notify *sync.WaitGroup, interval time.Duration,
|
||||||
|
metrics []Metric) {
|
||||||
|
|
||||||
|
notify.Add(1)
|
||||||
|
defer notify.Done()
|
||||||
|
|
||||||
|
ticker := time.NewTicker(interval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"Interval": interval,
|
||||||
|
}).Info("Starting data collection")
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-stop:
|
||||||
|
// stop updating
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
// timer expired, perform an update
|
||||||
|
|
||||||
|
for _, metric := range metrics {
|
||||||
|
metric.Collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// initialise metrics
|
||||||
|
|
||||||
|
func initMetrics() []Metric {
|
||||||
|
|
||||||
|
metrics := make([]Metric, 1)
|
||||||
|
|
||||||
|
metrics[0] = &DNSMetrics{}
|
||||||
|
|
||||||
|
return metrics
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
|
||||||
|
// set a default log level, so that logging can be used immediately
|
||||||
|
// the level will be overidden later on once the command line
|
||||||
|
// options are loaded
|
||||||
|
log.SetLevel(log.InfoLevel)
|
||||||
|
log.Info("DN42 Stats Server Starting")
|
||||||
|
|
||||||
|
// declare cmd line options
|
||||||
|
var (
|
||||||
|
logLevel = flag.StringP("LogLevel", "l", "Info", "Log level")
|
||||||
|
bindAddress = flag.StringP("BindAddress", "b", ":8001", "Server bind address")
|
||||||
|
refreshInterval = flag.StringP("Refresh", "i", "1m", "Refresh interval")
|
||||||
|
)
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// now initialise logging properly based on the cmd line options
|
||||||
|
setLogLevel(*logLevel)
|
||||||
|
|
||||||
|
// parse the refreshInterval and start data collection
|
||||||
|
interval, err := time.ParseDuration(*refreshInterval)
|
||||||
|
if err != nil {
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"error": err,
|
||||||
|
"interval": *refreshInterval,
|
||||||
|
}).Fatal("Unable to parse refresh interval")
|
||||||
|
}
|
||||||
|
|
||||||
|
// initialise and register metrics
|
||||||
|
metrics := initMetrics()
|
||||||
|
|
||||||
|
for _, metric := range metrics {
|
||||||
|
metric.Register()
|
||||||
|
}
|
||||||
|
|
||||||
|
// start metric collection
|
||||||
|
notify_complete := &sync.WaitGroup{}
|
||||||
|
stop_collection := make(chan bool)
|
||||||
|
go collectMetrics(stop_collection, notify_complete, interval, metrics)
|
||||||
|
|
||||||
|
// initialise router and install prom handler
|
||||||
|
router := mux.NewRouter()
|
||||||
|
router.Handle("/metrics", promhttp.Handler())
|
||||||
|
|
||||||
|
// initialise http server
|
||||||
|
server := &http.Server{
|
||||||
|
Addr: *bindAddress,
|
||||||
|
WriteTimeout: time.Second * 15,
|
||||||
|
ReadTimeout: time.Second * 15,
|
||||||
|
IdleTimeout: time.Second * 60,
|
||||||
|
Handler: router,
|
||||||
|
}
|
||||||
|
|
||||||
|
// run the server in a non-blocking goroutine
|
||||||
|
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"BindAddress": *bindAddress,
|
||||||
|
}).Info("Starting server")
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if err := server.ListenAndServe(); err != nil {
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"error": err,
|
||||||
|
"BindAddress": *bindAddress,
|
||||||
|
}).Fatal("Unable to start server")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// graceful shutdown via SIGINT (^C)
|
||||||
|
csig := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(csig, os.Interrupt)
|
||||||
|
|
||||||
|
// and block
|
||||||
|
<-csig
|
||||||
|
|
||||||
|
log.Info("Server shutting down")
|
||||||
|
|
||||||
|
// deadline for server to shutdown
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// shutdown stats collection and the server
|
||||||
|
close(stop_collection)
|
||||||
|
notify_complete.Wait()
|
||||||
|
server.Shutdown(ctx)
|
||||||
|
|
||||||
|
// nothing left to do
|
||||||
|
log.Info("Shutdown complete, all done")
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// end of code
|
146
dns.go
Normal file
146
dns.go
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// DNS Metrics
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
import (
|
||||||
|
// "fmt"
|
||||||
|
dns "github.com/miekg/dns"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
type DNSMetrics struct {
|
||||||
|
soa *prometheus.GaugeVec
|
||||||
|
rtt *prometheus.GaugeVec
|
||||||
|
valid *prometheus.GaugeVec
|
||||||
|
label_map []prometheus.Labels
|
||||||
|
}
|
||||||
|
|
||||||
|
// DNSServer describes one DNS server to be checked, together with the
// SOA serial seen in its most recent query.
//
// The field order matters: dns_servers is built with positional
// composite literals.
type DNSServer struct {
	role, name string // label values published with every gauge
	ip         uint8  // published as the "ip" label (4 or 6 in dns_servers)
	addr       string // host:port the query is sent to
	soa        uint   // serial from the last query, 0 if it failed
}
|
||||||
|
|
||||||
|
var dns_servers = []*DNSServer{
|
||||||
|
&DNSServer{"master", "master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0},
|
||||||
|
&DNSServer{"delegation", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0},
|
||||||
|
&DNSServer{"delegation", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0},
|
||||||
|
&DNSServer{"delegation", "j.delegation-servers.dn42", 4, "172.20.1.18:53", 0},
|
||||||
|
&DNSServer{"delegation", "j.delegation-servers.dn42", 6, "[fd42:5d71:219:0:1::42]:53", 0},
|
||||||
|
&DNSServer{"delegation", "y.delegation-servers.dn42", 4, "172.20.20.66:53", 0},
|
||||||
|
&DNSServer{"delegation", "y.delegation-servers.dn42", 6, "[fd42:c01d:beef::3]:53", 0},
|
||||||
|
&DNSServer{"recursive", "a.recursive-servers.dn42", 4, "172.20.0.53:53", 0},
|
||||||
|
&DNSServer{"recursive", "a.recursive-servers.dn42", 6, "[fd42:d42:d42:54::1]:53", 0},
|
||||||
|
&DNSServer{"recursive", "b.recursive-servers.dn42", 4, "172.20.129.2:53", 0},
|
||||||
|
&DNSServer{"recursive", "b.recursive-servers.dn42", 6, "[fd42:4242:2601:ac53::53]:53", 0},
|
||||||
|
&DNSServer{"recursive", "j.recursive-servers.dn42", 4, "172.20.1.19:53", 0},
|
||||||
|
&DNSServer{"recursive", "j.recursive-servers.dn42", 6, "[fd42:5d71:219:0:1::43]:53", 0},
|
||||||
|
&DNSServer{"recursive", "y.recursive-servers.dn42", 4, "172.20.20.65:53", 0},
|
||||||
|
&DNSServer{"recursive", "y.recursive-servers.dn42", 6, "[fd42:c01d:beef::2]:53", 0},
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
func (m *DNSMetrics) Register() {
|
||||||
|
|
||||||
|
m.soa = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "dn42_dns_soa",
|
||||||
|
Help: "SOA for .dn42 domain",
|
||||||
|
}, []string{"role", "name", "ip", "addr"})
|
||||||
|
prometheus.MustRegister(m.soa)
|
||||||
|
|
||||||
|
m.rtt = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "dn42_dns_rtt",
|
||||||
|
Help: "RTT when collecting SOA for .dn42 domain",
|
||||||
|
}, []string{"role", "name", "ip", "addr"})
|
||||||
|
prometheus.MustRegister(m.rtt)
|
||||||
|
|
||||||
|
m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "dn42_dns_valid",
|
||||||
|
Help: "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response",
|
||||||
|
}, []string{"role", "name", "ip", "addr"})
|
||||||
|
prometheus.MustRegister(m.valid)
|
||||||
|
|
||||||
|
// pre-populate the labels
|
||||||
|
m.label_map = make([]prometheus.Labels, len(dns_servers))
|
||||||
|
|
||||||
|
for ix, server := range dns_servers {
|
||||||
|
m.label_map[ix] = prometheus.Labels{
|
||||||
|
"role": server.role,
|
||||||
|
"name": server.name,
|
||||||
|
"ip": strconv.Itoa(int(server.ip)),
|
||||||
|
"addr": server.addr,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
func (m *DNSMetrics) Collect() {
|
||||||
|
|
||||||
|
for ix, server := range dns_servers {
|
||||||
|
soa, rtt := server.Query()
|
||||||
|
server.soa = uint(soa)
|
||||||
|
|
||||||
|
m.soa.With(m.label_map[ix]).Set(soa)
|
||||||
|
m.rtt.With(m.label_map[ix]).Set(rtt)
|
||||||
|
|
||||||
|
var valid uint = 0
|
||||||
|
|
||||||
|
if server.soa == 0 {
|
||||||
|
valid = 2
|
||||||
|
} else {
|
||||||
|
if server.soa != dns_servers[0].soa {
|
||||||
|
valid = 1
|
||||||
|
} else {
|
||||||
|
valid = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m.valid.With(m.label_map[ix]).Set(float64(valid))
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
func (s *DNSServer) Query() (float64, float64) {
|
||||||
|
|
||||||
|
msg := new(dns.Msg)
|
||||||
|
msg.Id = dns.Id()
|
||||||
|
msg.RecursionDesired = (s.role == "recursive")
|
||||||
|
msg.Question = []dns.Question{{"dn42.", dns.TypeSOA, dns.ClassINET}}
|
||||||
|
|
||||||
|
client := new(dns.Client)
|
||||||
|
resp, rtt, err := client.Exchange(msg, s.addr)
|
||||||
|
if err != nil || len(resp.Answer) != 1 {
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"error": err,
|
||||||
|
"resp": resp,
|
||||||
|
"server": s,
|
||||||
|
}).Warn("Unable to query DNS server")
|
||||||
|
|
||||||
|
return 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if soa, ok := resp.Answer[0].(*dns.SOA); ok {
|
||||||
|
return float64(soa.Serial), math.Round(rtt.Seconds() * 1000)
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// end of code
|
Loading…
x
Reference in New Issue
Block a user