Initial commit to provide DNS stats
This commit is contained in:
commit
34384f7565
9
README.md
Normal file
9
README.md
Normal file
@ -0,0 +1,9 @@
|
||||
# dn42promsrv
|
||||
|
||||
A small Go server that provides custom DN42-related statistics to Prometheus.
|
||||
|
||||
Current stats include:
|
||||
|
||||
* DNS availability and SOA Serial checks
|
||||
*
|
||||
|
29
contrib/dn42promsrv.service
Normal file
29
contrib/dn42promsrv.service
Normal file
@ -0,0 +1,29 @@
|
||||
##########################################################################
|
||||
# dn42promsrv example systemd service file
|
||||
##########################################################################
|
||||
|
||||
[Unit]
|
||||
Description=DN42 Prometheus Stats Server
|
||||
After=network.target
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
[Service]
|
||||
User=promsrv
|
||||
Group=promsrv
|
||||
Type=simple
|
||||
Restart=on-failure
|
||||
# service hardening
|
||||
ProtectSystem=strict
|
||||
NoNewPrivileges=yes
|
||||
ProtectControlGroups=yes
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
DevicePolicy=closed
|
||||
MemoryDenyWriteExecute=yes
|
||||
#
|
||||
ExecStart=/usr/local/bin/dn42promsrv
|
||||
|
||||
#########################################################################
|
||||
# end of file
|
192
dn42promsrv.go
Normal file
192
dn42promsrv.go
Normal file
@ -0,0 +1,192 @@
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// DN42 Prometheus Stats Server
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
package main
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
log "github.com/sirupsen/logrus"
|
||||
flag "github.com/spf13/pflag"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Metric is the contract implemented by every statistics source served by
// this program.
type Metric interface {
	// Register is called once at startup, before any collection happens.
	Register()

	// Collect is called on every refresh tick to update the metric values.
	Collect()
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Set the log level
|
||||
|
||||
func setLogLevel(levelStr string) {
|
||||
|
||||
if level, err := log.ParseLevel(levelStr); err != nil {
|
||||
// failed to set the level
|
||||
|
||||
// set a sensible default and, of course, log the error
|
||||
log.SetLevel(log.InfoLevel)
|
||||
log.WithFields(log.Fields{
|
||||
"loglevel": levelStr,
|
||||
"error": err,
|
||||
}).Error("Failed to set requested log level")
|
||||
|
||||
} else {
|
||||
|
||||
// set the requested level
|
||||
log.SetLevel(level)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// collect metrics
|
||||
|
||||
func collectMetrics(stop chan bool,
|
||||
notify *sync.WaitGroup, interval time.Duration,
|
||||
metrics []Metric) {
|
||||
|
||||
notify.Add(1)
|
||||
defer notify.Done()
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"Interval": interval,
|
||||
}).Info("Starting data collection")
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-stop:
|
||||
// stop updating
|
||||
return
|
||||
case <-ticker.C:
|
||||
// timer expired, perform an update
|
||||
|
||||
for _, metric := range metrics {
|
||||
metric.Collect()
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// initialise metrics
|
||||
|
||||
func initMetrics() []Metric {
|
||||
|
||||
metrics := make([]Metric, 1)
|
||||
|
||||
metrics[0] = &DNSMetrics{}
|
||||
|
||||
return metrics
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
func main() {
|
||||
|
||||
// set a default log level, so that logging can be used immediately
|
||||
// the level will be overidden later on once the command line
|
||||
// options are loaded
|
||||
log.SetLevel(log.InfoLevel)
|
||||
log.Info("DN42 Stats Server Starting")
|
||||
|
||||
// declare cmd line options
|
||||
var (
|
||||
logLevel = flag.StringP("LogLevel", "l", "Info", "Log level")
|
||||
bindAddress = flag.StringP("BindAddress", "b", ":8001", "Server bind address")
|
||||
refreshInterval = flag.StringP("Refresh", "i", "1m", "Refresh interval")
|
||||
)
|
||||
flag.Parse()
|
||||
|
||||
// now initialise logging properly based on the cmd line options
|
||||
setLogLevel(*logLevel)
|
||||
|
||||
// parse the refreshInterval and start data collection
|
||||
interval, err := time.ParseDuration(*refreshInterval)
|
||||
if err != nil {
|
||||
log.WithFields(log.Fields{
|
||||
"error": err,
|
||||
"interval": *refreshInterval,
|
||||
}).Fatal("Unable to parse refresh interval")
|
||||
}
|
||||
|
||||
// initialise and register metrics
|
||||
metrics := initMetrics()
|
||||
|
||||
for _, metric := range metrics {
|
||||
metric.Register()
|
||||
}
|
||||
|
||||
// start metric collection
|
||||
notify_complete := &sync.WaitGroup{}
|
||||
stop_collection := make(chan bool)
|
||||
go collectMetrics(stop_collection, notify_complete, interval, metrics)
|
||||
|
||||
// initialise router and install prom handler
|
||||
router := mux.NewRouter()
|
||||
router.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
// initialise http server
|
||||
server := &http.Server{
|
||||
Addr: *bindAddress,
|
||||
WriteTimeout: time.Second * 15,
|
||||
ReadTimeout: time.Second * 15,
|
||||
IdleTimeout: time.Second * 60,
|
||||
Handler: router,
|
||||
}
|
||||
|
||||
// run the server in a non-blocking goroutine
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"BindAddress": *bindAddress,
|
||||
}).Info("Starting server")
|
||||
|
||||
go func() {
|
||||
if err := server.ListenAndServe(); err != nil {
|
||||
log.WithFields(log.Fields{
|
||||
"error": err,
|
||||
"BindAddress": *bindAddress,
|
||||
}).Fatal("Unable to start server")
|
||||
}
|
||||
}()
|
||||
|
||||
// graceful shutdown via SIGINT (^C)
|
||||
csig := make(chan os.Signal, 1)
|
||||
signal.Notify(csig, os.Interrupt)
|
||||
|
||||
// and block
|
||||
<-csig
|
||||
|
||||
log.Info("Server shutting down")
|
||||
|
||||
// deadline for server to shutdown
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10)
|
||||
defer cancel()
|
||||
|
||||
// shutdown stats collection and the server
|
||||
close(stop_collection)
|
||||
notify_complete.Wait()
|
||||
server.Shutdown(ctx)
|
||||
|
||||
// nothing left to do
|
||||
log.Info("Shutdown complete, all done")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// end of code
|
146
dns.go
Normal file
146
dns.go
Normal file
@ -0,0 +1,146 @@
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// DNS Metrics
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
package main
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
import (
|
||||
// "fmt"
|
||||
dns "github.com/miekg/dns"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
type DNSMetrics struct {
|
||||
soa *prometheus.GaugeVec
|
||||
rtt *prometheus.GaugeVec
|
||||
valid *prometheus.GaugeVec
|
||||
label_map []prometheus.Labels
|
||||
}
|
||||
|
||||
// DNSServer describes a single DNS server to be monitored.
//
// The field order is significant: values of this type may be built with
// positional (unkeyed) composite literals.
type DNSServer struct {
	// role of the server (e.g. "master", "delegation" or "recursive")
	// followed by its host name
	role, name string

	// IP protocol version used to reach the server (4 or 6)
	ip uint8

	// host:port the queries are sent to
	addr string

	// SOA serial recorded by the most recent collection, 0 if none
	soa uint
}
|
||||
|
||||
var dns_servers = []*DNSServer{
|
||||
&DNSServer{"master", "master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0},
|
||||
&DNSServer{"delegation", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0},
|
||||
&DNSServer{"delegation", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0},
|
||||
&DNSServer{"delegation", "j.delegation-servers.dn42", 4, "172.20.1.18:53", 0},
|
||||
&DNSServer{"delegation", "j.delegation-servers.dn42", 6, "[fd42:5d71:219:0:1::42]:53", 0},
|
||||
&DNSServer{"delegation", "y.delegation-servers.dn42", 4, "172.20.20.66:53", 0},
|
||||
&DNSServer{"delegation", "y.delegation-servers.dn42", 6, "[fd42:c01d:beef::3]:53", 0},
|
||||
&DNSServer{"recursive", "a.recursive-servers.dn42", 4, "172.20.0.53:53", 0},
|
||||
&DNSServer{"recursive", "a.recursive-servers.dn42", 6, "[fd42:d42:d42:54::1]:53", 0},
|
||||
&DNSServer{"recursive", "b.recursive-servers.dn42", 4, "172.20.129.2:53", 0},
|
||||
&DNSServer{"recursive", "b.recursive-servers.dn42", 6, "[fd42:4242:2601:ac53::53]:53", 0},
|
||||
&DNSServer{"recursive", "j.recursive-servers.dn42", 4, "172.20.1.19:53", 0},
|
||||
&DNSServer{"recursive", "j.recursive-servers.dn42", 6, "[fd42:5d71:219:0:1::43]:53", 0},
|
||||
&DNSServer{"recursive", "y.recursive-servers.dn42", 4, "172.20.20.65:53", 0},
|
||||
&DNSServer{"recursive", "y.recursive-servers.dn42", 6, "[fd42:c01d:beef::2]:53", 0},
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
func (m *DNSMetrics) Register() {
|
||||
|
||||
m.soa = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "dn42_dns_soa",
|
||||
Help: "SOA for .dn42 domain",
|
||||
}, []string{"role", "name", "ip", "addr"})
|
||||
prometheus.MustRegister(m.soa)
|
||||
|
||||
m.rtt = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "dn42_dns_rtt",
|
||||
Help: "RTT when collecting SOA for .dn42 domain",
|
||||
}, []string{"role", "name", "ip", "addr"})
|
||||
prometheus.MustRegister(m.rtt)
|
||||
|
||||
m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "dn42_dns_valid",
|
||||
Help: "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response",
|
||||
}, []string{"role", "name", "ip", "addr"})
|
||||
prometheus.MustRegister(m.valid)
|
||||
|
||||
// pre-populate the labels
|
||||
m.label_map = make([]prometheus.Labels, len(dns_servers))
|
||||
|
||||
for ix, server := range dns_servers {
|
||||
m.label_map[ix] = prometheus.Labels{
|
||||
"role": server.role,
|
||||
"name": server.name,
|
||||
"ip": strconv.Itoa(int(server.ip)),
|
||||
"addr": server.addr,
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
func (m *DNSMetrics) Collect() {
|
||||
|
||||
for ix, server := range dns_servers {
|
||||
soa, rtt := server.Query()
|
||||
server.soa = uint(soa)
|
||||
|
||||
m.soa.With(m.label_map[ix]).Set(soa)
|
||||
m.rtt.With(m.label_map[ix]).Set(rtt)
|
||||
|
||||
var valid uint = 0
|
||||
|
||||
if server.soa == 0 {
|
||||
valid = 2
|
||||
} else {
|
||||
if server.soa != dns_servers[0].soa {
|
||||
valid = 1
|
||||
} else {
|
||||
valid = 0
|
||||
}
|
||||
}
|
||||
|
||||
m.valid.With(m.label_map[ix]).Set(float64(valid))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
func (s *DNSServer) Query() (float64, float64) {
|
||||
|
||||
msg := new(dns.Msg)
|
||||
msg.Id = dns.Id()
|
||||
msg.RecursionDesired = (s.role == "recursive")
|
||||
msg.Question = []dns.Question{{"dn42.", dns.TypeSOA, dns.ClassINET}}
|
||||
|
||||
client := new(dns.Client)
|
||||
resp, rtt, err := client.Exchange(msg, s.addr)
|
||||
if err != nil || len(resp.Answer) != 1 {
|
||||
log.WithFields(log.Fields{
|
||||
"error": err,
|
||||
"resp": resp,
|
||||
"server": s,
|
||||
}).Warn("Unable to query DNS server")
|
||||
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
if soa, ok := resp.Answer[0].(*dns.SOA); ok {
|
||||
return float64(soa.Serial), math.Round(rtt.Seconds() * 1000)
|
||||
}
|
||||
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// end of code
|
Loading…
x
Reference in New Issue
Block a user