commit 34384f75654a9e1db7e80c78ac1544e7de5f5c5b Author: Simon Marsh Date: Fri May 10 20:29:37 2019 +0100 Initial commit to provide DNS stats diff --git a/README.md b/README.md new file mode 100644 index 0000000..ef9d6ca --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# dn42promsrv + +A small go server to provide custom DN42 related statistics to prometheus. + +Current stats include: + +* DNS availability and SOA Serial checks +* + diff --git a/contrib/dn42promsrv.service b/contrib/dn42promsrv.service new file mode 100644 index 0000000..0349925 --- /dev/null +++ b/contrib/dn42promsrv.service @@ -0,0 +1,29 @@ +########################################################################## +# dn42promsrv example systemd service file +########################################################################## + +[Unit] +Description=DN42 Prometheus Stats Server +After=network.target + +[Install] +WantedBy=multi-user.target + +[Service] +User=promsrv +Group=promsrv +Type=simple +Restart=on-failure +# service hardening +ProtectSystem=strict +NoNewPrivileges=yes +ProtectControlGroups=yes +PrivateTmp=yes +PrivateDevices=yes +DevicePolicy=closed +MemoryDenyWriteExecute=yes +# +ExecStart=/usr/local/bin/dn42promsrv + +######################################################################### +# end of file diff --git a/dn42promsrv.go b/dn42promsrv.go new file mode 100644 index 0000000..43a03b2 --- /dev/null +++ b/dn42promsrv.go @@ -0,0 +1,192 @@ +////////////////////////////////////////////////////////////////////////// +// DN42 Prometheus Stats Server +////////////////////////////////////////////////////////////////////////// + +package main + +////////////////////////////////////////////////////////////////////////// + +import ( + "context" + "github.com/gorilla/mux" + "github.com/prometheus/client_golang/prometheus/promhttp" + log "github.com/sirupsen/logrus" + flag "github.com/spf13/pflag" + "net/http" + "os" + "os/signal" + "sync" + "time" +) + 
+////////////////////////////////////////////////////////////////////////// + +type Metric interface { + Register() + Collect() +} + +////////////////////////////////////////////////////////////////////////// +// Set the log level + +func setLogLevel(levelStr string) { + + if level, err := log.ParseLevel(levelStr); err != nil { + // failed to set the level + + // set a sensible default and, of course, log the error + log.SetLevel(log.InfoLevel) + log.WithFields(log.Fields{ + "loglevel": levelStr, + "error": err, + }).Error("Failed to set requested log level") + + } else { + + // set the requested level + log.SetLevel(level) + + } +} + +////////////////////////////////////////////////////////////////////////// +// collect metrics + +func collectMetrics(stop chan bool, + notify *sync.WaitGroup, interval time.Duration, + metrics []Metric) { + + notify.Add(1) + defer notify.Done() + + ticker := time.NewTicker(interval) + defer ticker.Stop() + + log.WithFields(log.Fields{ + "Interval": interval, + }).Info("Starting data collection") + + for { + select { + case <-stop: + // stop updating + return + case <-ticker.C: + // timer expired, perform an update + + for _, metric := range metrics { + metric.Collect() + } + + } + } + +} + +////////////////////////////////////////////////////////////////////////// +// initialise metrics + +func initMetrics() []Metric { + + metrics := make([]Metric, 1) + + metrics[0] = &DNSMetrics{} + + return metrics +} + +////////////////////////////////////////////////////////////////////////// + +func main() { + + // set a default log level, so that logging can be used immediately + // the level will be overidden later on once the command line + // options are loaded + log.SetLevel(log.InfoLevel) + log.Info("DN42 Stats Server Starting") + + // declare cmd line options + var ( + logLevel = flag.StringP("LogLevel", "l", "Info", "Log level") + bindAddress = flag.StringP("BindAddress", "b", ":8001", "Server bind address") + refreshInterval = 
flag.StringP("Refresh", "i", "1m", "Refresh interval") + ) + flag.Parse() + + // now initialise logging properly based on the cmd line options + setLogLevel(*logLevel) + + // parse the refreshInterval and start data collection + interval, err := time.ParseDuration(*refreshInterval) + if err != nil { + log.WithFields(log.Fields{ + "error": err, + "interval": *refreshInterval, + }).Fatal("Unable to parse refresh interval") + } + + // initialise and register metrics + metrics := initMetrics() + + for _, metric := range metrics { + metric.Register() + } + + // start metric collection + notify_complete := &sync.WaitGroup{} + stop_collection := make(chan bool) + go collectMetrics(stop_collection, notify_complete, interval, metrics) + + // initialise router and install prom handler + router := mux.NewRouter() + router.Handle("/metrics", promhttp.Handler()) + + // initialise http server + server := &http.Server{ + Addr: *bindAddress, + WriteTimeout: time.Second * 15, + ReadTimeout: time.Second * 15, + IdleTimeout: time.Second * 60, + Handler: router, + } + + // run the server in a non-blocking goroutine + + log.WithFields(log.Fields{ + "BindAddress": *bindAddress, + }).Info("Starting server") + + go func() { + if err := server.ListenAndServe(); err != nil { + log.WithFields(log.Fields{ + "error": err, + "BindAddress": *bindAddress, + }).Fatal("Unable to start server") + } + }() + + // graceful shutdown via SIGINT (^C) + csig := make(chan os.Signal, 1) + signal.Notify(csig, os.Interrupt) + + // and block + <-csig + + log.Info("Server shutting down") + + // deadline for server to shutdown + ctx, cancel := context.WithTimeout(context.Background(), 10) + defer cancel() + + // shutdown stats collection and the server + close(stop_collection) + notify_complete.Wait() + server.Shutdown(ctx) + + // nothing left to do + log.Info("Shutdown complete, all done") + os.Exit(0) +} + +////////////////////////////////////////////////////////////////////////// +// end of code diff --git 
a/dns.go b/dns.go
new file mode 100644
index 0000000..1dcfb45
--- /dev/null
+++ b/dns.go
@@ -0,0 +1,146 @@
+//////////////////////////////////////////////////////////////////////////
+// DNS Metrics
+//////////////////////////////////////////////////////////////////////////
+
+package main
+
+//////////////////////////////////////////////////////////////////////////
+
+import (
+	// "fmt"
+	dns "github.com/miekg/dns"
+	"github.com/prometheus/client_golang/prometheus"
+	log "github.com/sirupsen/logrus"
+	"math"
+	"strconv"
+)
+
+//////////////////////////////////////////////////////////////////////////
+
+// DNSMetrics holds the prometheus gauges published for each entry in
+// dns_servers, together with the pre-built label set of each entry.
+type DNSMetrics struct {
+	soa       *prometheus.GaugeVec // last SOA serial seen (0 if the query failed)
+	rtt       *prometheus.GaugeVec // query round trip time in milliseconds
+	valid     *prometheus.GaugeVec // 0, 1 or 2; see the help text in Register
+	label_map []prometheus.Labels  // label_map[i] belongs to dns_servers[i]
+}
+
+// DNSServer describes one DNS server to be polled.
+type DNSServer struct {
+	role string // "master", "delegation" or "recursive"
+	name string // host name, used as a metric label
+	ip   uint8  // IP version of addr (4 or 6), used as a metric label
+	addr string // host:port the query is sent to
+	soa  uint   // serial returned by the most recent Query, 0 on failure
+}
+
+// dns_servers is the fixed list of servers that are polled.
+//
+// The master MUST stay at index 0: Collect compares the serial of every
+// other server against dns_servers[0].soa.
+var dns_servers = []*DNSServer{
+	{"master", "master.delegation-servers.dn42", 6, "[fd42:180:3de0:10:5054:ff:fe87:ea39]:53", 0},
+	{"delegation", "b.delegation-servers.dn42", 4, "172.20.129.1:53", 0},
+	{"delegation", "b.delegation-servers.dn42", 6, "[fd42:4242:2601:ac53::1]:53", 0},
+	{"delegation", "j.delegation-servers.dn42", 4, "172.20.1.18:53", 0},
+	{"delegation", "j.delegation-servers.dn42", 6, "[fd42:5d71:219:0:1::42]:53", 0},
+	{"delegation", "y.delegation-servers.dn42", 4, "172.20.20.66:53", 0},
+	{"delegation", "y.delegation-servers.dn42", 6, "[fd42:c01d:beef::3]:53", 0},
+	{"recursive", "a.recursive-servers.dn42", 4, "172.20.0.53:53", 0},
+	{"recursive", "a.recursive-servers.dn42", 6, "[fd42:d42:d42:54::1]:53", 0},
+	{"recursive", "b.recursive-servers.dn42", 4, "172.20.129.2:53", 0},
+	{"recursive", "b.recursive-servers.dn42", 6, "[fd42:4242:2601:ac53::53]:53", 0},
+	{"recursive", "j.recursive-servers.dn42", 4, "172.20.1.19:53", 0},
+	{"recursive", "j.recursive-servers.dn42", 6, "[fd42:5d71:219:0:1::43]:53", 0},
+	{"recursive", "y.recursive-servers.dn42", 4, "172.20.20.65:53", 0},
+	{"recursive", "y.recursive-servers.dn42", 6, "[fd42:c01d:beef::2]:53", 0},
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Register creates the three gauge vectors, registers them with the
+// default prometheus registry (panicking on failure, as MustRegister
+// does) and pre-computes the label set for every entry in dns_servers.
+func (m *DNSMetrics) Register() {
+
+	// all three gauges share the same label names
+	labelNames := []string{"role", "name", "ip", "addr"}
+
+	m.soa = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "dn42_dns_soa",
+		Help: "SOA for .dn42 domain",
+	}, labelNames)
+	prometheus.MustRegister(m.soa)
+
+	m.rtt = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "dn42_dns_rtt",
+		Help: "RTT when collecting SOA for .dn42 domain",
+	}, labelNames)
+	prometheus.MustRegister(m.rtt)
+
+	m.valid = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "dn42_dns_valid",
+		Help: "0 = response and correct serial, 1 = response but incorrect serial, 2 = no response",
+	}, labelNames)
+	prometheus.MustRegister(m.valid)
+
+	// pre-populate the labels
+	m.label_map = make([]prometheus.Labels, len(dns_servers))
+
+	for ix, server := range dns_servers {
+		m.label_map[ix] = prometheus.Labels{
+			"role": server.role,
+			"name": server.name,
+			"ip":   strconv.Itoa(int(server.ip)),
+			"addr": server.addr,
+		}
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Collect queries every server in dns_servers in order, records its serial
+// and round trip time, and sets the valid gauge by comparing the serial
+// with the one just obtained from the master (dns_servers[0]).
+//
+// NOTE(review): when the master itself does not answer, its serial is 0,
+// so every server that does answer is reported as 1 (incorrect serial).
+// Confirm that this is the intended behaviour.
+func (m *DNSMetrics) Collect() {
+
+	for ix, server := range dns_servers {
+		soa, rtt := server.Query()
+		server.soa = uint(soa)
+
+		labels := m.label_map[ix]
+		m.soa.With(labels).Set(soa)
+		m.rtt.With(labels).Set(rtt)
+
+		// zero value 0 means a response with the correct serial
+		var valid float64
+		switch {
+		case server.soa == 0:
+			// no usable response
+			valid = 2
+		case server.soa != dns_servers[0].soa:
+			// response, but serial differs from the master
+			valid = 1
+		}
+
+		m.valid.With(labels).Set(valid)
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Query sends an SOA query for "dn42." to the server and returns the SOA
+// serial and the round trip time in milliseconds (rounded to the nearest
+// integer). Recursion is requested only from servers whose role is
+// "recursive".
+//
+// On any failure (exchange error, a response that does not carry exactly
+// one answer, or an answer that is not an SOA record) a warning is logged
+// and 0, 0 is returned. A round trip below half a millisecond also rounds
+// to 0, so callers should detect failure through the serial, not the RTT.
+func (s *DNSServer) Query() (float64, float64) {
+
+	msg := new(dns.Msg)
+	msg.Id = dns.Id()
+	msg.RecursionDesired = (s.role == "recursive")
+	// keyed fields: unkeyed literals of imported structs are flagged by
+	// go vet and break if the library ever reorders or adds fields
+	msg.Question = []dns.Question{{
+		Name:   "dn42.",
+		Qtype:  dns.TypeSOA,
+		Qclass: dns.ClassINET,
+	}}
+
+	client := new(dns.Client)
+	resp, rtt, err := client.Exchange(msg, s.addr)
+	if err != nil || resp == nil || len(resp.Answer) != 1 {
+		log.WithFields(log.Fields{
+			"error":  err,
+			"resp":   resp,
+			"server": s,
+		}).Warn("Unable to query DNS server")
+
+		return 0, 0
+	}
+
+	soa, ok := resp.Answer[0].(*dns.SOA)
+	if !ok {
+		// the single answer was some other record type; report it rather
+		// than failing silently
+		log.WithFields(log.Fields{
+			"resp":   resp,
+			"server": s,
+		}).Warn("Unexpected answer type from DNS server")
+
+		return 0, 0
+	}
+
+	return float64(soa.Serial), math.Round(rtt.Seconds() * 1000)
+}
+
+//////////////////////////////////////////////////////////////////////////
+// end of code