github.com/crowdsecurity/crowdsec@v1.6.1/cmd/crowdsec/metrics.go (about)

     1  package main
     2  
import (
	"fmt"
	"net"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	log "github.com/sirupsen/logrus"

	"github.com/crowdsecurity/go-cs-lib/trace"
	"github.com/crowdsecurity/go-cs-lib/version"

	"github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration"
	v1 "github.com/crowdsecurity/crowdsec/pkg/apiserver/controllers/v1"
	"github.com/crowdsecurity/crowdsec/pkg/cache"
	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
	"github.com/crowdsecurity/crowdsec/pkg/database"
	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
	leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket"
	"github.com/crowdsecurity/crowdsec/pkg/parser"
)
    24  
    25  /*prometheus*/
    26  var globalParserHits = prometheus.NewCounterVec(
    27  	prometheus.CounterOpts{
    28  		Name: "cs_parser_hits_total",
    29  		Help: "Total events entered the parser.",
    30  	},
    31  	[]string{"source", "type"},
    32  )
    33  var globalParserHitsOk = prometheus.NewCounterVec(
    34  	prometheus.CounterOpts{
    35  		Name: "cs_parser_hits_ok_total",
    36  		Help: "Total events were successfully parsed.",
    37  	},
    38  	[]string{"source", "type"},
    39  )
    40  var globalParserHitsKo = prometheus.NewCounterVec(
    41  	prometheus.CounterOpts{
    42  		Name: "cs_parser_hits_ko_total",
    43  		Help: "Total events were unsuccessfully parsed.",
    44  	},
    45  	[]string{"source", "type"},
    46  )
    47  
    48  var globalBucketPourKo = prometheus.NewCounter(
    49  	prometheus.CounterOpts{
    50  		Name: "cs_bucket_pour_ko_total",
    51  		Help: "Total events were not poured in a bucket.",
    52  	},
    53  )
    54  
    55  var globalBucketPourOk = prometheus.NewCounter(
    56  	prometheus.CounterOpts{
    57  		Name: "cs_bucket_pour_ok_total",
    58  		Help: "Total events were poured in at least one bucket.",
    59  	},
    60  )
    61  
    62  var globalCsInfo = prometheus.NewGauge(
    63  	prometheus.GaugeOpts{
    64  		Name:        "cs_info",
    65  		Help:        "Information about Crowdsec.",
    66  		ConstLabels: prometheus.Labels{"version": version.String()},
    67  	},
    68  )
    69  
    70  var globalActiveDecisions = prometheus.NewGaugeVec(
    71  	prometheus.GaugeOpts{
    72  		Name: "cs_active_decisions",
    73  		Help: "Number of active decisions.",
    74  	},
    75  	[]string{"reason", "origin", "action"},
    76  )
    77  
    78  var globalAlerts = prometheus.NewGaugeVec(
    79  	prometheus.GaugeOpts{
    80  		Name: "cs_alerts",
    81  		Help: "Number of alerts (excluding CAPI).",
    82  	},
    83  	[]string{"reason"},
    84  )
    85  
    86  var globalParsingHistogram = prometheus.NewHistogramVec(
    87  	prometheus.HistogramOpts{
    88  		Help:    "Time spent parsing a line",
    89  		Name:    "cs_parsing_time_seconds",
    90  		Buckets: []float64{0.0005, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.004, 0.005, 0.0075, 0.01},
    91  	},
    92  	[]string{"type", "source"},
    93  )
    94  
    95  var globalPourHistogram = prometheus.NewHistogramVec(
    96  	prometheus.HistogramOpts{
    97  		Name:    "cs_bucket_pour_seconds",
    98  		Help:    "Time spent pouring an event to buckets.",
    99  		Buckets: []float64{0.001, 0.002, 0.005, 0.01, 0.015, 0.02, 0.03, 0.04, 0.05},
   100  	},
   101  	[]string{"type", "source"},
   102  )
   103  
   104  func computeDynamicMetrics(next http.Handler, dbClient *database.Client) http.HandlerFunc {
   105  	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   106  		// catch panics here because they are not handled by servePrometheus
   107  		defer trace.CatchPanic("crowdsec/computeDynamicMetrics")
   108  		// update cache metrics (stash)
   109  		cache.UpdateCacheMetrics()
   110  		// update cache metrics (regexp)
   111  		exprhelpers.UpdateRegexpCacheMetrics()
   112  
   113  		// decision metrics are only relevant for LAPI
   114  		if dbClient == nil {
   115  			next.ServeHTTP(w, r)
   116  			return
   117  		}
   118  
   119  		decisionsFilters := make(map[string][]string, 0)
   120  
   121  		decisions, err := dbClient.QueryDecisionCountByScenario(decisionsFilters)
   122  		if err != nil {
   123  			log.Errorf("Error querying decisions for metrics: %v", err)
   124  			next.ServeHTTP(w, r)
   125  
   126  			return
   127  		}
   128  
   129  		globalActiveDecisions.Reset()
   130  
   131  		for _, d := range decisions {
   132  			globalActiveDecisions.With(prometheus.Labels{"reason": d.Scenario, "origin": d.Origin, "action": d.Type}).Set(float64(d.Count))
   133  		}
   134  
   135  		globalAlerts.Reset()
   136  
   137  		alertsFilter := map[string][]string{
   138  			"include_capi": {"false"},
   139  		}
   140  
   141  		alerts, err := dbClient.AlertsCountPerScenario(alertsFilter)
   142  
   143  		if err != nil {
   144  			log.Errorf("Error querying alerts for metrics: %v", err)
   145  			next.ServeHTTP(w, r)
   146  
   147  			return
   148  		}
   149  
   150  		for k, v := range alerts {
   151  			globalAlerts.With(prometheus.Labels{"reason": k}).Set(float64(v))
   152  		}
   153  
   154  		next.ServeHTTP(w, r)
   155  	})
   156  }
   157  
   158  func registerPrometheus(config *csconfig.PrometheusCfg) {
   159  	if !config.Enabled {
   160  		return
   161  	}
   162  
   163  	// Registering prometheus
   164  	// If in aggregated mode, do not register events associated with a source, to keep the cardinality low
   165  	if config.Level == configuration.CFG_METRICS_AGGREGATE {
   166  		log.Infof("Loading aggregated prometheus collectors")
   167  		prometheus.MustRegister(globalParserHits, globalParserHitsOk, globalParserHitsKo,
   168  			globalCsInfo, globalParsingHistogram, globalPourHistogram,
   169  			leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow,
   170  			v1.LapiRouteHits,
   171  			leaky.BucketsCurrentCount,
   172  			cache.CacheMetrics, exprhelpers.RegexpCacheMetrics, parser.NodesWlHitsOk, parser.NodesWlHits,
   173  		)
   174  	} else {
   175  		log.Infof("Loading prometheus collectors")
   176  		prometheus.MustRegister(globalParserHits, globalParserHitsOk, globalParserHitsKo,
   177  			parser.NodesHits, parser.NodesHitsOk, parser.NodesHitsKo,
   178  			globalCsInfo, globalParsingHistogram, globalPourHistogram,
   179  			v1.LapiRouteHits, v1.LapiMachineHits, v1.LapiBouncerHits, v1.LapiNilDecisions, v1.LapiNonNilDecisions, v1.LapiResponseTime,
   180  			leaky.BucketsPour, leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow, leaky.BucketsCurrentCount,
   181  			globalActiveDecisions, globalAlerts, parser.NodesWlHitsOk, parser.NodesWlHits,
   182  			cache.CacheMetrics, exprhelpers.RegexpCacheMetrics,
   183  		)
   184  	}
   185  }
   186  
   187  func servePrometheus(config *csconfig.PrometheusCfg, dbClient *database.Client, agentReady chan bool) {
   188  	<-agentReady
   189  
   190  	if !config.Enabled {
   191  		return
   192  	}
   193  
   194  	defer trace.CatchPanic("crowdsec/servePrometheus")
   195  
   196  	http.Handle("/metrics", computeDynamicMetrics(promhttp.Handler(), dbClient))
   197  	log.Debugf("serving metrics after %s ms", time.Since(crowdsecT0))
   198  
   199  	if err := http.ListenAndServe(fmt.Sprintf("%s:%d", config.ListenAddr, config.ListenPort), nil); err != nil {
   200  		// in time machine, we most likely have the LAPI using the port
   201  		if !flags.haveTimeMachine() {
   202  			log.Warningf("prometheus: %s", err)
   203  		}
   204  	}
   205  }