github.com/crowdsecurity/crowdsec@v1.6.1/cmd/crowdsec/metrics.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "net/http" 6 "time" 7 8 "github.com/prometheus/client_golang/prometheus" 9 "github.com/prometheus/client_golang/prometheus/promhttp" 10 log "github.com/sirupsen/logrus" 11 12 "github.com/crowdsecurity/go-cs-lib/trace" 13 "github.com/crowdsecurity/go-cs-lib/version" 14 15 "github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration" 16 v1 "github.com/crowdsecurity/crowdsec/pkg/apiserver/controllers/v1" 17 "github.com/crowdsecurity/crowdsec/pkg/cache" 18 "github.com/crowdsecurity/crowdsec/pkg/csconfig" 19 "github.com/crowdsecurity/crowdsec/pkg/database" 20 "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" 21 leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket" 22 "github.com/crowdsecurity/crowdsec/pkg/parser" 23 ) 24 25 /*prometheus*/ 26 var globalParserHits = prometheus.NewCounterVec( 27 prometheus.CounterOpts{ 28 Name: "cs_parser_hits_total", 29 Help: "Total events entered the parser.", 30 }, 31 []string{"source", "type"}, 32 ) 33 var globalParserHitsOk = prometheus.NewCounterVec( 34 prometheus.CounterOpts{ 35 Name: "cs_parser_hits_ok_total", 36 Help: "Total events were successfully parsed.", 37 }, 38 []string{"source", "type"}, 39 ) 40 var globalParserHitsKo = prometheus.NewCounterVec( 41 prometheus.CounterOpts{ 42 Name: "cs_parser_hits_ko_total", 43 Help: "Total events were unsuccessfully parsed.", 44 }, 45 []string{"source", "type"}, 46 ) 47 48 var globalBucketPourKo = prometheus.NewCounter( 49 prometheus.CounterOpts{ 50 Name: "cs_bucket_pour_ko_total", 51 Help: "Total events were not poured in a bucket.", 52 }, 53 ) 54 55 var globalBucketPourOk = prometheus.NewCounter( 56 prometheus.CounterOpts{ 57 Name: "cs_bucket_pour_ok_total", 58 Help: "Total events were poured in at least one bucket.", 59 }, 60 ) 61 62 var globalCsInfo = prometheus.NewGauge( 63 prometheus.GaugeOpts{ 64 Name: "cs_info", 65 Help: "Information about Crowdsec.", 66 ConstLabels: prometheus.Labels{"version": version.String()}, 67 }, 68 ) 69 70 var globalActiveDecisions = prometheus.NewGaugeVec( 71 prometheus.GaugeOpts{ 72 Name: "cs_active_decisions", 73 Help: "Number of active decisions.", 74 }, 75 []string{"reason", "origin", "action"}, 76 ) 77 78 var globalAlerts = prometheus.NewGaugeVec( 79 prometheus.GaugeOpts{ 80 Name: "cs_alerts", 81 Help: "Number of alerts (excluding CAPI).", 82 }, 83 []string{"reason"}, 84 ) 85 86 var globalParsingHistogram = prometheus.NewHistogramVec( 87 prometheus.HistogramOpts{ 88 Help: "Time spent parsing a line", 89 Name: "cs_parsing_time_seconds", 90 Buckets: []float64{0.0005, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.004, 0.005, 0.0075, 0.01}, 91 }, 92 []string{"type", "source"}, 93 ) 94 95 var globalPourHistogram = prometheus.NewHistogramVec( 96 prometheus.HistogramOpts{ 97 Name: "cs_bucket_pour_seconds", 98 Help: "Time spent pouring an event to buckets.", 99 Buckets: []float64{0.001, 0.002, 0.005, 0.01, 0.015, 0.02, 0.03, 0.04, 0.05}, 100 }, 101 []string{"type", "source"}, 102 ) 103 104 func computeDynamicMetrics(next http.Handler, dbClient *database.Client) http.HandlerFunc { 105 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 106 // catch panics here because they are not handled by servePrometheus 107 defer trace.CatchPanic("crowdsec/computeDynamicMetrics") 108 // update cache metrics (stash) 109 cache.UpdateCacheMetrics() 110 // update cache metrics (regexp) 111 exprhelpers.UpdateRegexpCacheMetrics() 112 113 // decision metrics are only relevant for LAPI 114 if dbClient == nil { 115 next.ServeHTTP(w, r) 116 return 117 } 118 119 decisionsFilters := make(map[string][]string, 0) 120 121 decisions, err := dbClient.QueryDecisionCountByScenario(decisionsFilters) 122 if err != nil { 123 log.Errorf("Error querying decisions for metrics: %v", err) 124 next.ServeHTTP(w, r) 125 126 return 127 } 128 129 globalActiveDecisions.Reset() 130 131 for _, d := range decisions { 132 globalActiveDecisions.With(prometheus.Labels{"reason": d.Scenario, "origin": d.Origin, "action": d.Type}).Set(float64(d.Count)) 133 } 134 135 globalAlerts.Reset() 136 137 alertsFilter := map[string][]string{ 138 "include_capi": {"false"}, 139 } 140 141 alerts, err := dbClient.AlertsCountPerScenario(alertsFilter) 142 143 if err != nil { 144 log.Errorf("Error querying alerts for metrics: %v", err) 145 next.ServeHTTP(w, r) 146 147 return 148 } 149 150 for k, v := range alerts { 151 globalAlerts.With(prometheus.Labels{"reason": k}).Set(float64(v)) 152 } 153 154 next.ServeHTTP(w, r) 155 }) 156 } 157 158 func registerPrometheus(config *csconfig.PrometheusCfg) { 159 if !config.Enabled { 160 return 161 } 162 163 // Registering prometheus 164 // If in aggregated mode, do not register events associated with a source, to keep the cardinality low 165 if config.Level == configuration.CFG_METRICS_AGGREGATE { 166 log.Infof("Loading aggregated prometheus collectors") 167 prometheus.MustRegister(globalParserHits, globalParserHitsOk, globalParserHitsKo, 168 globalCsInfo, globalParsingHistogram, globalPourHistogram, 169 leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow, 170 v1.LapiRouteHits, 171 leaky.BucketsCurrentCount, 172 cache.CacheMetrics, exprhelpers.RegexpCacheMetrics, parser.NodesWlHitsOk, parser.NodesWlHits, 173 ) 174 } else { 175 log.Infof("Loading prometheus collectors") 176 prometheus.MustRegister(globalParserHits, globalParserHitsOk, globalParserHitsKo, 177 parser.NodesHits, parser.NodesHitsOk, parser.NodesHitsKo, 178 globalCsInfo, globalParsingHistogram, globalPourHistogram, 179 v1.LapiRouteHits, v1.LapiMachineHits, v1.LapiBouncerHits, v1.LapiNilDecisions, v1.LapiNonNilDecisions, v1.LapiResponseTime, 180 leaky.BucketsPour, leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow, leaky.BucketsCurrentCount, 181 globalActiveDecisions, globalAlerts, parser.NodesWlHitsOk, parser.NodesWlHits, 182 cache.CacheMetrics, exprhelpers.RegexpCacheMetrics, 183 ) 184 } 185 } 186 187 func servePrometheus(config *csconfig.PrometheusCfg, dbClient *database.Client, agentReady chan bool) { 188 <-agentReady 189 190 if !config.Enabled { 191 return 192 } 193 194 defer trace.CatchPanic("crowdsec/servePrometheus") 195 196 http.Handle("/metrics", computeDynamicMetrics(promhttp.Handler(), dbClient)) 197 log.Debugf("serving metrics after %s ms", time.Since(crowdsecT0)) 198 199 if err := http.ListenAndServe(fmt.Sprintf("%s:%d", config.ListenAddr, config.ListenPort), nil); err != nil { 200 // in time machine, we most likely have the LAPI using the port 201 if !flags.haveTimeMachine() { 202 log.Warningf("prometheus: %s", err) 203 } 204 } 205 }