github.com/grafana/pyroscope@v1.18.0/pkg/validation/exporter/exporter.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/util/validation/exporter.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package exporter 7 8 import ( 9 "context" 10 "flag" 11 "net/http" 12 13 "github.com/go-kit/log" 14 "github.com/go-kit/log/level" 15 "github.com/grafana/dskit/services" 16 "github.com/pkg/errors" 17 "github.com/prometheus/client_golang/prometheus" 18 19 "github.com/grafana/pyroscope/pkg/util" 20 "github.com/grafana/pyroscope/pkg/validation" 21 ) 22 23 // Config holds the configuration for an overrides-exporter 24 type Config struct { 25 Ring RingConfig `yaml:"ring"` 26 } 27 28 // RegisterFlags configs this instance to the given FlagSet 29 func (c *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { 30 c.Ring.RegisterFlags(f, logger) 31 } 32 33 // Validate validates the configuration for an overrides-exporter. 34 func (c *Config) Validate() error { 35 return c.Ring.Validate() 36 } 37 38 // OverridesExporter exposes per-tenant resource limit overrides as Prometheus metrics 39 type OverridesExporter struct { 40 services.Service 41 42 defaultLimits *validation.Limits 43 tenantLimits validation.TenantLimits 44 overrideDescription *prometheus.Desc 45 defaultsDescription *prometheus.Desc 46 logger log.Logger 47 48 // OverridesExporter can optionally use a ring to uniquely shard tenants to 49 // instances and avoid export of duplicate metrics. 50 ring *overridesExporterRing 51 } 52 53 // NewOverridesExporter creates an OverridesExporter that reads updates to per-tenant 54 // limits using the provided function. 55 func NewOverridesExporter( 56 config Config, 57 defaultLimits *validation.Limits, 58 tenantLimits validation.TenantLimits, 59 log log.Logger, 60 registerer prometheus.Registerer, 61 ) (*OverridesExporter, error) { 62 exporter := &OverridesExporter{ 63 defaultLimits: defaultLimits, 64 tenantLimits: tenantLimits, 65 overrideDescription: prometheus.NewDesc( 66 "pyroscope_limits_overrides", 67 "Resource limit overrides applied to tenants", 68 []string{"limit_name", "tenant"}, 69 nil, 70 ), 71 defaultsDescription: prometheus.NewDesc( 72 "pyroscope_limits_defaults", 73 "Resource limit defaults for tenants without overrides", 74 []string{"limit_name"}, 75 nil, 76 ), 77 logger: log, 78 } 79 var err error 80 exporter.ring, err = newRing(config.Ring, log, registerer) 81 if err != nil { 82 return nil, errors.Wrap(err, "failed to create ring/lifecycler") 83 } 84 85 exporter.Service = services.NewBasicService(exporter.starting, exporter.running, exporter.stopping) 86 return exporter, nil 87 } 88 89 func (oe *OverridesExporter) Describe(ch chan<- *prometheus.Desc) { 90 ch <- oe.defaultsDescription 91 ch <- oe.overrideDescription 92 } 93 94 func (oe *OverridesExporter) Collect(ch chan<- prometheus.Metric) { 95 if !oe.isLeader() { 96 // If another replica is the leader, don't expose any metrics from this one. 97 return 98 } 99 100 // Write path limits 101 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, oe.defaultLimits.IngestionRateMB, "ingestion_rate_mb") 102 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, oe.defaultLimits.IngestionBurstSizeMB, "ingestion_burst_size_mb") 103 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxGlobalSeriesPerTenant), "max_global_series_per_tenant") 104 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxLocalSeriesPerTenant), "max_series_per_tenant") 105 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxLabelNameLength), "max_label_name_length") 106 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxLabelValueLength), "max_label_value_length") 107 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxLabelNamesPerSeries), "max_label_names_per_series") 108 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxSessionsPerSeries), "max_sessions_per_series") 109 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.DistributorAggregationWindow), "distributor_aggregation_window") 110 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.DistributorAggregationPeriod), "distributor_aggregation_period") 111 112 // Read path limits 113 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxQueryLookback), "max_query_lookback") 114 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxQueryLength), "max_query_length") 115 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxQueryParallelism), "max_query_parallelism") 116 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.QuerySplitDuration), "split_queries_by_interval") 117 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxFlameGraphNodesDefault), "max_flamegraph_nodes_default") 118 ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxFlameGraphNodesMax), "max_flamegraph_nodes_max") 119 120 // Do not export per-tenant limits if they've not been configured at all. 121 if oe.tenantLimits == nil { 122 return 123 } 124 125 allLimits := oe.tenantLimits.AllByTenantID() 126 for tenant, limits := range allLimits { 127 // Write path limits 128 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, limits.IngestionRateMB, "ingestion_rate_mb", tenant) 129 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, limits.IngestionBurstSizeMB, "ingestion_burst_size_mb", tenant) 130 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxGlobalSeriesPerTenant), "max_global_series_per_tenant", tenant) 131 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxLocalSeriesPerTenant), "max_series_per_tenant", tenant) 132 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxLabelNameLength), "max_label_name_length", tenant) 133 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxLabelValueLength), "max_label_value_length", tenant) 134 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxLabelNamesPerSeries), "max_label_names_per_series", tenant) 135 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxSessionsPerSeries), "max_sessions_per_series", tenant) 136 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.DistributorAggregationWindow), "distributor_aggregation_window", tenant) 137 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.DistributorAggregationPeriod), "distributor_aggregation_period", tenant) 138 139 // Read path limits 140 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxQueryLookback), "max_query_lookback", tenant) 141 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxQueryLength), "max_query_length", tenant) 142 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxQueryParallelism), "max_query_parallelism", tenant) 143 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.QuerySplitDuration), "split_queries_by_interval", tenant) 144 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxFlameGraphNodesDefault), "max_flamegraph_nodes_default", tenant) 145 ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxFlameGraphNodesMax), "max_flamegraph_nodes_max", tenant) 146 } 147 } 148 149 // RingHandler is an http.Handler that serves requests for the overrides-exporter ring status page 150 func (oe *OverridesExporter) RingHandler(w http.ResponseWriter, req *http.Request) { 151 if oe.ring != nil { 152 oe.ring.lifecycler.ServeHTTP(w, req) 153 return 154 } 155 156 ringDisabledPage := ` 157 <!DOCTYPE html> 158 <html> 159 <head> 160 <meta charset="UTF-8"> 161 <title>Overrides-exporter Status</title> 162 </head> 163 <body> 164 <h1>Overrides-exporter Status</h1> 165 <p>Overrides-exporter hash ring is disabled.</p> 166 </body> 167 </html>` 168 util.WriteHTMLResponse(w, ringDisabledPage) 169 } 170 171 // isLeader determines whether this overrides-exporter instance is the leader 172 // replica that exports all limit metrics. If the ring is disabled, leadership is 173 // assumed. If the ring is enabled, it is used to determine which ring member is 174 // the leader replica. 175 func (oe *OverridesExporter) isLeader() bool { 176 if oe.ring == nil { 177 // If the ring is not enabled, export all metrics 178 return true 179 } 180 if oe.State() != services.Running { 181 // We haven't finished startup yet, likely waiting for ring stability. 182 return false 183 } 184 isLeaderNow, err := oe.ring.isLeader() 185 if err != nil { 186 // If there was an error establishing ownership using the ring, log a warning and 187 // default to not exporting metrics to keep series churn low for transient ring 188 // issues. 189 level.Warn(oe.logger).Log("msg", "overrides-exporter failed to determine ring leader", "err", err.Error()) 190 return false 191 } 192 return isLeaderNow 193 } 194 195 func (oe *OverridesExporter) starting(ctx context.Context) error { 196 if oe.ring == nil { 197 return nil 198 } 199 return oe.ring.starting(ctx) 200 } 201 202 func (oe *OverridesExporter) running(ctx context.Context) error { 203 if oe.ring == nil { 204 <-ctx.Done() 205 return nil 206 } 207 return oe.ring.running(ctx) 208 } 209 210 func (oe *OverridesExporter) stopping(err error) error { 211 if oe.ring == nil { 212 return nil 213 } 214 return oe.ring.stopping(err) 215 }