github.com/grafana/pyroscope@v1.18.0/pkg/validation/exporter/exporter.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/util/validation/exporter.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package exporter
     7  
     8  import (
     9  	"context"
    10  	"flag"
    11  	"net/http"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/go-kit/log/level"
    15  	"github.com/grafana/dskit/services"
    16  	"github.com/pkg/errors"
    17  	"github.com/prometheus/client_golang/prometheus"
    18  
    19  	"github.com/grafana/pyroscope/pkg/util"
    20  	"github.com/grafana/pyroscope/pkg/validation"
    21  )
    22  
    23  // Config holds the configuration for an overrides-exporter
    24  type Config struct {
    25  	Ring RingConfig `yaml:"ring"`
    26  }
    27  
    28  // RegisterFlags configs this instance to the given FlagSet
    29  func (c *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
    30  	c.Ring.RegisterFlags(f, logger)
    31  }
    32  
    33  // Validate validates the configuration for an overrides-exporter.
    34  func (c *Config) Validate() error {
    35  	return c.Ring.Validate()
    36  }
    37  
    38  // OverridesExporter exposes per-tenant resource limit overrides as Prometheus metrics
    39  type OverridesExporter struct {
    40  	services.Service
    41  
    42  	defaultLimits       *validation.Limits
    43  	tenantLimits        validation.TenantLimits
    44  	overrideDescription *prometheus.Desc
    45  	defaultsDescription *prometheus.Desc
    46  	logger              log.Logger
    47  
    48  	// OverridesExporter can optionally use a ring to uniquely shard tenants to
    49  	// instances and avoid export of duplicate metrics.
    50  	ring *overridesExporterRing
    51  }
    52  
    53  // NewOverridesExporter creates an OverridesExporter that reads updates to per-tenant
    54  // limits using the provided function.
    55  func NewOverridesExporter(
    56  	config Config,
    57  	defaultLimits *validation.Limits,
    58  	tenantLimits validation.TenantLimits,
    59  	log log.Logger,
    60  	registerer prometheus.Registerer,
    61  ) (*OverridesExporter, error) {
    62  	exporter := &OverridesExporter{
    63  		defaultLimits: defaultLimits,
    64  		tenantLimits:  tenantLimits,
    65  		overrideDescription: prometheus.NewDesc(
    66  			"pyroscope_limits_overrides",
    67  			"Resource limit overrides applied to tenants",
    68  			[]string{"limit_name", "tenant"},
    69  			nil,
    70  		),
    71  		defaultsDescription: prometheus.NewDesc(
    72  			"pyroscope_limits_defaults",
    73  			"Resource limit defaults for tenants without overrides",
    74  			[]string{"limit_name"},
    75  			nil,
    76  		),
    77  		logger: log,
    78  	}
    79  	var err error
    80  	exporter.ring, err = newRing(config.Ring, log, registerer)
    81  	if err != nil {
    82  		return nil, errors.Wrap(err, "failed to create ring/lifecycler")
    83  	}
    84  
    85  	exporter.Service = services.NewBasicService(exporter.starting, exporter.running, exporter.stopping)
    86  	return exporter, nil
    87  }
    88  
    89  func (oe *OverridesExporter) Describe(ch chan<- *prometheus.Desc) {
    90  	ch <- oe.defaultsDescription
    91  	ch <- oe.overrideDescription
    92  }
    93  
    94  func (oe *OverridesExporter) Collect(ch chan<- prometheus.Metric) {
    95  	if !oe.isLeader() {
    96  		// If another replica is the leader, don't expose any metrics from this one.
    97  		return
    98  	}
    99  
   100  	// Write path limits
   101  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, oe.defaultLimits.IngestionRateMB, "ingestion_rate_mb")
   102  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, oe.defaultLimits.IngestionBurstSizeMB, "ingestion_burst_size_mb")
   103  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxGlobalSeriesPerTenant), "max_global_series_per_tenant")
   104  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxLocalSeriesPerTenant), "max_series_per_tenant")
   105  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxLabelNameLength), "max_label_name_length")
   106  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxLabelValueLength), "max_label_value_length")
   107  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxLabelNamesPerSeries), "max_label_names_per_series")
   108  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxSessionsPerSeries), "max_sessions_per_series")
   109  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.DistributorAggregationWindow), "distributor_aggregation_window")
   110  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.DistributorAggregationPeriod), "distributor_aggregation_period")
   111  
   112  	// Read path limits
   113  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxQueryLookback), "max_query_lookback")
   114  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxQueryLength), "max_query_length")
   115  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxQueryParallelism), "max_query_parallelism")
   116  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.QuerySplitDuration), "split_queries_by_interval")
   117  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxFlameGraphNodesDefault), "max_flamegraph_nodes_default")
   118  	ch <- prometheus.MustNewConstMetric(oe.defaultsDescription, prometheus.GaugeValue, float64(oe.defaultLimits.MaxFlameGraphNodesMax), "max_flamegraph_nodes_max")
   119  
   120  	// Do not export per-tenant limits if they've not been configured at all.
   121  	if oe.tenantLimits == nil {
   122  		return
   123  	}
   124  
   125  	allLimits := oe.tenantLimits.AllByTenantID()
   126  	for tenant, limits := range allLimits {
   127  		// Write path limits
   128  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, limits.IngestionRateMB, "ingestion_rate_mb", tenant)
   129  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, limits.IngestionBurstSizeMB, "ingestion_burst_size_mb", tenant)
   130  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxGlobalSeriesPerTenant), "max_global_series_per_tenant", tenant)
   131  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxLocalSeriesPerTenant), "max_series_per_tenant", tenant)
   132  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxLabelNameLength), "max_label_name_length", tenant)
   133  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxLabelValueLength), "max_label_value_length", tenant)
   134  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxLabelNamesPerSeries), "max_label_names_per_series", tenant)
   135  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxSessionsPerSeries), "max_sessions_per_series", tenant)
   136  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.DistributorAggregationWindow), "distributor_aggregation_window", tenant)
   137  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.DistributorAggregationPeriod), "distributor_aggregation_period", tenant)
   138  
   139  		// Read path limits
   140  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxQueryLookback), "max_query_lookback", tenant)
   141  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxQueryLength), "max_query_length", tenant)
   142  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxQueryParallelism), "max_query_parallelism", tenant)
   143  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.QuerySplitDuration), "split_queries_by_interval", tenant)
   144  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxFlameGraphNodesDefault), "max_flamegraph_nodes_default", tenant)
   145  		ch <- prometheus.MustNewConstMetric(oe.overrideDescription, prometheus.GaugeValue, float64(limits.MaxFlameGraphNodesMax), "max_flamegraph_nodes_max", tenant)
   146  	}
   147  }
   148  
   149  // RingHandler is an http.Handler that serves requests for the overrides-exporter ring status page
   150  func (oe *OverridesExporter) RingHandler(w http.ResponseWriter, req *http.Request) {
   151  	if oe.ring != nil {
   152  		oe.ring.lifecycler.ServeHTTP(w, req)
   153  		return
   154  	}
   155  
   156  	ringDisabledPage := `
   157  		<!DOCTYPE html>
   158  		<html>
   159  			<head>
   160  				<meta charset="UTF-8">
   161  				<title>Overrides-exporter Status</title>
   162  			</head>
   163  			<body>
   164  				<h1>Overrides-exporter Status</h1>
   165  				<p>Overrides-exporter hash ring is disabled.</p>
   166  			</body>
   167  		</html>`
   168  	util.WriteHTMLResponse(w, ringDisabledPage)
   169  }
   170  
   171  // isLeader determines whether this overrides-exporter instance is the leader
   172  // replica that exports all limit metrics. If the ring is disabled, leadership is
   173  // assumed. If the ring is enabled, it is used to determine which ring member is
   174  // the leader replica.
   175  func (oe *OverridesExporter) isLeader() bool {
   176  	if oe.ring == nil {
   177  		// If the ring is not enabled, export all metrics
   178  		return true
   179  	}
   180  	if oe.State() != services.Running {
   181  		// We haven't finished startup yet, likely waiting for ring stability.
   182  		return false
   183  	}
   184  	isLeaderNow, err := oe.ring.isLeader()
   185  	if err != nil {
   186  		// If there was an error establishing ownership using the ring, log a warning and
   187  		// default to not exporting metrics to keep series churn low for transient ring
   188  		// issues.
   189  		level.Warn(oe.logger).Log("msg", "overrides-exporter failed to determine ring leader", "err", err.Error())
   190  		return false
   191  	}
   192  	return isLeaderNow
   193  }
   194  
   195  func (oe *OverridesExporter) starting(ctx context.Context) error {
   196  	if oe.ring == nil {
   197  		return nil
   198  	}
   199  	return oe.ring.starting(ctx)
   200  }
   201  
   202  func (oe *OverridesExporter) running(ctx context.Context) error {
   203  	if oe.ring == nil {
   204  		<-ctx.Done()
   205  		return nil
   206  	}
   207  	return oe.ring.running(ctx)
   208  }
   209  
   210  func (oe *OverridesExporter) stopping(err error) error {
   211  	if oe.ring == nil {
   212  		return nil
   213  	}
   214  	return oe.ring.stopping(err)
   215  }