github.com/thanos-io/thanos@v0.32.5/pkg/receive/head_series_limiter.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package receive
     5  
     6  import (
     7  	"context"
     8  	"net/http"
     9  	"net/url"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/go-kit/log/level"
    15  	"github.com/prometheus/client_golang/prometheus"
    16  	"github.com/prometheus/client_golang/prometheus/promauto"
    17  	"github.com/thanos-io/thanos/pkg/errors"
    18  	"github.com/thanos-io/thanos/pkg/httpconfig"
    19  	"github.com/thanos-io/thanos/pkg/promclient"
    20  )
    21  
// headSeriesLimit implements headSeriesLimiter interface.
//
// It keeps the configured active (head) series limits per tenant together with
// the most recent per-tenant series counts fetched from meta-monitoring, and
// answers whether a tenant is currently under its limit.
type headSeriesLimit struct {
	// mtx is write-locked while QueryMetaMonitoring updates tenantCurrentSeriesMap
	// and read-locked for the whole of isUnderLimit.
	mtx sync.RWMutex
	// limitsPerTenant maps tenant name to its configured head series limit.
	// It is filled once in NewHeadSeriesLimit; a value of 0 is treated as unlimited.
	limitsPerTenant map[string]uint64
	// tenantCurrentSeriesMap maps tenant name to the latest value returned by the
	// meta-monitoring query for that tenant.
	tenantCurrentSeriesMap map[string]float64
	// defaultLimit is used for tenants that have no entry in limitsPerTenant.
	// A value of 0 is treated as unlimited.
	defaultLimit uint64

	// metaMonitoringURL is the endpoint the instant query is sent to.
	metaMonitoringURL *url.URL
	// metaMonitoringClient is the HTTP client used for meta-monitoring requests.
	// NOTE(review): NewHeadSeriesLimit only logs when building the client fails,
	// so this may be unset in that case — confirm what NewHTTPClient returns on error.
	metaMonitoringClient *http.Client
	// metaMonitoringQuery is the query evaluated against meta-monitoring; results
	// are keyed by their "tenant" label.
	metaMonitoringQuery string

	// configuredTenantLimit exposes the configured limit per tenant; the default
	// limit is recorded under an empty tenant label.
	configuredTenantLimit *prometheus.GaugeVec
	// limitedRequests is incremented by isUnderLimit each time a tenant is found
	// at or above its limit.
	limitedRequests *prometheus.CounterVec
	// metaMonitoringErr is incremented each time the meta-monitoring query fails.
	metaMonitoringErr prometheus.Counter

	logger log.Logger
}
    39  
    40  func NewHeadSeriesLimit(w WriteLimitsConfig, registerer prometheus.Registerer, logger log.Logger) *headSeriesLimit {
    41  	limit := &headSeriesLimit{
    42  		metaMonitoringURL:   w.GlobalLimits.metaMonitoringURL,
    43  		metaMonitoringQuery: w.GlobalLimits.MetaMonitoringLimitQuery,
    44  		defaultLimit:        w.DefaultLimits.HeadSeriesLimit,
    45  		configuredTenantLimit: promauto.With(registerer).NewGaugeVec(
    46  			prometheus.GaugeOpts{
    47  				Name: "thanos_receive_head_series_limit",
    48  				Help: "The configured limit for active (head) series of tenants.",
    49  			}, []string{"tenant"},
    50  		),
    51  		limitedRequests: promauto.With(registerer).NewCounterVec(
    52  			prometheus.CounterOpts{
    53  				Name: "thanos_receive_head_series_limited_requests_total",
    54  				Help: "The total number of remote write requests that have been dropped due to active series limiting.",
    55  			}, []string{"tenant"},
    56  		),
    57  		metaMonitoringErr: promauto.With(registerer).NewCounter(
    58  			prometheus.CounterOpts{
    59  				Name: "thanos_receive_metamonitoring_failed_queries_total",
    60  				Help: "The total number of meta-monitoring queries that failed while limiting.",
    61  			},
    62  		),
    63  		logger: logger,
    64  	}
    65  
    66  	// Record default limit with empty tenant label.
    67  	limit.configuredTenantLimit.WithLabelValues("").Set(float64(limit.defaultLimit))
    68  
    69  	// Initialize map for configured limits of each tenant.
    70  	limit.limitsPerTenant = map[string]uint64{}
    71  	for t, w := range w.TenantsLimits {
    72  		// No limit set for tenant so inherit default, which could be unlimited as well.
    73  		if w.HeadSeriesLimit == nil {
    74  			limit.limitsPerTenant[t] = limit.defaultLimit
    75  			limit.configuredTenantLimit.WithLabelValues(t).Set(float64(limit.defaultLimit))
    76  			continue
    77  		}
    78  
    79  		// Limit set to provided one for tenant that could be unlimited or some value.
    80  		// Default not inherited.
    81  		limit.limitsPerTenant[t] = *w.HeadSeriesLimit
    82  		limit.configuredTenantLimit.WithLabelValues(t).Set(float64(*w.HeadSeriesLimit))
    83  	}
    84  
    85  	// Initialize map for current head series of each tenant.
    86  	limit.tenantCurrentSeriesMap = map[string]float64{}
    87  
    88  	// Use specified HTTPConfig (if any) to make requests to meta-monitoring.
    89  	c := httpconfig.NewDefaultClientConfig()
    90  	if w.GlobalLimits.MetaMonitoringHTTPClient != nil {
    91  		c = *w.GlobalLimits.MetaMonitoringHTTPClient
    92  	}
    93  
    94  	var err error
    95  	limit.metaMonitoringClient, err = httpconfig.NewHTTPClient(c, "meta-mon-for-limit")
    96  	if err != nil {
    97  		level.Error(logger).Log("msg", "improper http client config", "err", err.Error())
    98  	}
    99  
   100  	return limit
   101  }
   102  
   103  // QueryMetaMonitoring queries any Prometheus Query API compatible meta-monitoring
   104  // solution with the configured query for getting current active (head) series of all tenants.
   105  // It then populates tenantCurrentSeries map with result.
   106  func (h *headSeriesLimit) QueryMetaMonitoring(ctx context.Context) error {
   107  	c := promclient.NewWithTracingClient(h.logger, h.metaMonitoringClient, httpconfig.ThanosUserAgent)
   108  
   109  	vectorRes, _, _, err := c.QueryInstant(ctx, h.metaMonitoringURL, h.metaMonitoringQuery, time.Now(), promclient.QueryOptions{Deduplicate: true})
   110  	if err != nil {
   111  		h.metaMonitoringErr.Inc()
   112  		return err
   113  	}
   114  
   115  	level.Debug(h.logger).Log("msg", "successfully queried meta-monitoring", "vectors", len(vectorRes))
   116  
   117  	h.mtx.Lock()
   118  	defer h.mtx.Unlock()
   119  	// Construct map of tenant name and current head series.
   120  	for _, e := range vectorRes {
   121  		for k, v := range e.Metric {
   122  			if k == "tenant" {
   123  				h.tenantCurrentSeriesMap[string(v)] = float64(e.Value)
   124  				level.Debug(h.logger).Log("msg", "tenant value queried", "tenant", string(v), "value", e.Value)
   125  			}
   126  		}
   127  	}
   128  
   129  	return nil
   130  }
   131  
   132  // isUnderLimit ensures that the current number of active series for a tenant does not exceed given limit.
   133  // It does so in a best-effort way, i.e, in case meta-monitoring is unreachable, it does not impose limits.
   134  func (h *headSeriesLimit) isUnderLimit(tenant string) (bool, error) {
   135  	h.mtx.RLock()
   136  	defer h.mtx.RUnlock()
   137  	if len(h.limitsPerTenant) == 0 && h.defaultLimit == 0 {
   138  		return true, nil
   139  	}
   140  
   141  	// In such limiting flow, we ingest the first remote write request
   142  	// and then check meta-monitoring metric to ascertain current active
   143  	// series. As such metric is updated in intervals, it is possible
   144  	// that Receive ingests more series than the limit, before detecting that
   145  	// a tenant has exceeded the set limits.
   146  	v, ok := h.tenantCurrentSeriesMap[tenant]
   147  	if !ok {
   148  		return true, errors.Newf("tenant not in current series map")
   149  	}
   150  
   151  	var limit uint64
   152  	limit, ok = h.limitsPerTenant[tenant]
   153  	if !ok {
   154  		// Tenant has not been defined in config, so fallback to default.
   155  		limit = h.defaultLimit
   156  	}
   157  
   158  	// If tenant limit is 0 we treat it as unlimited.
   159  	if limit == 0 {
   160  		return true, nil
   161  	}
   162  
   163  	if v >= float64(limit) {
   164  		level.Error(h.logger).Log("msg", "tenant above limit", "tenant", tenant, "currentSeries", v, "limit", limit)
   165  		h.limitedRequests.WithLabelValues(tenant).Inc()
   166  		return false, nil
   167  	}
   168  
   169  	return true, nil
   170  }
   171  
   172  // nopSeriesLimit implements activeSeriesLimiter interface as no-op.
   173  type nopSeriesLimit struct{}
   174  
   175  func NewNopSeriesLimit() *nopSeriesLimit {
   176  	return &nopSeriesLimit{}
   177  }
   178  
   179  func (a *nopSeriesLimit) QueryMetaMonitoring(_ context.Context) error {
   180  	return nil
   181  }
   182  
   183  func (a *nopSeriesLimit) isUnderLimit(_ string) (bool, error) {
   184  	return true, nil
   185  }