github.com/thanos-io/thanos@v0.32.5/pkg/receive/head_series_limiter.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package receive 5 6 import ( 7 "context" 8 "net/http" 9 "net/url" 10 "sync" 11 "time" 12 13 "github.com/go-kit/log" 14 "github.com/go-kit/log/level" 15 "github.com/prometheus/client_golang/prometheus" 16 "github.com/prometheus/client_golang/prometheus/promauto" 17 "github.com/thanos-io/thanos/pkg/errors" 18 "github.com/thanos-io/thanos/pkg/httpconfig" 19 "github.com/thanos-io/thanos/pkg/promclient" 20 ) 21 22 // headSeriesLimit implements headSeriesLimiter interface. 23 type headSeriesLimit struct { 24 mtx sync.RWMutex 25 limitsPerTenant map[string]uint64 26 tenantCurrentSeriesMap map[string]float64 27 defaultLimit uint64 28 29 metaMonitoringURL *url.URL 30 metaMonitoringClient *http.Client 31 metaMonitoringQuery string 32 33 configuredTenantLimit *prometheus.GaugeVec 34 limitedRequests *prometheus.CounterVec 35 metaMonitoringErr prometheus.Counter 36 37 logger log.Logger 38 } 39 40 func NewHeadSeriesLimit(w WriteLimitsConfig, registerer prometheus.Registerer, logger log.Logger) *headSeriesLimit { 41 limit := &headSeriesLimit{ 42 metaMonitoringURL: w.GlobalLimits.metaMonitoringURL, 43 metaMonitoringQuery: w.GlobalLimits.MetaMonitoringLimitQuery, 44 defaultLimit: w.DefaultLimits.HeadSeriesLimit, 45 configuredTenantLimit: promauto.With(registerer).NewGaugeVec( 46 prometheus.GaugeOpts{ 47 Name: "thanos_receive_head_series_limit", 48 Help: "The configured limit for active (head) series of tenants.", 49 }, []string{"tenant"}, 50 ), 51 limitedRequests: promauto.With(registerer).NewCounterVec( 52 prometheus.CounterOpts{ 53 Name: "thanos_receive_head_series_limited_requests_total", 54 Help: "The total number of remote write requests that have been dropped due to active series limiting.", 55 }, []string{"tenant"}, 56 ), 57 metaMonitoringErr: promauto.With(registerer).NewCounter( 58 prometheus.CounterOpts{ 59 Name: "thanos_receive_metamonitoring_failed_queries_total", 60 Help: "The total number of meta-monitoring queries that failed while limiting.", 61 }, 62 ), 63 logger: logger, 64 } 65 66 // Record default limit with empty tenant label. 67 limit.configuredTenantLimit.WithLabelValues("").Set(float64(limit.defaultLimit)) 68 69 // Initialize map for configured limits of each tenant. 70 limit.limitsPerTenant = map[string]uint64{} 71 for t, w := range w.TenantsLimits { 72 // No limit set for tenant so inherit default, which could be unlimited as well. 73 if w.HeadSeriesLimit == nil { 74 limit.limitsPerTenant[t] = limit.defaultLimit 75 limit.configuredTenantLimit.WithLabelValues(t).Set(float64(limit.defaultLimit)) 76 continue 77 } 78 79 // Limit set to provided one for tenant that could be unlimited or some value. 80 // Default not inherited. 81 limit.limitsPerTenant[t] = *w.HeadSeriesLimit 82 limit.configuredTenantLimit.WithLabelValues(t).Set(float64(*w.HeadSeriesLimit)) 83 } 84 85 // Initialize map for current head series of each tenant. 86 limit.tenantCurrentSeriesMap = map[string]float64{} 87 88 // Use specified HTTPConfig (if any) to make requests to meta-monitoring. 89 c := httpconfig.NewDefaultClientConfig() 90 if w.GlobalLimits.MetaMonitoringHTTPClient != nil { 91 c = *w.GlobalLimits.MetaMonitoringHTTPClient 92 } 93 94 var err error 95 limit.metaMonitoringClient, err = httpconfig.NewHTTPClient(c, "meta-mon-for-limit") 96 if err != nil { 97 level.Error(logger).Log("msg", "improper http client config", "err", err.Error()) 98 } 99 100 return limit 101 } 102 103 // QueryMetaMonitoring queries any Prometheus Query API compatible meta-monitoring 104 // solution with the configured query for getting current active (head) series of all tenants. 105 // It then populates tenantCurrentSeries map with result. 106 func (h *headSeriesLimit) QueryMetaMonitoring(ctx context.Context) error { 107 c := promclient.NewWithTracingClient(h.logger, h.metaMonitoringClient, httpconfig.ThanosUserAgent) 108 109 vectorRes, _, _, err := c.QueryInstant(ctx, h.metaMonitoringURL, h.metaMonitoringQuery, time.Now(), promclient.QueryOptions{Deduplicate: true}) 110 if err != nil { 111 h.metaMonitoringErr.Inc() 112 return err 113 } 114 115 level.Debug(h.logger).Log("msg", "successfully queried meta-monitoring", "vectors", len(vectorRes)) 116 117 h.mtx.Lock() 118 defer h.mtx.Unlock() 119 // Construct map of tenant name and current head series. 120 for _, e := range vectorRes { 121 for k, v := range e.Metric { 122 if k == "tenant" { 123 h.tenantCurrentSeriesMap[string(v)] = float64(e.Value) 124 level.Debug(h.logger).Log("msg", "tenant value queried", "tenant", string(v), "value", e.Value) 125 } 126 } 127 } 128 129 return nil 130 } 131 132 // isUnderLimit ensures that the current number of active series for a tenant does not exceed given limit. 133 // It does so in a best-effort way, i.e, in case meta-monitoring is unreachable, it does not impose limits. 134 func (h *headSeriesLimit) isUnderLimit(tenant string) (bool, error) { 135 h.mtx.RLock() 136 defer h.mtx.RUnlock() 137 if len(h.limitsPerTenant) == 0 && h.defaultLimit == 0 { 138 return true, nil 139 } 140 141 // In such limiting flow, we ingest the first remote write request 142 // and then check meta-monitoring metric to ascertain current active 143 // series. As such metric is updated in intervals, it is possible 144 // that Receive ingests more series than the limit, before detecting that 145 // a tenant has exceeded the set limits. 146 v, ok := h.tenantCurrentSeriesMap[tenant] 147 if !ok { 148 return true, errors.Newf("tenant not in current series map") 149 } 150 151 var limit uint64 152 limit, ok = h.limitsPerTenant[tenant] 153 if !ok { 154 // Tenant has not been defined in config, so fallback to default. 155 limit = h.defaultLimit 156 } 157 158 // If tenant limit is 0 we treat it as unlimited. 159 if limit == 0 { 160 return true, nil 161 } 162 163 if v >= float64(limit) { 164 level.Error(h.logger).Log("msg", "tenant above limit", "tenant", tenant, "currentSeries", v, "limit", limit) 165 h.limitedRequests.WithLabelValues(tenant).Inc() 166 return false, nil 167 } 168 169 return true, nil 170 } 171 172 // nopSeriesLimit implements activeSeriesLimiter interface as no-op. 173 type nopSeriesLimit struct{} 174 175 func NewNopSeriesLimit() *nopSeriesLimit { 176 return &nopSeriesLimit{} 177 } 178 179 func (a *nopSeriesLimit) QueryMetaMonitoring(_ context.Context) error { 180 return nil 181 } 182 183 func (a *nopSeriesLimit) isUnderLimit(_ string) (bool, error) { 184 return true, nil 185 }