github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/ingester/user_state.go (about)

     1  package ingester
     2  
     3  import (
     4  	"context"
     5  	"net/http"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/go-kit/log"
    10  	"github.com/go-kit/log/level"
    11  	"github.com/prometheus/client_golang/prometheus"
    12  	"github.com/prometheus/common/model"
    13  	"github.com/prometheus/prometheus/pkg/labels"
    14  	tsdb_record "github.com/prometheus/prometheus/tsdb/record"
    15  	"github.com/segmentio/fasthash/fnv1a"
    16  	"github.com/weaveworks/common/httpgrpc"
    17  
    18  	"github.com/cortexproject/cortex/pkg/cortexpb"
    19  	"github.com/cortexproject/cortex/pkg/ingester/client"
    20  	"github.com/cortexproject/cortex/pkg/ingester/index"
    21  	"github.com/cortexproject/cortex/pkg/tenant"
    22  	"github.com/cortexproject/cortex/pkg/util"
    23  	"github.com/cortexproject/cortex/pkg/util/extract"
    24  	util_math "github.com/cortexproject/cortex/pkg/util/math"
    25  	"github.com/cortexproject/cortex/pkg/util/spanlogger"
    26  	"github.com/cortexproject/cortex/pkg/util/validation"
    27  )
    28  
// userStates holds the userState object for all users (tenants),
// each one containing all the in-memory series for a given user.
//
// The limiter, cfg, metrics and logger fields are handed down to every
// userState created by getOrCreate.
type userStates struct {
	// states maps a user ID (string) to its *userState.
	states  sync.Map
	limiter *Limiter
	cfg     Config
	metrics *ingesterMetrics
	logger  log.Logger
}
    38  
// userState is the in-memory state of a single user (tenant): its series,
// the index over them, the limits that apply and the metrics it reports to.
//
// fpLocker serialises access per fingerprint, fpToSeries maps a fingerprint
// to its memorySeries, and mapper translates a raw fingerprint into the one
// actually used as the key (see getSeries). ingestedAPISamples and
// ingestedRuleSamples are EWMA rates that userStates.updateRates ticks.
type userState struct {
	limiter             *Limiter
	userID              string
	fpLocker            *fingerprintLocker
	fpToSeries          *seriesMap
	mapper              *fpMapper
	index               *index.InvertedIndex
	ingestedAPISamples  *util_math.EwmaRate
	ingestedRuleSamples *util_math.EwmaRate
	activeSeries        *ActiveSeries
	logger              log.Logger

	// seriesInMetric counts the series held per metric name; it backs the
	// per-metric series limit check.
	seriesInMetric *metricCounter

	// Series metrics.
	// memSeries and createdChunks are the ingester-wide collectors; the
	// remaining ones are bound to this user's ID in getOrCreate.
	memSeries             prometheus.Gauge
	memSeriesCreatedTotal prometheus.Counter
	memSeriesRemovedTotal prometheus.Counter
	discardedSamples      *prometheus.CounterVec
	createdChunks         prometheus.Counter
	activeSeriesGauge     prometheus.Gauge
}
    61  
    62  // DiscardedSamples metric labels
    63  const (
    64  	perUserSeriesLimit   = "per_user_series_limit"
    65  	perMetricSeriesLimit = "per_metric_series_limit"
    66  )
    67  
    68  func newUserStates(limiter *Limiter, cfg Config, metrics *ingesterMetrics, logger log.Logger) *userStates {
    69  	return &userStates{
    70  		limiter: limiter,
    71  		cfg:     cfg,
    72  		metrics: metrics,
    73  		logger:  logger,
    74  	}
    75  }
    76  
    77  func (us *userStates) cp() map[string]*userState {
    78  	states := map[string]*userState{}
    79  	us.states.Range(func(key, value interface{}) bool {
    80  		states[key.(string)] = value.(*userState)
    81  		return true
    82  	})
    83  	return states
    84  }
    85  
    86  //nolint:unused
    87  func (us *userStates) gc() {
    88  	us.states.Range(func(key, value interface{}) bool {
    89  		state := value.(*userState)
    90  		if state.fpToSeries.length() == 0 {
    91  			us.states.Delete(key)
    92  			state.activeSeries.clear()
    93  			state.activeSeriesGauge.Set(0)
    94  		}
    95  		return true
    96  	})
    97  }
    98  
    99  func (us *userStates) updateRates() {
   100  	us.states.Range(func(key, value interface{}) bool {
   101  		state := value.(*userState)
   102  		state.ingestedAPISamples.Tick()
   103  		state.ingestedRuleSamples.Tick()
   104  		return true
   105  	})
   106  }
   107  
   108  // Labels will be copied if they are kept.
   109  func (us *userStates) updateActiveSeriesForUser(userID string, now time.Time, lbls []labels.Label) {
   110  	if s, ok := us.get(userID); ok {
   111  		s.activeSeries.UpdateSeries(lbls, now, func(l labels.Labels) labels.Labels { return cortexpb.CopyLabels(l) })
   112  	}
   113  }
   114  
   115  func (us *userStates) purgeAndUpdateActiveSeries(purgeTime time.Time) {
   116  	us.states.Range(func(key, value interface{}) bool {
   117  		state := value.(*userState)
   118  		state.activeSeries.Purge(purgeTime)
   119  		state.activeSeriesGauge.Set(float64(state.activeSeries.Active()))
   120  		return true
   121  	})
   122  }
   123  
   124  func (us *userStates) get(userID string) (*userState, bool) {
   125  	state, ok := us.states.Load(userID)
   126  	if !ok {
   127  		return nil, ok
   128  	}
   129  	return state.(*userState), ok
   130  }
   131  
   132  func (us *userStates) getOrCreate(userID string) *userState {
   133  	state, ok := us.get(userID)
   134  	if !ok {
   135  
   136  		logger := log.With(us.logger, "user", userID)
   137  		// Speculatively create a userState object and try to store it
   138  		// in the map.  Another goroutine may have got there before
   139  		// us, in which case this userState will be discarded
   140  		state = &userState{
   141  			userID:              userID,
   142  			limiter:             us.limiter,
   143  			fpToSeries:          newSeriesMap(),
   144  			fpLocker:            newFingerprintLocker(16 * 1024),
   145  			index:               index.New(),
   146  			ingestedAPISamples:  util_math.NewEWMARate(0.2, us.cfg.RateUpdatePeriod),
   147  			ingestedRuleSamples: util_math.NewEWMARate(0.2, us.cfg.RateUpdatePeriod),
   148  			seriesInMetric:      newMetricCounter(us.limiter, us.cfg.getIgnoreSeriesLimitForMetricNamesMap()),
   149  			logger:              logger,
   150  
   151  			memSeries:             us.metrics.memSeries,
   152  			memSeriesCreatedTotal: us.metrics.memSeriesCreatedTotal.WithLabelValues(userID),
   153  			memSeriesRemovedTotal: us.metrics.memSeriesRemovedTotal.WithLabelValues(userID),
   154  			discardedSamples:      validation.DiscardedSamples.MustCurryWith(prometheus.Labels{"user": userID}),
   155  			createdChunks:         us.metrics.createdChunks,
   156  
   157  			activeSeries:      NewActiveSeries(),
   158  			activeSeriesGauge: us.metrics.activeSeriesPerUser.WithLabelValues(userID),
   159  		}
   160  		state.mapper = newFPMapper(state.fpToSeries, logger)
   161  		stored, ok := us.states.LoadOrStore(userID, state)
   162  		if !ok {
   163  			us.metrics.memUsers.Inc()
   164  		}
   165  		state = stored.(*userState)
   166  	}
   167  
   168  	return state
   169  }
   170  
   171  // teardown ensures metrics are accurately updated if a userStates struct is discarded
   172  func (us *userStates) teardown() {
   173  	for _, u := range us.cp() {
   174  		u.memSeriesRemovedTotal.Add(float64(u.fpToSeries.length()))
   175  		u.memSeries.Sub(float64(u.fpToSeries.length()))
   176  		u.activeSeriesGauge.Set(0)
   177  		us.metrics.memUsers.Dec()
   178  	}
   179  }
   180  
   181  func (us *userStates) getViaContext(ctx context.Context) (*userState, bool, error) {
   182  	userID, err := tenant.TenantID(ctx)
   183  	if err != nil {
   184  		return nil, false, err
   185  	}
   186  	state, ok := us.get(userID)
   187  	return state, ok, nil
   188  }
   189  
   190  // NOTE: memory for `labels` is unsafe; anything retained beyond the
   191  // life of this function must be copied
   192  func (us *userStates) getOrCreateSeries(ctx context.Context, userID string, labels []cortexpb.LabelAdapter, record *WALRecord) (*userState, model.Fingerprint, *memorySeries, error) {
   193  	state := us.getOrCreate(userID)
   194  	// WARNING: `err` may have a reference to unsafe memory in `labels`
   195  	fp, series, err := state.getSeries(labels, record)
   196  	return state, fp, series, err
   197  }
   198  
   199  // NOTE: memory for `metric` is unsafe; anything retained beyond the
   200  // life of this function must be copied
   201  func (u *userState) getSeries(metric labelPairs, record *WALRecord) (model.Fingerprint, *memorySeries, error) {
   202  	rawFP := client.FastFingerprint(metric)
   203  	u.fpLocker.Lock(rawFP)
   204  	fp := u.mapper.mapFP(rawFP, metric)
   205  	if fp != rawFP {
   206  		u.fpLocker.Unlock(rawFP)
   207  		u.fpLocker.Lock(fp)
   208  	}
   209  
   210  	series, ok := u.fpToSeries.get(fp)
   211  	if ok {
   212  		return fp, series, nil
   213  	}
   214  
   215  	series, err := u.createSeriesWithFingerprint(fp, metric, record, false)
   216  	if err != nil {
   217  		u.fpLocker.Unlock(fp)
   218  		return 0, nil, err
   219  	}
   220  
   221  	return fp, series, nil
   222  }
   223  
   224  func (u *userState) createSeriesWithFingerprint(fp model.Fingerprint, metric labelPairs, record *WALRecord, recovery bool) (*memorySeries, error) {
   225  	// There's theoretically a relatively harmless race here if multiple
   226  	// goroutines get the length of the series map at the same time, then
   227  	// all proceed to add a new series. This is likely not worth addressing,
   228  	// as this should happen rarely (all samples from one push are added
   229  	// serially), and the overshoot in allowed series would be minimal.
   230  
   231  	if !recovery {
   232  		if err := u.limiter.AssertMaxSeriesPerUser(u.userID, u.fpToSeries.length()); err != nil {
   233  			return nil, makeLimitError(perUserSeriesLimit, u.limiter.FormatError(u.userID, err))
   234  		}
   235  	}
   236  
   237  	// MetricNameFromLabelAdapters returns a copy of the string in `metric`
   238  	metricName, err := extract.MetricNameFromLabelAdapters(metric)
   239  	if err != nil {
   240  		return nil, err
   241  	}
   242  
   243  	if !recovery {
   244  		// Check if the per-metric limit has been exceeded
   245  		if err = u.seriesInMetric.canAddSeriesFor(u.userID, metricName); err != nil {
   246  			// WARNING: returns a reference to `metric`
   247  			return nil, makeMetricLimitError(perMetricSeriesLimit, cortexpb.FromLabelAdaptersToLabels(metric), u.limiter.FormatError(u.userID, err))
   248  		}
   249  	}
   250  
   251  	u.memSeriesCreatedTotal.Inc()
   252  	u.memSeries.Inc()
   253  	u.seriesInMetric.increaseSeriesForMetric(metricName)
   254  
   255  	if record != nil {
   256  		lbls := make(labels.Labels, 0, len(metric))
   257  		for _, m := range metric {
   258  			lbls = append(lbls, labels.Label(m))
   259  		}
   260  		record.Series = append(record.Series, tsdb_record.RefSeries{
   261  			Ref:    uint64(fp),
   262  			Labels: lbls,
   263  		})
   264  	}
   265  
   266  	labels := u.index.Add(metric, fp) // Add() returns 'interned' values so the original labels are not retained
   267  	series := newMemorySeries(labels, u.createdChunks)
   268  	u.fpToSeries.put(fp, series)
   269  
   270  	return series, nil
   271  }
   272  
   273  func (u *userState) removeSeries(fp model.Fingerprint, metric labels.Labels) {
   274  	u.fpToSeries.del(fp)
   275  	u.index.Delete(metric, fp)
   276  
   277  	metricName := metric.Get(model.MetricNameLabel)
   278  	if metricName == "" {
   279  		// Series without a metric name should never be able to make it into
   280  		// the ingester's memory storage.
   281  		panic("No metric name label")
   282  	}
   283  
   284  	u.seriesInMetric.decreaseSeriesForMetric(metricName)
   285  
   286  	u.memSeriesRemovedTotal.Inc()
   287  	u.memSeries.Dec()
   288  }
   289  
   290  // forSeriesMatching passes all series matching the given matchers to the
   291  // provided callback. Deals with locking and the quirks of zero-length matcher
   292  // values. There are 2 callbacks:
   293  // - The `add` callback is called for each series while the lock is held, and
   294  //   is intend to be used by the caller to build a batch.
   295  // - The `send` callback is called at certain intervals specified by batchSize
   296  //   with no locks held, and is intended to be used by the caller to send the
   297  //   built batches.
   298  func (u *userState) forSeriesMatching(ctx context.Context, allMatchers []*labels.Matcher,
   299  	add func(context.Context, model.Fingerprint, *memorySeries) error,
   300  	send func(context.Context) error, batchSize int,
   301  ) error {
   302  	log, ctx := spanlogger.New(ctx, "forSeriesMatching")
   303  	defer log.Finish()
   304  
   305  	filters, matchers := util.SplitFiltersAndMatchers(allMatchers)
   306  	fps := u.index.Lookup(matchers)
   307  	if len(fps) > u.limiter.MaxSeriesPerQuery(u.userID) {
   308  		return httpgrpc.Errorf(http.StatusRequestEntityTooLarge, "exceeded maximum number of series in a query")
   309  	}
   310  
   311  	level.Debug(log).Log("series", len(fps))
   312  
   313  	// We only hold one FP lock at once here, so no opportunity to deadlock.
   314  	sent := 0
   315  outer:
   316  	for _, fp := range fps {
   317  		if err := ctx.Err(); err != nil {
   318  			return err
   319  		}
   320  
   321  		u.fpLocker.Lock(fp)
   322  		series, ok := u.fpToSeries.get(fp)
   323  		if !ok {
   324  			u.fpLocker.Unlock(fp)
   325  			continue
   326  		}
   327  
   328  		for _, filter := range filters {
   329  			if !filter.Matches(series.metric.Get(filter.Name)) {
   330  				u.fpLocker.Unlock(fp)
   331  				continue outer
   332  			}
   333  		}
   334  
   335  		err := add(ctx, fp, series)
   336  		u.fpLocker.Unlock(fp)
   337  		if err != nil {
   338  			return err
   339  		}
   340  
   341  		sent++
   342  		if batchSize > 0 && sent%batchSize == 0 && send != nil {
   343  			if err = send(ctx); err != nil {
   344  				return nil
   345  			}
   346  		}
   347  	}
   348  
   349  	if batchSize > 0 && sent%batchSize > 0 && send != nil {
   350  		return send(ctx)
   351  	}
   352  	return nil
   353  }
   354  
   355  const numMetricCounterShards = 128
   356  
// metricCounterShard is one shard of a metricCounter: a map from metric name
// to the number of series counted for it, guarded by its own mutex.
type metricCounterShard struct {
	mtx sync.Mutex
	m   map[string]int
}
   361  
// metricCounter tracks how many series exist per metric name, sharded by a
// hash of the metric name, and checks that count against the limiter's
// per-metric series limit.
type metricCounter struct {
	limiter *Limiter
	shards  []metricCounterShard

	// ignoredMetrics lists metric names that canAddSeriesFor always accepts
	// without consulting the limiter.
	ignoredMetrics map[string]struct{}
}
   368  
   369  func newMetricCounter(limiter *Limiter, ignoredMetricsForSeriesCount map[string]struct{}) *metricCounter {
   370  	shards := make([]metricCounterShard, 0, numMetricCounterShards)
   371  	for i := 0; i < numMetricCounterShards; i++ {
   372  		shards = append(shards, metricCounterShard{
   373  			m: map[string]int{},
   374  		})
   375  	}
   376  	return &metricCounter{
   377  		limiter: limiter,
   378  		shards:  shards,
   379  
   380  		ignoredMetrics: ignoredMetricsForSeriesCount,
   381  	}
   382  }
   383  
   384  func (m *metricCounter) decreaseSeriesForMetric(metricName string) {
   385  	shard := m.getShard(metricName)
   386  	shard.mtx.Lock()
   387  	defer shard.mtx.Unlock()
   388  
   389  	shard.m[metricName]--
   390  	if shard.m[metricName] == 0 {
   391  		delete(shard.m, metricName)
   392  	}
   393  }
   394  
   395  func (m *metricCounter) getShard(metricName string) *metricCounterShard {
   396  	shard := &m.shards[util.HashFP(model.Fingerprint(fnv1a.HashString64(metricName)))%numMetricCounterShards]
   397  	return shard
   398  }
   399  
   400  func (m *metricCounter) canAddSeriesFor(userID, metric string) error {
   401  	if _, ok := m.ignoredMetrics[metric]; ok {
   402  		return nil
   403  	}
   404  
   405  	shard := m.getShard(metric)
   406  	shard.mtx.Lock()
   407  	defer shard.mtx.Unlock()
   408  
   409  	return m.limiter.AssertMaxSeriesPerMetric(userID, shard.m[metric])
   410  }
   411  
   412  func (m *metricCounter) increaseSeriesForMetric(metric string) {
   413  	shard := m.getShard(metric)
   414  	shard.mtx.Lock()
   415  	shard.m[metric]++
   416  	shard.mtx.Unlock()
   417  }