github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/chunk/purger/tombstones.go (about)

     1  package purger
     2  
     3  import (
     4  	"context"
     5  	"sort"
     6  	"strconv"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/go-kit/log/level"
    11  	"github.com/pkg/errors"
    12  	"github.com/prometheus/client_golang/prometheus"
    13  	"github.com/prometheus/client_golang/prometheus/promauto"
    14  	"github.com/prometheus/common/model"
    15  	"github.com/prometheus/prometheus/pkg/labels"
    16  	"github.com/prometheus/prometheus/promql/parser"
    17  
    18  	util_log "github.com/cortexproject/cortex/pkg/util/log"
    19  )
    20  
// tombstonesReloadDuration is how often the background loop re-checks the
// delete store for changed cache generation numbers (and hence changed
// delete requests).
const tombstonesReloadDuration = 5 * time.Minute
    22  
// tombstonesLoaderMetrics holds the Prometheus counters that track load
// failures inside the TombstonesLoader.
type tombstonesLoaderMetrics struct {
	cacheGenLoadFailures       prometheus.Counter // failures loading cache generation numbers
	deleteRequestsLoadFailures prometheus.Counter // failures loading or parsing delete requests
}
    27  
    28  func newtombstonesLoaderMetrics(r prometheus.Registerer) *tombstonesLoaderMetrics {
    29  	m := tombstonesLoaderMetrics{}
    30  
    31  	m.cacheGenLoadFailures = promauto.With(r).NewCounter(prometheus.CounterOpts{
    32  		Namespace: "cortex",
    33  		Name:      "tombstones_loader_cache_gen_load_failures_total",
    34  		Help:      "Total number of failures while loading cache generation number using tombstones loader",
    35  	})
    36  	m.deleteRequestsLoadFailures = promauto.With(r).NewCounter(prometheus.CounterOpts{
    37  		Namespace: "cortex",
    38  		Name:      "tombstones_loader_cache_delete_requests_load_failures_total",
    39  		Help:      "Total number of failures while loading delete requests using tombstones loader",
    40  	})
    41  
    42  	return &m
    43  }
    44  
// TombstonesSet holds all the pending delete requests for a user.
type TombstonesSet struct {
	tombstones []DeleteRequest
	// oldestTombstoneStart/newestTombstoneEnd bound the union of all tombstone
	// time ranges. Used as optimization to find whether we want to iterate over
	// tombstones or not: queries entirely outside the bounds can skip the scan.
	oldestTombstoneStart, newestTombstoneEnd model.Time
}
    50  
// DeleteStoreAPI is the subset of the delete store consumed by
// TombstonesLoader. Used for easier injection of mocks.
type DeleteStoreAPI interface {
	getCacheGenerationNumbers(ctx context.Context, user string) (*cacheGenNumbers, error)
	GetPendingDeleteRequestsForUser(ctx context.Context, id string) ([]DeleteRequest, error)
}
    56  
// TombstonesLoader loads delete requests and gen numbers from store and keeps checking for updates.
// It keeps checking for changes in gen numbers, which also means changes in delete requests and reloads specific users delete requests.
type TombstonesLoader struct {
	tombstones    map[string]*TombstonesSet // per-user pending delete requests, lazily loaded
	tombstonesMtx sync.RWMutex              // guards tombstones

	cacheGenNumbers    map[string]*cacheGenNumbers // per-user cache generation numbers, lazily loaded
	cacheGenNumbersMtx sync.RWMutex                // guards cacheGenNumbers

	deleteStore DeleteStoreAPI // nil disables store lookups and the reload loop
	metrics     *tombstonesLoaderMetrics
	quit        chan struct{} // closed by Stop to terminate loop()
}
    70  
    71  // NewTombstonesLoader creates a TombstonesLoader
    72  func NewTombstonesLoader(deleteStore DeleteStoreAPI, registerer prometheus.Registerer) *TombstonesLoader {
    73  	tl := TombstonesLoader{
    74  		tombstones:      map[string]*TombstonesSet{},
    75  		cacheGenNumbers: map[string]*cacheGenNumbers{},
    76  		deleteStore:     deleteStore,
    77  		metrics:         newtombstonesLoaderMetrics(registerer),
    78  	}
    79  	go tl.loop()
    80  
    81  	return &tl
    82  }
    83  
// Stop stops TombstonesLoader's background reload loop by closing the quit
// channel. It must be called at most once (a second close would panic).
func (tl *TombstonesLoader) Stop() {
	close(tl.quit)
}
    88  
    89  func (tl *TombstonesLoader) loop() {
    90  	if tl.deleteStore == nil {
    91  		return
    92  	}
    93  
    94  	tombstonesReloadTimer := time.NewTicker(tombstonesReloadDuration)
    95  	for {
    96  		select {
    97  		case <-tombstonesReloadTimer.C:
    98  			err := tl.reloadTombstones()
    99  			if err != nil {
   100  				level.Error(util_log.Logger).Log("msg", "error reloading tombstones", "err", err)
   101  			}
   102  		case <-tl.quit:
   103  			return
   104  		}
   105  	}
   106  }
   107  
// reloadTombstones re-checks the cache generation numbers of every tenant
// already loaded and, for tenants whose numbers changed, reloads their
// pending delete requests before storing the new numbers. Called
// periodically from loop().
func (tl *TombstonesLoader) reloadTombstones() error {
	updatedGenNumbers := make(map[string]*cacheGenNumbers)
	tl.cacheGenNumbersMtx.RLock()

	// check for updates in loaded gen numbers
	// NOTE(review): the delete store is queried while the read lock is held,
	// so a slow store blocks writers for the whole scan — confirm acceptable.
	for userID, oldGenNumbers := range tl.cacheGenNumbers {
		newGenNumbers, err := tl.deleteStore.getCacheGenerationNumbers(context.Background(), userID)
		if err != nil {
			tl.cacheGenNumbersMtx.RUnlock()
			return err
		}

		if *oldGenNumbers != *newGenNumbers {
			updatedGenNumbers[userID] = newGenNumbers
		}
	}

	tl.cacheGenNumbersMtx.RUnlock()

	// in frontend we load only cache gen numbers so short circuit here if there are no loaded deleted requests
	// first call to GetPendingTombstones would avoid doing this.
	tl.tombstonesMtx.RLock()
	if len(tl.tombstones) == 0 {
		tl.tombstonesMtx.RUnlock()
		return nil
	}
	tl.tombstonesMtx.RUnlock()

	// for all the updated gen numbers, reload delete requests
	for userID, genNumbers := range updatedGenNumbers {
		err := tl.loadPendingTombstones(userID)
		if err != nil {
			return err
		}

		// the new gen numbers are stored only after a successful reload, so a
		// failed reload is retried on the next tick
		tl.cacheGenNumbersMtx.Lock()
		tl.cacheGenNumbers[userID] = genNumbers
		tl.cacheGenNumbersMtx.Unlock()
	}

	return nil
}
   150  
   151  // GetPendingTombstones returns all pending tombstones
   152  func (tl *TombstonesLoader) GetPendingTombstones(userID string) (*TombstonesSet, error) {
   153  	tl.tombstonesMtx.RLock()
   154  
   155  	tombstoneSet, isOK := tl.tombstones[userID]
   156  	if isOK {
   157  		tl.tombstonesMtx.RUnlock()
   158  		return tombstoneSet, nil
   159  	}
   160  
   161  	tl.tombstonesMtx.RUnlock()
   162  	err := tl.loadPendingTombstones(userID)
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  
   167  	tl.tombstonesMtx.RLock()
   168  	defer tl.tombstonesMtx.RUnlock()
   169  
   170  	return tl.tombstones[userID], nil
   171  }
   172  
   173  // GetPendingTombstones returns all pending tombstones
   174  func (tl *TombstonesLoader) GetPendingTombstonesForInterval(userID string, from, to model.Time) (*TombstonesSet, error) {
   175  	allTombstones, err := tl.GetPendingTombstones(userID)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	if !allTombstones.HasTombstonesForInterval(from, to) {
   181  		return &TombstonesSet{}, nil
   182  	}
   183  
   184  	filteredSet := TombstonesSet{oldestTombstoneStart: model.Now()}
   185  
   186  	for _, tombstone := range allTombstones.tombstones {
   187  		if !intervalsOverlap(model.Interval{Start: from, End: to}, model.Interval{Start: tombstone.StartTime, End: tombstone.EndTime}) {
   188  			continue
   189  		}
   190  
   191  		filteredSet.tombstones = append(filteredSet.tombstones, tombstone)
   192  
   193  		if tombstone.StartTime < filteredSet.oldestTombstoneStart {
   194  			filteredSet.oldestTombstoneStart = tombstone.StartTime
   195  		}
   196  
   197  		if tombstone.EndTime > filteredSet.newestTombstoneEnd {
   198  			filteredSet.newestTombstoneEnd = tombstone.EndTime
   199  		}
   200  	}
   201  
   202  	return &filteredSet, nil
   203  }
   204  
// loadPendingTombstones fetches the pending delete requests for userID from
// the delete store, pre-parses each request's selectors into matchers,
// computes the set's overall [oldest start, newest end] bounds, and caches
// the resulting TombstonesSet. With no delete store configured it caches an
// empty set.
func (tl *TombstonesLoader) loadPendingTombstones(userID string) error {
	if tl.deleteStore == nil {
		tl.tombstonesMtx.Lock()
		defer tl.tombstonesMtx.Unlock()

		tl.tombstones[userID] = &TombstonesSet{oldestTombstoneStart: 0, newestTombstoneEnd: 0}
		return nil
	}

	pendingDeleteRequests, err := tl.deleteStore.GetPendingDeleteRequestsForUser(context.Background(), userID)
	if err != nil {
		tl.metrics.deleteRequestsLoadFailures.Inc()
		return errors.Wrap(err, "error loading delete requests")
	}

	// oldestTombstoneStart starts at "now" so the first request always lowers
	// it; newestTombstoneEnd starts at the zero time for the symmetric reason.
	tombstoneSet := TombstonesSet{tombstones: pendingDeleteRequests, oldestTombstoneStart: model.Now()}
	for i := range tombstoneSet.tombstones {
		tombstoneSet.tombstones[i].Matchers = make([][]*labels.Matcher, len(tombstoneSet.tombstones[i].Selectors))

		// a single unparseable selector fails the whole load
		for j, selector := range tombstoneSet.tombstones[i].Selectors {
			tombstoneSet.tombstones[i].Matchers[j], err = parser.ParseMetricSelector(selector)

			if err != nil {
				tl.metrics.deleteRequestsLoadFailures.Inc()
				return errors.Wrapf(err, "error parsing metric selector")
			}
		}

		if tombstoneSet.tombstones[i].StartTime < tombstoneSet.oldestTombstoneStart {
			tombstoneSet.oldestTombstoneStart = tombstoneSet.tombstones[i].StartTime
		}

		if tombstoneSet.tombstones[i].EndTime > tombstoneSet.newestTombstoneEnd {
			tombstoneSet.newestTombstoneEnd = tombstoneSet.tombstones[i].EndTime
		}
	}

	tl.tombstonesMtx.Lock()
	defer tl.tombstonesMtx.Unlock()
	tl.tombstones[userID] = &tombstoneSet

	return nil
}
   248  
// GetStoreCacheGenNumber returns the store cache gen number resolved across
// the given tenants (the maximum; see getCacheGenNumbersPerTenants).
func (tl *TombstonesLoader) GetStoreCacheGenNumber(tenantIDs []string) string {
	return tl.getCacheGenNumbersPerTenants(tenantIDs).store
}
   253  
// GetResultsCacheGenNumber returns the results cache gen number resolved
// across the given tenants (the maximum; see getCacheGenNumbersPerTenants).
func (tl *TombstonesLoader) GetResultsCacheGenNumber(tenantIDs []string) string {
	return tl.getCacheGenNumbersPerTenants(tenantIDs).results
}
   258  
// getCacheGenNumbersPerTenants combines the cache gen numbers of all given
// tenants into one value: for each of results/store it keeps the numerically
// largest parseable number seen across tenants. Unparseable values are logged
// and skipped. With a single tenant its numbers are returned unmodified.
func (tl *TombstonesLoader) getCacheGenNumbersPerTenants(tenantIDs []string) *cacheGenNumbers {
	var result cacheGenNumbers

	if len(tenantIDs) == 0 {
		return &result
	}

	// keep the maximum value that's currently in result
	// NOTE(review): both maxima start at 0, so negative gen numbers never win
	// a comparison — presumably gen numbers are non-negative; confirm.
	var maxResults, maxStore int

	for pos, tenantID := range tenantIDs {
		numbers := tl.getCacheGenNumbers(tenantID)

		// handle first tenant in the list
		if pos == 0 {
			// short cut if there is only one tenant
			if len(tenantIDs) == 1 {
				return numbers
			}

			// set first tenant string whatever happens next
			result.results = numbers.results
			result.store = numbers.store
		}

		// set results number string if it's higher than the ones before
		if numbers.results != "" {
			results, err := strconv.Atoi(numbers.results)
			if err != nil {
				level.Error(util_log.Logger).Log("msg", "error parsing resultsCacheGenNumber", "user", tenantID, "err", err)
			} else if maxResults < results {
				maxResults = results
				result.results = numbers.results
			}
		}

		// set store number string if it's higher than the ones before
		if numbers.store != "" {
			store, err := strconv.Atoi(numbers.store)
			if err != nil {
				level.Error(util_log.Logger).Log("msg", "error parsing storeCacheGenNumber", "user", tenantID, "err", err)
			} else if maxStore < store {
				maxStore = store
				result.store = numbers.store
			}
		}
	}

	return &result
}
   309  
// getCacheGenNumbers returns the cached gen numbers for userID, loading them
// from the delete store (and caching them) on first use. Load errors are
// logged, counted in metrics, and an empty value is returned without caching
// so a later call retries the load.
func (tl *TombstonesLoader) getCacheGenNumbers(userID string) *cacheGenNumbers {
	tl.cacheGenNumbersMtx.RLock()
	if genNumbers, isOK := tl.cacheGenNumbers[userID]; isOK {
		tl.cacheGenNumbersMtx.RUnlock()
		return genNumbers
	}

	tl.cacheGenNumbersMtx.RUnlock()

	// NOTE(review): between the RUnlock above and the Lock below another
	// goroutine may load the same user concurrently; the later store simply
	// overwrites the earlier one (last write wins) — confirm this duplication
	// of store calls is acceptable.
	if tl.deleteStore == nil {
		tl.cacheGenNumbersMtx.Lock()
		defer tl.cacheGenNumbersMtx.Unlock()

		tl.cacheGenNumbers[userID] = &cacheGenNumbers{}
		return tl.cacheGenNumbers[userID]
	}

	genNumbers, err := tl.deleteStore.getCacheGenerationNumbers(context.Background(), userID)
	if err != nil {
		level.Error(util_log.Logger).Log("msg", "error loading cache generation numbers", "err", err)
		tl.metrics.cacheGenLoadFailures.Inc()
		return &cacheGenNumbers{}
	}

	tl.cacheGenNumbersMtx.Lock()
	defer tl.cacheGenNumbersMtx.Unlock()

	tl.cacheGenNumbers[userID] = genNumbers
	return genNumbers
}
   340  
   341  // GetDeletedIntervals returns non-overlapping, sorted  deleted intervals.
// GetDeletedIntervals returns non-overlapping, sorted deleted intervals.
// For the given series labels it collects the overlap of [from, to] with
// every tombstone whose matchers select lbls, then merges the collected
// intervals. Returns nil when nothing applies, and short-circuits to the
// whole requested interval when a single tombstone covers it entirely.
func (ts TombstonesSet) GetDeletedIntervals(lbls labels.Labels, from, to model.Time) []model.Interval {
	// cheap bounds check before scanning individual tombstones
	if len(ts.tombstones) == 0 || to < ts.oldestTombstoneStart || from > ts.newestTombstoneEnd {
		return nil
	}

	var deletedIntervals []model.Interval
	requestedInterval := model.Interval{Start: from, End: to}

	for i := range ts.tombstones {
		overlaps, overlappingInterval := getOverlappingInterval(requestedInterval,
			model.Interval{Start: ts.tombstones[i].StartTime, End: ts.tombstones[i].EndTime})

		if !overlaps {
			continue
		}

		// a tombstone applies when any one of its selectors matches the series
		matches := false
		for _, matchers := range ts.tombstones[i].Matchers {
			if labels.Selector(matchers).Matches(lbls) {
				matches = true
				break
			}
		}

		if !matches {
			continue
		}

		if overlappingInterval == requestedInterval {
			// whole interval deleted
			return []model.Interval{requestedInterval}
		}

		deletedIntervals = append(deletedIntervals, overlappingInterval)
	}

	if len(deletedIntervals) == 0 {
		return nil
	}

	return mergeIntervals(deletedIntervals)
}
   384  
   385  // Len returns number of tombstones that are there
// Len returns the number of tombstones in the set.
func (ts TombstonesSet) Len() int {
	return len(ts.tombstones)
}
   389  
   390  // HasTombstonesForInterval tells whether there are any tombstones which overlapping given interval
   391  func (ts TombstonesSet) HasTombstonesForInterval(from, to model.Time) bool {
   392  	if len(ts.tombstones) == 0 || to < ts.oldestTombstoneStart || from > ts.newestTombstoneEnd {
   393  		return false
   394  	}
   395  
   396  	return true
   397  }
   398  
   399  // sorts and merges overlapping intervals
   400  func mergeIntervals(intervals []model.Interval) []model.Interval {
   401  	if len(intervals) <= 1 {
   402  		return intervals
   403  	}
   404  
   405  	mergedIntervals := make([]model.Interval, 0, len(intervals))
   406  	sort.Slice(intervals, func(i, j int) bool {
   407  		return intervals[i].Start < intervals[j].Start
   408  	})
   409  
   410  	ongoingTrFrom, ongoingTrTo := intervals[0].Start, intervals[0].End
   411  	for i := 1; i < len(intervals); i++ {
   412  		// if there is no overlap add it to mergedIntervals
   413  		if intervals[i].Start > ongoingTrTo {
   414  			mergedIntervals = append(mergedIntervals, model.Interval{Start: ongoingTrFrom, End: ongoingTrTo})
   415  			ongoingTrFrom = intervals[i].Start
   416  			ongoingTrTo = intervals[i].End
   417  			continue
   418  		}
   419  
   420  		// there is an overlap but check whether existing time range is bigger than the current one
   421  		if intervals[i].End > ongoingTrTo {
   422  			ongoingTrTo = intervals[i].End
   423  		}
   424  	}
   425  
   426  	// add the last time range
   427  	mergedIntervals = append(mergedIntervals, model.Interval{Start: ongoingTrFrom, End: ongoingTrTo})
   428  
   429  	return mergedIntervals
   430  }
   431  
   432  func getOverlappingInterval(interval1, interval2 model.Interval) (bool, model.Interval) {
   433  	if interval2.Start > interval1.Start {
   434  		interval1.Start = interval2.Start
   435  	}
   436  
   437  	if interval2.End < interval1.End {
   438  		interval1.End = interval2.End
   439  	}
   440  
   441  	return interval1.Start < interval1.End, interval1
   442  }
   443  
   444  func intervalsOverlap(interval1, interval2 model.Interval) bool {
   445  	if interval1.Start > interval2.End || interval2.Start > interval1.End {
   446  		return false
   447  	}
   448  
   449  	return true
   450  }