github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/chunk/purger/tombstones.go (about) 1 package purger 2 3 import ( 4 "context" 5 "sort" 6 "strconv" 7 "sync" 8 "time" 9 10 "github.com/go-kit/log/level" 11 "github.com/pkg/errors" 12 "github.com/prometheus/client_golang/prometheus" 13 "github.com/prometheus/client_golang/prometheus/promauto" 14 "github.com/prometheus/common/model" 15 "github.com/prometheus/prometheus/pkg/labels" 16 "github.com/prometheus/prometheus/promql/parser" 17 18 util_log "github.com/cortexproject/cortex/pkg/util/log" 19 ) 20 21 const tombstonesReloadDuration = 5 * time.Minute 22 23 type tombstonesLoaderMetrics struct { 24 cacheGenLoadFailures prometheus.Counter 25 deleteRequestsLoadFailures prometheus.Counter 26 } 27 28 func newtombstonesLoaderMetrics(r prometheus.Registerer) *tombstonesLoaderMetrics { 29 m := tombstonesLoaderMetrics{} 30 31 m.cacheGenLoadFailures = promauto.With(r).NewCounter(prometheus.CounterOpts{ 32 Namespace: "cortex", 33 Name: "tombstones_loader_cache_gen_load_failures_total", 34 Help: "Total number of failures while loading cache generation number using tombstones loader", 35 }) 36 m.deleteRequestsLoadFailures = promauto.With(r).NewCounter(prometheus.CounterOpts{ 37 Namespace: "cortex", 38 Name: "tombstones_loader_cache_delete_requests_load_failures_total", 39 Help: "Total number of failures while loading delete requests using tombstones loader", 40 }) 41 42 return &m 43 } 44 45 // TombstonesSet holds all the pending delete requests for a user 46 type TombstonesSet struct { 47 tombstones []DeleteRequest 48 oldestTombstoneStart, newestTombstoneEnd model.Time // Used as optimization to find whether we want to iterate over tombstones or not 49 } 50 51 // Used for easier injection of mocks. 52 type DeleteStoreAPI interface { 53 getCacheGenerationNumbers(ctx context.Context, user string) (*cacheGenNumbers, error) 54 GetPendingDeleteRequestsForUser(ctx context.Context, id string) ([]DeleteRequest, error) 55 } 56 57 // TombstonesLoader loads delete requests and gen numbers from store and keeps checking for updates. 58 // It keeps checking for changes in gen numbers, which also means changes in delete requests and reloads specific users delete requests. 59 type TombstonesLoader struct { 60 tombstones map[string]*TombstonesSet 61 tombstonesMtx sync.RWMutex 62 63 cacheGenNumbers map[string]*cacheGenNumbers 64 cacheGenNumbersMtx sync.RWMutex 65 66 deleteStore DeleteStoreAPI 67 metrics *tombstonesLoaderMetrics 68 quit chan struct{} 69 } 70 71 // NewTombstonesLoader creates a TombstonesLoader 72 func NewTombstonesLoader(deleteStore DeleteStoreAPI, registerer prometheus.Registerer) *TombstonesLoader { 73 tl := TombstonesLoader{ 74 tombstones: map[string]*TombstonesSet{}, 75 cacheGenNumbers: map[string]*cacheGenNumbers{}, 76 deleteStore: deleteStore, 77 metrics: newtombstonesLoaderMetrics(registerer), 78 } 79 go tl.loop() 80 81 return &tl 82 } 83 84 // Stop stops TombstonesLoader 85 func (tl *TombstonesLoader) Stop() { 86 close(tl.quit) 87 } 88 89 func (tl *TombstonesLoader) loop() { 90 if tl.deleteStore == nil { 91 return 92 } 93 94 tombstonesReloadTimer := time.NewTicker(tombstonesReloadDuration) 95 for { 96 select { 97 case <-tombstonesReloadTimer.C: 98 err := tl.reloadTombstones() 99 if err != nil { 100 level.Error(util_log.Logger).Log("msg", "error reloading tombstones", "err", err) 101 } 102 case <-tl.quit: 103 return 104 } 105 } 106 } 107 108 func (tl *TombstonesLoader) reloadTombstones() error { 109 updatedGenNumbers := make(map[string]*cacheGenNumbers) 110 tl.cacheGenNumbersMtx.RLock() 111 112 // check for updates in loaded gen numbers 113 for userID, oldGenNumbers := range tl.cacheGenNumbers { 114 newGenNumbers, err := tl.deleteStore.getCacheGenerationNumbers(context.Background(), userID) 115 if err != nil { 116 tl.cacheGenNumbersMtx.RUnlock() 117 return err 118 } 119 120 if *oldGenNumbers != *newGenNumbers { 121 updatedGenNumbers[userID] = newGenNumbers 122 } 123 } 124 125 tl.cacheGenNumbersMtx.RUnlock() 126 127 // in frontend we load only cache gen numbers so short circuit here if there are no loaded deleted requests 128 // first call to GetPendingTombstones would avoid doing this. 129 tl.tombstonesMtx.RLock() 130 if len(tl.tombstones) == 0 { 131 tl.tombstonesMtx.RUnlock() 132 return nil 133 } 134 tl.tombstonesMtx.RUnlock() 135 136 // for all the updated gen numbers, reload delete requests 137 for userID, genNumbers := range updatedGenNumbers { 138 err := tl.loadPendingTombstones(userID) 139 if err != nil { 140 return err 141 } 142 143 tl.cacheGenNumbersMtx.Lock() 144 tl.cacheGenNumbers[userID] = genNumbers 145 tl.cacheGenNumbersMtx.Unlock() 146 } 147 148 return nil 149 } 150 151 // GetPendingTombstones returns all pending tombstones 152 func (tl *TombstonesLoader) GetPendingTombstones(userID string) (*TombstonesSet, error) { 153 tl.tombstonesMtx.RLock() 154 155 tombstoneSet, isOK := tl.tombstones[userID] 156 if isOK { 157 tl.tombstonesMtx.RUnlock() 158 return tombstoneSet, nil 159 } 160 161 tl.tombstonesMtx.RUnlock() 162 err := tl.loadPendingTombstones(userID) 163 if err != nil { 164 return nil, err 165 } 166 167 tl.tombstonesMtx.RLock() 168 defer tl.tombstonesMtx.RUnlock() 169 170 return tl.tombstones[userID], nil 171 } 172 173 // GetPendingTombstones returns all pending tombstones 174 func (tl *TombstonesLoader) GetPendingTombstonesForInterval(userID string, from, to model.Time) (*TombstonesSet, error) { 175 allTombstones, err := tl.GetPendingTombstones(userID) 176 if err != nil { 177 return nil, err 178 } 179 180 if !allTombstones.HasTombstonesForInterval(from, to) { 181 return &TombstonesSet{}, nil 182 } 183 184 filteredSet := TombstonesSet{oldestTombstoneStart: model.Now()} 185 186 for _, tombstone := range allTombstones.tombstones { 187 if !intervalsOverlap(model.Interval{Start: from, End: to}, model.Interval{Start: tombstone.StartTime, End: tombstone.EndTime}) { 188 continue 189 } 190 191 filteredSet.tombstones = append(filteredSet.tombstones, tombstone) 192 193 if tombstone.StartTime < filteredSet.oldestTombstoneStart { 194 filteredSet.oldestTombstoneStart = tombstone.StartTime 195 } 196 197 if tombstone.EndTime > filteredSet.newestTombstoneEnd { 198 filteredSet.newestTombstoneEnd = tombstone.EndTime 199 } 200 } 201 202 return &filteredSet, nil 203 } 204 205 func (tl *TombstonesLoader) loadPendingTombstones(userID string) error { 206 if tl.deleteStore == nil { 207 tl.tombstonesMtx.Lock() 208 defer tl.tombstonesMtx.Unlock() 209 210 tl.tombstones[userID] = &TombstonesSet{oldestTombstoneStart: 0, newestTombstoneEnd: 0} 211 return nil 212 } 213 214 pendingDeleteRequests, err := tl.deleteStore.GetPendingDeleteRequestsForUser(context.Background(), userID) 215 if err != nil { 216 tl.metrics.deleteRequestsLoadFailures.Inc() 217 return errors.Wrap(err, "error loading delete requests") 218 } 219 220 tombstoneSet := TombstonesSet{tombstones: pendingDeleteRequests, oldestTombstoneStart: model.Now()} 221 for i := range tombstoneSet.tombstones { 222 tombstoneSet.tombstones[i].Matchers = make([][]*labels.Matcher, len(tombstoneSet.tombstones[i].Selectors)) 223 224 for j, selector := range tombstoneSet.tombstones[i].Selectors { 225 tombstoneSet.tombstones[i].Matchers[j], err = parser.ParseMetricSelector(selector) 226 227 if err != nil { 228 tl.metrics.deleteRequestsLoadFailures.Inc() 229 return errors.Wrapf(err, "error parsing metric selector") 230 } 231 } 232 233 if tombstoneSet.tombstones[i].StartTime < tombstoneSet.oldestTombstoneStart { 234 tombstoneSet.oldestTombstoneStart = tombstoneSet.tombstones[i].StartTime 235 } 236 237 if tombstoneSet.tombstones[i].EndTime > tombstoneSet.newestTombstoneEnd { 238 tombstoneSet.newestTombstoneEnd = tombstoneSet.tombstones[i].EndTime 239 } 240 } 241 242 tl.tombstonesMtx.Lock() 243 defer tl.tombstonesMtx.Unlock() 244 tl.tombstones[userID] = &tombstoneSet 245 246 return nil 247 } 248 249 // GetStoreCacheGenNumber returns store cache gen number for a user 250 func (tl *TombstonesLoader) GetStoreCacheGenNumber(tenantIDs []string) string { 251 return tl.getCacheGenNumbersPerTenants(tenantIDs).store 252 } 253 254 // GetResultsCacheGenNumber returns results cache gen number for a user 255 func (tl *TombstonesLoader) GetResultsCacheGenNumber(tenantIDs []string) string { 256 return tl.getCacheGenNumbersPerTenants(tenantIDs).results 257 } 258 259 func (tl *TombstonesLoader) getCacheGenNumbersPerTenants(tenantIDs []string) *cacheGenNumbers { 260 var result cacheGenNumbers 261 262 if len(tenantIDs) == 0 { 263 return &result 264 } 265 266 // keep the maximum value that's currently in result 267 var maxResults, maxStore int 268 269 for pos, tenantID := range tenantIDs { 270 numbers := tl.getCacheGenNumbers(tenantID) 271 272 // handle first tenant in the list 273 if pos == 0 { 274 // short cut if there is only one tenant 275 if len(tenantIDs) == 1 { 276 return numbers 277 } 278 279 // set first tenant string whatever happens next 280 result.results = numbers.results 281 result.store = numbers.store 282 } 283 284 // set results number string if it's higher than the ones before 285 if numbers.results != "" { 286 results, err := strconv.Atoi(numbers.results) 287 if err != nil { 288 level.Error(util_log.Logger).Log("msg", "error parsing resultsCacheGenNumber", "user", tenantID, "err", err) 289 } else if maxResults < results { 290 maxResults = results 291 result.results = numbers.results 292 } 293 } 294 295 // set store number string if it's higher than the ones before 296 if numbers.store != "" { 297 store, err := strconv.Atoi(numbers.store) 298 if err != nil { 299 level.Error(util_log.Logger).Log("msg", "error parsing storeCacheGenNumber", "user", tenantID, "err", err) 300 } else if maxStore < store { 301 maxStore = store 302 result.store = numbers.store 303 } 304 } 305 } 306 307 return &result 308 } 309 310 func (tl *TombstonesLoader) getCacheGenNumbers(userID string) *cacheGenNumbers { 311 tl.cacheGenNumbersMtx.RLock() 312 if genNumbers, isOK := tl.cacheGenNumbers[userID]; isOK { 313 tl.cacheGenNumbersMtx.RUnlock() 314 return genNumbers 315 } 316 317 tl.cacheGenNumbersMtx.RUnlock() 318 319 if tl.deleteStore == nil { 320 tl.cacheGenNumbersMtx.Lock() 321 defer tl.cacheGenNumbersMtx.Unlock() 322 323 tl.cacheGenNumbers[userID] = &cacheGenNumbers{} 324 return tl.cacheGenNumbers[userID] 325 } 326 327 genNumbers, err := tl.deleteStore.getCacheGenerationNumbers(context.Background(), userID) 328 if err != nil { 329 level.Error(util_log.Logger).Log("msg", "error loading cache generation numbers", "err", err) 330 tl.metrics.cacheGenLoadFailures.Inc() 331 return &cacheGenNumbers{} 332 } 333 334 tl.cacheGenNumbersMtx.Lock() 335 defer tl.cacheGenNumbersMtx.Unlock() 336 337 tl.cacheGenNumbers[userID] = genNumbers 338 return genNumbers 339 } 340 341 // GetDeletedIntervals returns non-overlapping, sorted deleted intervals. 342 func (ts TombstonesSet) GetDeletedIntervals(lbls labels.Labels, from, to model.Time) []model.Interval { 343 if len(ts.tombstones) == 0 || to < ts.oldestTombstoneStart || from > ts.newestTombstoneEnd { 344 return nil 345 } 346 347 var deletedIntervals []model.Interval 348 requestedInterval := model.Interval{Start: from, End: to} 349 350 for i := range ts.tombstones { 351 overlaps, overlappingInterval := getOverlappingInterval(requestedInterval, 352 model.Interval{Start: ts.tombstones[i].StartTime, End: ts.tombstones[i].EndTime}) 353 354 if !overlaps { 355 continue 356 } 357 358 matches := false 359 for _, matchers := range ts.tombstones[i].Matchers { 360 if labels.Selector(matchers).Matches(lbls) { 361 matches = true 362 break 363 } 364 } 365 366 if !matches { 367 continue 368 } 369 370 if overlappingInterval == requestedInterval { 371 // whole interval deleted 372 return []model.Interval{requestedInterval} 373 } 374 375 deletedIntervals = append(deletedIntervals, overlappingInterval) 376 } 377 378 if len(deletedIntervals) == 0 { 379 return nil 380 } 381 382 return mergeIntervals(deletedIntervals) 383 } 384 385 // Len returns number of tombstones that are there 386 func (ts TombstonesSet) Len() int { 387 return len(ts.tombstones) 388 } 389 390 // HasTombstonesForInterval tells whether there are any tombstones which overlapping given interval 391 func (ts TombstonesSet) HasTombstonesForInterval(from, to model.Time) bool { 392 if len(ts.tombstones) == 0 || to < ts.oldestTombstoneStart || from > ts.newestTombstoneEnd { 393 return false 394 } 395 396 return true 397 } 398 399 // sorts and merges overlapping intervals 400 func mergeIntervals(intervals []model.Interval) []model.Interval { 401 if len(intervals) <= 1 { 402 return intervals 403 } 404 405 mergedIntervals := make([]model.Interval, 0, len(intervals)) 406 sort.Slice(intervals, func(i, j int) bool { 407 return intervals[i].Start < intervals[j].Start 408 }) 409 410 ongoingTrFrom, ongoingTrTo := intervals[0].Start, intervals[0].End 411 for i := 1; i < len(intervals); i++ { 412 // if there is no overlap add it to mergedIntervals 413 if intervals[i].Start > ongoingTrTo { 414 mergedIntervals = append(mergedIntervals, model.Interval{Start: ongoingTrFrom, End: ongoingTrTo}) 415 ongoingTrFrom = intervals[i].Start 416 ongoingTrTo = intervals[i].End 417 continue 418 } 419 420 // there is an overlap but check whether existing time range is bigger than the current one 421 if intervals[i].End > ongoingTrTo { 422 ongoingTrTo = intervals[i].End 423 } 424 } 425 426 // add the last time range 427 mergedIntervals = append(mergedIntervals, model.Interval{Start: ongoingTrFrom, End: ongoingTrTo}) 428 429 return mergedIntervals 430 } 431 432 func getOverlappingInterval(interval1, interval2 model.Interval) (bool, model.Interval) { 433 if interval2.Start > interval1.Start { 434 interval1.Start = interval2.Start 435 } 436 437 if interval2.End < interval1.End { 438 interval1.End = interval2.End 439 } 440 441 return interval1.Start < interval1.End, interval1 442 } 443 444 func intervalsOverlap(interval1, interval2 model.Interval) bool { 445 if interval1.Start > interval2.End || interval2.Start > interval1.End { 446 return false 447 } 448 449 return true 450 }