github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/chunk/purger/purger.go

package purger

import (
	"bytes"
	"context"
	"flag"
	"fmt"
	"io/ioutil"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/gogo/protobuf/proto"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/promql"
	"github.com/prometheus/prometheus/promql/parser"
	"github.com/weaveworks/common/user"

	"github.com/cortexproject/cortex/pkg/chunk"
	"github.com/cortexproject/cortex/pkg/cortexpb"
	util_log "github.com/cortexproject/cortex/pkg/util/log"
)

const (
	millisecondPerDay           = int64(24 * time.Hour / time.Millisecond)
	statusSuccess               = "success"
	statusFail                  = "fail"
	loadRequestsInterval        = time.Hour
	retryFailedRequestsInterval = 15 * time.Minute
)

type purgerMetrics struct {
	deleteRequestsProcessedTotal         *prometheus.CounterVec
	deleteRequestsChunksSelectedTotal    *prometheus.CounterVec
	deleteRequestsProcessingFailures     *prometheus.CounterVec
	loadPendingRequestsAttempsTotal      *prometheus.CounterVec
	oldestPendingDeleteRequestAgeSeconds prometheus.Gauge
	pendingDeleteRequestsCount           prometheus.Gauge
}

func newPurgerMetrics(r prometheus.Registerer) *purgerMetrics {
	m := purgerMetrics{}

	m.deleteRequestsProcessedTotal = promauto.With(r).NewCounterVec(prometheus.CounterOpts{
		Namespace: "cortex",
		Name:      "purger_delete_requests_processed_total",
		Help:      "Number of delete requests processed per user",
	}, []string{"user"})
	m.deleteRequestsChunksSelectedTotal = promauto.With(r).NewCounterVec(prometheus.CounterOpts{
		Namespace: "cortex",
		Name:      "purger_delete_requests_chunks_selected_total",
		Help:      "Number of chunks selected while building delete plans per user",
	}, []string{"user"})
	m.deleteRequestsProcessingFailures = promauto.With(r).NewCounterVec(prometheus.CounterOpts{
		Namespace: "cortex",
		Name:      "purger_delete_requests_processing_failures_total",
		Help:      "Number of delete requests processing failures per user",
	}, []string{"user"})
	m.loadPendingRequestsAttempsTotal = promauto.With(r).NewCounterVec(prometheus.CounterOpts{
		Namespace: "cortex",
		Name:      "purger_load_pending_requests_attempts_total",
		Help:      "Number of attempts that were made to load pending requests with status",
	}, []string{"status"})
	m.oldestPendingDeleteRequestAgeSeconds = promauto.With(r).NewGauge(prometheus.GaugeOpts{
		Namespace: "cortex",
		Name:      "purger_oldest_pending_delete_request_age_seconds",
		Help:      "Age of oldest pending delete request in seconds, since they are over their cancellation period",
	})
	m.pendingDeleteRequestsCount = promauto.With(r).NewGauge(prometheus.GaugeOpts{
		Namespace: "cortex",
		Name:      "purger_pending_delete_requests_count",
		Help:      "Count of delete requests which are over their cancellation period and have not finished processing yet",
	})

	return &m
}

type deleteRequestWithLogger struct {
	DeleteRequest
	logger log.Logger // logger is initialized with userID and requestID to add context to every log generated using this
}

// Config holds config for chunks Purger
type Config struct {
	Enable                    bool          `yaml:"enable"`
	NumWorkers                int           `yaml:"num_workers"`
	ObjectStoreType           string        `yaml:"object_store_type"`
	DeleteRequestCancelPeriod time.Duration `yaml:"delete_request_cancel_period"`
}
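
// An illustrative (not authoritative) YAML snippet for the struct above, assuming the block sits under the
// `purger` section of the Cortex config file and that a `filesystem` object store is configured (both the
// section name and the store name are assumptions, not taken from this file):
//
//	purger:
//	  enable: true
//	  num_workers: 2
//	  object_store_type: filesystem
//	  delete_request_cancel_period: 24h
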
// RegisterFlags registers CLI flags for Config
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	f.BoolVar(&cfg.Enable, "purger.enable", false, "Enable purger to allow deletion of series. Be aware that Delete series feature is still experimental")
	f.IntVar(&cfg.NumWorkers, "purger.num-workers", 2, "Number of workers executing delete plans in parallel")
	f.StringVar(&cfg.ObjectStoreType, "purger.object-store-type", "", "Name of the object store to use for storing delete plans")
	f.DurationVar(&cfg.DeleteRequestCancelPeriod, "purger.delete-request-cancel-period", 24*time.Hour, "Allow cancellation of delete request until duration after they are created. Data would be deleted only after delete requests have been older than this duration. Ideally this should be set to at least 24h.")
}

type workerJob struct {
	planNo          int
	userID          string
	deleteRequestID string
	logger          log.Logger
}

// Purger does the purging of data which is requested to be deleted. Purger only works for chunks.
type Purger struct {
	services.Service

	cfg          Config
	deleteStore  *DeleteStore
	chunkStore   chunk.Store
	objectClient chunk.ObjectClient
	metrics      *purgerMetrics

	executePlansChan chan deleteRequestWithLogger
	workerJobChan    chan workerJob

	// We only allow processing of a single delete request at a time, since delete requests touching the same chunks
	// could change the chunk IDs of partially deleted chunks and break the purge plan for other requests.
	inProcessRequests *inProcessRequestsCollection

	// We do not want to limit pulling new delete requests to a fixed interval, which would otherwise limit the number of delete requests we process per user.
	// While loading delete requests, if we find more requests from a user pending to be processed, we record that user in usersWithPendingRequests.
	// When a user's delete request finishes processing, we check this map to decide whether to load more requests immediately instead of waiting for the next ticker.
	usersWithPendingRequests    map[string]struct{}
	usersWithPendingRequestsMtx sync.Mutex
	pullNewRequestsChan         chan struct{}

	pendingPlansCount    map[string]int // per request pending plan count
	pendingPlansCountMtx sync.Mutex

	wg sync.WaitGroup
}

// NewPurger creates a new Purger
func NewPurger(cfg Config, deleteStore *DeleteStore, chunkStore chunk.Store, storageClient chunk.ObjectClient, registerer prometheus.Registerer) (*Purger, error) {
	util_log.WarnExperimentalUse("Delete series API")

	purger := Purger{
		cfg:                      cfg,
		deleteStore:              deleteStore,
		chunkStore:               chunkStore,
		objectClient:             storageClient,
		metrics:                  newPurgerMetrics(registerer),
		pullNewRequestsChan:      make(chan struct{}, 1),
		executePlansChan:         make(chan deleteRequestWithLogger, 50),
		workerJobChan:            make(chan workerJob, 50),
		inProcessRequests:        newInProcessRequestsCollection(),
		usersWithPendingRequests: map[string]struct{}{},
		pendingPlansCount:        map[string]int{},
	}

	purger.Service = services.NewBasicService(purger.init, purger.loop, purger.stop)
	return &purger, nil
}

// init starts workers and the job scheduler, and then loads in-process delete requests
func (p *Purger) init(ctx context.Context) error {
	for i := 0; i < p.cfg.NumWorkers; i++ {
		p.wg.Add(1)
		go p.worker()
	}

	p.wg.Add(1)
	go p.jobScheduler(ctx)

	return p.loadInprocessDeleteRequests()
}

func (p *Purger) loop(ctx context.Context) error {
	loadRequests := func() {
		status := statusSuccess

		err := p.pullDeleteRequestsToPlanDeletes()
		if err != nil {
			status = statusFail
			level.Error(util_log.Logger).Log("msg", "error pulling delete requests for building plans", "err", err)
		}

		p.metrics.loadPendingRequestsAttempsTotal.WithLabelValues(status).Inc()
	}

	// load requests on startup instead of waiting for first ticker
	loadRequests()

	loadRequestsTicker := time.NewTicker(loadRequestsInterval)
	defer loadRequestsTicker.Stop()

	retryFailedRequestsTicker := time.NewTicker(retryFailedRequestsInterval)
	defer retryFailedRequestsTicker.Stop()

	for {
		select {
		case <-loadRequestsTicker.C:
			loadRequests()
		case <-p.pullNewRequestsChan:
			loadRequests()
		case <-retryFailedRequestsTicker.C:
			p.retryFailedRequests()
		case <-ctx.Done():
			return nil
		}
	}
}

// stop waits until all background tasks stop.
func (p *Purger) stop(_ error) error {
	p.wg.Wait()
	return nil
}

func (p *Purger) retryFailedRequests() {
	userIDsWithFailedRequest := p.inProcessRequests.listUsersWithFailedRequest()

	for _, userID := range userIDsWithFailedRequest {
		deleteRequest := p.inProcessRequests.get(userID)
		if deleteRequest == nil {
			level.Error(util_log.Logger).Log("msg", "expected an in-process delete request", "user", userID)
			continue
		}

		p.inProcessRequests.unsetFailedRequestForUser(userID)
		err := p.resumeStalledRequest(*deleteRequest)
		if err != nil {
			reqWithLogger := makeDeleteRequestWithLogger(*deleteRequest, util_log.Logger)
			level.Error(reqWithLogger.logger).Log("msg", "failed to resume failed request", "err", err)
		}
	}
}

func (p *Purger) workerJobCleanup(job workerJob) {
	err := p.removeDeletePlan(context.Background(), job.userID, job.deleteRequestID, job.planNo)
	if err != nil {
		level.Error(job.logger).Log("msg", "error removing delete plan",
			"plan_no", job.planNo, "err", err)
		return
	}

	p.pendingPlansCountMtx.Lock()
	p.pendingPlansCount[job.deleteRequestID]--

	if p.pendingPlansCount[job.deleteRequestID] == 0 {
		level.Info(job.logger).Log("msg", "finished execution of all plans, cleaning up and updating status of request")

		err := p.deleteStore.UpdateStatus(context.Background(), job.userID, job.deleteRequestID, StatusProcessed)
		if err != nil {
			level.Error(job.logger).Log("msg", "error updating delete request status to processed", "err", err)
		}

		p.metrics.deleteRequestsProcessedTotal.WithLabelValues(job.userID).Inc()
		delete(p.pendingPlansCount, job.deleteRequestID)
		p.pendingPlansCountMtx.Unlock()

		p.inProcessRequests.remove(job.userID)

		// request loading of more delete requests if
		// - the user has more pending requests and
		// - we do not already have a pending signal to load more requests
		p.usersWithPendingRequestsMtx.Lock()
		defer p.usersWithPendingRequestsMtx.Unlock()
		if _, ok := p.usersWithPendingRequests[job.userID]; ok {
			delete(p.usersWithPendingRequests, job.userID)
			select {
			case p.pullNewRequestsChan <- struct{}{}:
				// sent
			default:
				// already sent
			}
		} else if len(p.usersWithPendingRequests) == 0 {
			// there are no pending requests from any of the users, set the oldest pending request age and the number of pending requests to 0
			p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(0)
			p.metrics.pendingDeleteRequestsCount.Set(0)
		}
	} else {
		p.pendingPlansCountMtx.Unlock()
	}
}

// jobScheduler sends all the delete plans to workerJobChan
func (p *Purger) jobScheduler(ctx context.Context) {
	defer p.wg.Done()

	for {
		select {
		case req := <-p.executePlansChan:
			numPlans := numPlans(req.StartTime, req.EndTime)
			level.Info(req.logger).Log("msg", "sending jobs to workers for purging data", "num_jobs", numPlans)

			p.pendingPlansCountMtx.Lock()
			p.pendingPlansCount[req.RequestID] = numPlans
			p.pendingPlansCountMtx.Unlock()

			for i := 0; i < numPlans; i++ {
				p.workerJobChan <- workerJob{planNo: i, userID: req.UserID,
					deleteRequestID: req.RequestID, logger: req.logger}
			}
		case <-ctx.Done():
			close(p.workerJobChan)
			return
		}
	}
}

func (p *Purger) worker() {
	defer p.wg.Done()

	for job := range p.workerJobChan {
		err := p.executePlan(job.userID, job.deleteRequestID, job.planNo, job.logger)
		if err != nil {
			p.metrics.deleteRequestsProcessingFailures.WithLabelValues(job.userID).Inc()
			level.Error(job.logger).Log("msg", "error executing delete plan",
				"plan_no", job.planNo, "err", err)
			continue
		}

		p.workerJobCleanup(job)
	}
}

func (p *Purger) executePlan(userID, requestID string, planNo int, logger log.Logger) (err error) {
	logger = log.With(logger, "plan_no", planNo)

	defer func() {
		if err != nil {
			p.inProcessRequests.setFailedRequestForUser(userID)
		}
	}()

	plan, err := p.getDeletePlan(context.Background(), userID, requestID, planNo)
	if err != nil {
		if err == chunk.ErrStorageObjectNotFound {
			level.Info(logger).Log("msg", "plan not found, must have been executed already")
			// this means the plan was already executed and got removed. Do nothing.
			return nil
		}
		return err
	}

	level.Info(logger).Log("msg", "executing plan")

	ctx := user.InjectOrgID(context.Background(), userID)

	for i := range plan.ChunksGroup {
		level.Debug(logger).Log("msg", "deleting chunks", "labels", plan.ChunksGroup[i].Labels)

		for _, chunkDetails := range plan.ChunksGroup[i].Chunks {
			chunkRef, err := chunk.ParseExternalKey(userID, chunkDetails.ID)
			if err != nil {
				return err
			}

			var partiallyDeletedInterval *model.Interval = nil
			if chunkDetails.PartiallyDeletedInterval != nil {
				partiallyDeletedInterval = &model.Interval{
					Start: model.Time(chunkDetails.PartiallyDeletedInterval.StartTimestampMs),
					End:   model.Time(chunkDetails.PartiallyDeletedInterval.EndTimestampMs),
				}
			}

			err = p.chunkStore.DeleteChunk(ctx, chunkRef.From, chunkRef.Through, chunkRef.UserID,
				chunkDetails.ID, cortexpb.FromLabelAdaptersToLabels(plan.ChunksGroup[i].Labels), partiallyDeletedInterval)
			if err != nil {
				if isMissingChunkErr(err) {
					level.Error(logger).Log("msg", "chunk not found for deletion. We may have already deleted it",
						"chunk_id", chunkDetails.ID)
					continue
				}
				return err
			}
		}

		level.Debug(logger).Log("msg", "deleting series", "labels", plan.ChunksGroup[i].Labels)

		// this is mostly required to clean up series IDs from the series store
		err := p.chunkStore.DeleteSeriesIDs(ctx, model.Time(plan.PlanInterval.StartTimestampMs), model.Time(plan.PlanInterval.EndTimestampMs),
			userID, cortexpb.FromLabelAdaptersToLabels(plan.ChunksGroup[i].Labels))
		if err != nil {
			return err
		}
	}

	level.Info(logger).Log("msg", "finished execution of plan")

	return
}

// we need to load all in-process delete requests on startup to finish them first
func (p *Purger) loadInprocessDeleteRequests() error {
	inprocessRequests, err := p.deleteStore.GetDeleteRequestsByStatus(context.Background(), StatusBuildingPlan)
	if err != nil {
		return err
	}

	requestsWithDeletingStatus, err := p.deleteStore.GetDeleteRequestsByStatus(context.Background(), StatusDeleting)
	if err != nil {
		return err
	}

	inprocessRequests = append(inprocessRequests, requestsWithDeletingStatus...)

	for i := range inprocessRequests {
		deleteRequest := inprocessRequests[i]
		p.inProcessRequests.set(deleteRequest.UserID, &deleteRequest)
		req := makeDeleteRequestWithLogger(deleteRequest, util_log.Logger)

		level.Info(req.logger).Log("msg", "resuming in process delete requests", "status", deleteRequest.Status)
		err = p.resumeStalledRequest(deleteRequest)
		if err != nil {
			level.Error(req.logger).Log("msg", "failed to resume stalled request", "err", err)
		}
	}

	return nil
}

func (p *Purger) resumeStalledRequest(deleteRequest DeleteRequest) error {
	req := makeDeleteRequestWithLogger(deleteRequest, util_log.Logger)

	if deleteRequest.Status == StatusBuildingPlan {
		err := p.buildDeletePlan(req)
		if err != nil {
			p.metrics.deleteRequestsProcessingFailures.WithLabelValues(deleteRequest.UserID).Inc()
			return errors.Wrap(err, "failed to build delete plan")
		}

		deleteRequest.Status = StatusDeleting
	}

	if deleteRequest.Status == StatusDeleting {
		level.Info(req.logger).Log("msg", "sending delete request for execution")
		p.executePlansChan <- req
	}

	return nil
}
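
// Delete request lifecycle, for orientation (the status constants are defined elsewhere in this package):
//
//	StatusReceived -> StatusBuildingPlan -> StatusDeleting -> StatusProcessed
//
// pullDeleteRequestsToPlanDeletes moves requests from Received to BuildingPlan once they are past their
// cancellation period, buildDeletePlan moves them to Deleting, and workerJobCleanup marks them Processed
// after the last plan of a request has been executed.
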
// pullDeleteRequestsToPlanDeletes pulls delete requests which do not have their delete plans built yet and sends them for building delete plans.
// After pulling a delete request for building plans, it updates the request's status to StatusBuildingPlan to avoid picking it up again next time.
func (p *Purger) pullDeleteRequestsToPlanDeletes() error {
	deleteRequests, err := p.deleteStore.GetDeleteRequestsByStatus(context.Background(), StatusReceived)
	if err != nil {
		return err
	}

	pendingDeleteRequestsCount := p.inProcessRequests.len()
	now := model.Now()
	oldestPendingRequestCreatedAt := model.Time(0)

	// requests which are still being processed are also considered pending
	if pendingDeleteRequestsCount != 0 {
		oldestInProcessRequest := p.inProcessRequests.getOldest()
		if oldestInProcessRequest != nil {
			oldestPendingRequestCreatedAt = oldestInProcessRequest.CreatedAt
		}
	}

	for i := range deleteRequests {
		deleteRequest := deleteRequests[i]

		// adding an extra minute here to avoid a race between cancellation of a request and picking up the request for processing
		if deleteRequest.CreatedAt.Add(p.cfg.DeleteRequestCancelPeriod).Add(time.Minute).After(model.Now()) {
			continue
		}

		pendingDeleteRequestsCount++
		if oldestPendingRequestCreatedAt == 0 || deleteRequest.CreatedAt.Before(oldestPendingRequestCreatedAt) {
			oldestPendingRequestCreatedAt = deleteRequest.CreatedAt
		}

		if inprocessDeleteRequest := p.inProcessRequests.get(deleteRequest.UserID); inprocessDeleteRequest != nil {
			p.usersWithPendingRequestsMtx.Lock()
			p.usersWithPendingRequests[deleteRequest.UserID] = struct{}{}
			p.usersWithPendingRequestsMtx.Unlock()

			level.Debug(util_log.Logger).Log("msg", "skipping delete request processing for now since another request from same user is already in process",
				"inprocess_request_id", inprocessDeleteRequest.RequestID,
				"skipped_request_id", deleteRequest.RequestID, "user_id", deleteRequest.UserID)
			continue
		}

		err = p.deleteStore.UpdateStatus(context.Background(), deleteRequest.UserID, deleteRequest.RequestID, StatusBuildingPlan)
		if err != nil {
			return err
		}

		deleteRequest.Status = StatusBuildingPlan
		p.inProcessRequests.set(deleteRequest.UserID, &deleteRequest)
		req := makeDeleteRequestWithLogger(deleteRequest, util_log.Logger)

		level.Info(req.logger).Log("msg", "building plan for a new delete request")

		err := p.buildDeletePlan(req)
		if err != nil {
			p.metrics.deleteRequestsProcessingFailures.WithLabelValues(deleteRequest.UserID).Inc()

			// We do not want to remove this delete request from inProcessRequests, to make sure we do not
			// end up with multiple delete requests from the same user in the deletion process at once.
			// None of the other delete requests from the user would be considered for processing until then.
			level.Error(req.logger).Log("msg", "error building delete plan", "err", err)
			return err
		}

		level.Info(req.logger).Log("msg", "sending delete request for execution")
		p.executePlansChan <- req
	}

	// track age of oldest delete request since they are over their cancellation period
	oldestPendingRequestAge := time.Duration(0)
	if oldestPendingRequestCreatedAt != 0 {
		oldestPendingRequestAge = now.Sub(oldestPendingRequestCreatedAt.Add(p.cfg.DeleteRequestCancelPeriod))
	}
	p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(float64(oldestPendingRequestAge / time.Second))
	p.metrics.pendingDeleteRequestsCount.Set(float64(pendingDeleteRequestsCount))

	return nil
}

// buildDeletePlan builds a per-day delete plan for the given delete request.
// A day's plan includes the chunk IDs and labels of all the chunks which are supposed to be deleted.
// Chunks are grouped together by labels to avoid storing labels repetitively.
// After building the delete plans it updates the status of the delete request to StatusDeleting and sends it for execution.
func (p *Purger) buildDeletePlan(req deleteRequestWithLogger) (err error) {
	ctx := context.Background()
	ctx = user.InjectOrgID(ctx, req.UserID)

	defer func() {
		if err != nil {
			p.inProcessRequests.setFailedRequestForUser(req.UserID)
		} else {
			req.Status = StatusDeleting
			p.inProcessRequests.set(req.UserID, &req.DeleteRequest)
		}
	}()

	perDayTimeRange := splitByDay(req.StartTime, req.EndTime)
	level.Info(req.logger).Log("msg", "building delete plan", "num_plans", len(perDayTimeRange))

	plans := make([][]byte, len(perDayTimeRange))
	includedChunkIDs := map[string]struct{}{}

	for i, planRange := range perDayTimeRange {
		chunksGroups := []ChunksGroup{}

		for _, selector := range req.Selectors {
			matchers, err := parser.ParseMetricSelector(selector)
			if err != nil {
				return err
			}

			chunks, err := p.chunkStore.Get(ctx, req.UserID, planRange.Start, planRange.End, matchers...)
			if err != nil {
				return err
			}

			var cg []ChunksGroup
			cg, includedChunkIDs = groupChunks(chunks, req.StartTime, req.EndTime, includedChunkIDs)

			if len(cg) != 0 {
				chunksGroups = append(chunksGroups, cg...)
			}
		}

		plan := DeletePlan{
			PlanInterval: &Interval{
				StartTimestampMs: int64(planRange.Start),
				EndTimestampMs:   int64(planRange.End),
			},
			ChunksGroup: chunksGroups,
		}

		pb, err := proto.Marshal(&plan)
		if err != nil {
			return err
		}

		plans[i] = pb
	}

	err = p.putDeletePlans(ctx, req.UserID, req.RequestID, plans)
	if err != nil {
		return
	}

	err = p.deleteStore.UpdateStatus(ctx, req.UserID, req.RequestID, StatusDeleting)
	if err != nil {
		return
	}

	p.metrics.deleteRequestsChunksSelectedTotal.WithLabelValues(req.UserID).Add(float64(len(includedChunkIDs)))

	level.Info(req.logger).Log("msg", "built delete plans", "num_plans", len(perDayTimeRange))

	return
}

func (p *Purger) putDeletePlans(ctx context.Context, userID, requestID string, plans [][]byte) error {
	for i, plan := range plans {
		objectKey := buildObjectKeyForPlan(userID, requestID, i)

		err := p.objectClient.PutObject(ctx, objectKey, bytes.NewReader(plan))
		if err != nil {
			return err
		}
	}

	return nil
}

func (p *Purger) getDeletePlan(ctx context.Context, userID, requestID string, planNo int) (*DeletePlan, error) {
	objectKey := buildObjectKeyForPlan(userID, requestID, planNo)

	readCloser, err := p.objectClient.GetObject(ctx, objectKey)
	if err != nil {
		return nil, err
	}

	defer readCloser.Close()

	buf, err := ioutil.ReadAll(readCloser)
	if err != nil {
		return nil, err
	}

	var plan DeletePlan
	err = proto.Unmarshal(buf, &plan)
	if err != nil {
		return nil, err
	}

	return &plan, nil
}

func (p *Purger) removeDeletePlan(ctx context.Context, userID, requestID string, planNo int) error {
	objectKey := buildObjectKeyForPlan(userID, requestID, planNo)
	return p.objectClient.DeleteObject(ctx, objectKey)
}
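
// A worked example of the day-splitting arithmetic implemented by splitByDay and numPlans below
// (the dates are illustrative): for a request from 2022-01-01 10:00 UTC to 2022-01-03 02:00 UTC,
// numPlans rounds the start down to 2022-01-01 00:00 and the end up to 2022-01-04 00:00, giving 3 plans,
// and splitByDay returns [Jan 1 10:00-23:59:59.999, Jan 2 00:00-23:59:59.999, Jan 3 00:00-02:00].
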
// splitByDay returns one interval per day-long plan
func splitByDay(start, end model.Time) []model.Interval {
	numOfDays := numPlans(start, end)

	perDayTimeRange := make([]model.Interval, numOfDays)
	startOfNextDay := model.Time(((int64(start) / millisecondPerDay) + 1) * millisecondPerDay)
	perDayTimeRange[0] = model.Interval{Start: start, End: startOfNextDay - 1}

	for i := 1; i < numOfDays; i++ {
		interval := model.Interval{Start: startOfNextDay}
		startOfNextDay += model.Time(millisecondPerDay)
		interval.End = startOfNextDay - 1
		perDayTimeRange[i] = interval
	}

	perDayTimeRange[numOfDays-1].End = end

	return perDayTimeRange
}

func numPlans(start, end model.Time) int {
	// rounding start down to the start of the day
	if start%model.Time(millisecondPerDay) != 0 {
		start = model.Time((int64(start) / millisecondPerDay) * millisecondPerDay)
	}

	// rounding end up to the end of the day
	if end%model.Time(millisecondPerDay) != 0 {
		end = model.Time((int64(end)/millisecondPerDay)*millisecondPerDay + millisecondPerDay)
	}

	return int(int64(end-start) / millisecondPerDay)
}

// groupChunks groups chunks together by unique label sets, i.e. all the chunks with the same labels are stored in one group.
// Chunk details are stored in groups for each unique label set to avoid storing labels repetitively for each chunk.
func groupChunks(chunks []chunk.Chunk, deleteFrom, deleteThrough model.Time, includedChunkIDs map[string]struct{}) ([]ChunksGroup, map[string]struct{}) {
	metricToChunks := make(map[string]ChunksGroup)

	for _, chk := range chunks {
		chunkID := chk.ExternalKey()

		if _, ok := includedChunkIDs[chunkID]; ok {
			continue
		}
		// chunk.Metric is assumed to be sorted, which should give the same value from String() for the same series.
		// If they stop being sorted then in the worst case we would lose the benefit of grouping chunks to avoid storing labels repetitively.
		metricString := chk.Metric.String()
		group, ok := metricToChunks[metricString]
		if !ok {
			group = ChunksGroup{Labels: cortexpb.FromLabelsToLabelAdapters(chk.Metric)}
		}

		chunkDetails := ChunkDetails{ID: chunkID}

		if deleteFrom > chk.From || deleteThrough < chk.Through {
			partiallyDeletedInterval := Interval{StartTimestampMs: int64(chk.From), EndTimestampMs: int64(chk.Through)}

			if deleteFrom > chk.From {
				partiallyDeletedInterval.StartTimestampMs = int64(deleteFrom)
			}

			if deleteThrough < chk.Through {
				partiallyDeletedInterval.EndTimestampMs = int64(deleteThrough)
			}
			chunkDetails.PartiallyDeletedInterval = &partiallyDeletedInterval
		}

		group.Chunks = append(group.Chunks, chunkDetails)
		includedChunkIDs[chunkID] = struct{}{}
		metricToChunks[metricString] = group
	}

	chunksGroups := make([]ChunksGroup, 0, len(metricToChunks))

	for _, group := range metricToChunks {
		chunksGroups = append(chunksGroups, group)
	}

	return chunksGroups, includedChunkIDs
}

func isMissingChunkErr(err error) bool {
	if err == chunk.ErrStorageObjectNotFound {
		return true
	}
	if promqlStorageErr, ok := err.(promql.ErrStorage); ok && promqlStorageErr.Err == chunk.ErrStorageObjectNotFound {
		return true
	}

	return false
}
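
// For illustration (the IDs below are made up): buildObjectKeyForPlan("user-1", "0123abcd", 2)
// returns "user-1:0123abcd/2", i.e. one object per user, delete request and plan number.
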
func buildObjectKeyForPlan(userID, requestID string, planNo int) string {
	return fmt.Sprintf("%s:%s/%d", userID, requestID, planNo)
}

func makeDeleteRequestWithLogger(deleteRequest DeleteRequest, l log.Logger) deleteRequestWithLogger {
	logger := log.With(l, "user_id", deleteRequest.UserID, "request_id", deleteRequest.RequestID)
	return deleteRequestWithLogger{deleteRequest, logger}
}

// inProcessRequestsCollection stores the DeleteRequests which are in process for each user.
// Currently we only allow processing of one delete request per user, so it stores a single DeleteRequest per user.
type inProcessRequestsCollection struct {
	requests                map[string]*DeleteRequest
	usersWithFailedRequests map[string]struct{}
	mtx                     sync.RWMutex
}

func newInProcessRequestsCollection() *inProcessRequestsCollection {
	return &inProcessRequestsCollection{
		requests:                map[string]*DeleteRequest{},
		usersWithFailedRequests: map[string]struct{}{},
	}
}

func (i *inProcessRequestsCollection) set(userID string, request *DeleteRequest) {
	i.mtx.Lock()
	defer i.mtx.Unlock()

	i.requests[userID] = request
}

func (i *inProcessRequestsCollection) get(userID string) *DeleteRequest {
	i.mtx.RLock()
	defer i.mtx.RUnlock()

	return i.requests[userID]
}

func (i *inProcessRequestsCollection) remove(userID string) {
	i.mtx.Lock()
	defer i.mtx.Unlock()

	delete(i.requests, userID)
}

func (i *inProcessRequestsCollection) len() int {
	i.mtx.RLock()
	defer i.mtx.RUnlock()

	return len(i.requests)
}

func (i *inProcessRequestsCollection) getOldest() *DeleteRequest {
	i.mtx.RLock()
	defer i.mtx.RUnlock()

	var oldestRequest *DeleteRequest
	for _, request := range i.requests {
		if oldestRequest == nil || request.CreatedAt.Before(oldestRequest.CreatedAt) {
			oldestRequest = request
		}
	}

	return oldestRequest
}

func (i *inProcessRequestsCollection) setFailedRequestForUser(userID string) {
	i.mtx.Lock()
	defer i.mtx.Unlock()

	i.usersWithFailedRequests[userID] = struct{}{}
}

func (i *inProcessRequestsCollection) unsetFailedRequestForUser(userID string) {
	i.mtx.Lock()
	defer i.mtx.Unlock()

	delete(i.usersWithFailedRequests, userID)
}

func (i *inProcessRequestsCollection) listUsersWithFailedRequest() []string {
	i.mtx.RLock()
	defer i.mtx.RUnlock()

	userIDs := make([]string, 0, len(i.usersWithFailedRequests))
	for userID := range i.usersWithFailedRequests {
		userIDs = append(userIDs, userID)
	}

	return userIDs
}
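
// A minimal wiring sketch, assuming the delete store, chunk store and object client are constructed
// elsewhere (their construction is outside this file):
//
//	purger, err := NewPurger(cfg, deleteStore, chunkStore, objectClient, prometheus.DefaultRegisterer)
//	if err != nil {
//		return err
//	}
//	// Purger is a dskit service; start it and wait until it is running.
//	if err := services.StartAndAwaitRunning(ctx, purger); err != nil {
//		return err
//	}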