github.com/grafana/pyroscope@v1.18.0/pkg/metastore/index/cleaner/cleaner.go (about)

     1  package cleaner
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"flag"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/go-kit/log"
    11  	"github.com/go-kit/log/level"
    12  
    13  	"github.com/grafana/pyroscope/pkg/metastore/index/cleaner/retention"
    14  	"github.com/grafana/pyroscope/pkg/metastore/raftnode"
    15  )
    16  
// Index is the subset of the metastore index the cleaner depends on:
// the ability to truncate the index according to a retention policy.
type Index interface {
	// TruncateIndex applies the given retention policy to the index.
	// It returns an error if truncation could not be completed, which
	// may include context cancellation or raft leadership errors.
	TruncateIndex(context.Context, retention.Policy) error
}
    20  
    21  type Config struct {
    22  	CleanupMaxPartitions int           `yaml:"cleanup_max_partitions"`
    23  	CleanupGracePeriod   time.Duration `yaml:"cleanup_grace_period"`
    24  	CleanupInterval      time.Duration `yaml:"cleanup_interval"`
    25  }
    26  
    27  func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
    28  	f.DurationVar(&c.CleanupInterval, prefix+"cleanup-interval", 0, "Interval for index cleanup check. 0 to disable.")
    29  	f.DurationVar(&c.CleanupGracePeriod, prefix+"cleanup-grace-period", time.Hour*6, "After a partition is eligible for deletion, it will be kept for this period before actually being evaluated. The period should cover the time difference between the block creation time and the data timestamps. Blocks are only deleted if all data in the block has passed the retention period, and the grace period delays the moment when the partition is evaluated for deletion.")
    30  	f.IntVar(&c.CleanupMaxPartitions, prefix+"cleanup-max-partitions", 32, "Maximum number of partitions to cleanup at once. A partition is qualified by partition key, tenant, and shard.")
    31  }
    32  
// Cleaner is responsible for periodically cleaning up
// the index by applying retention policies. As of now,
// it only applies the time-based retention policy.
type Cleaner struct {
	logger    log.Logger
	overrides retention.Overrides // per-tenant retention overrides consulted by the policy
	config    Config
	index     Index // index to be truncated on each cleanup pass

	// started tracks whether the background loop is running; cancel
	// stops it. Both are guarded by mu so Start/Stop are safe to call
	// concurrently and are idempotent.
	started bool
	cancel  context.CancelFunc
	mu      sync.Mutex
}
    46  
    47  func NewCleaner(logger log.Logger, overrides retention.Overrides, config Config, index Index) *Cleaner {
    48  	return &Cleaner{
    49  		logger:    logger,
    50  		overrides: overrides,
    51  		config:    config,
    52  		index:     index,
    53  	}
    54  }
    55  
    56  func (c *Cleaner) Start() {
    57  	if c.config.CleanupInterval == 0 {
    58  		return
    59  	}
    60  	c.mu.Lock()
    61  	defer c.mu.Unlock()
    62  	if c.started {
    63  		c.logger.Log("msg", "index cleaner already started")
    64  		return
    65  	}
    66  	ctx, cancel := context.WithCancel(context.Background())
    67  	c.cancel = cancel
    68  	c.started = true
    69  	go c.loop(ctx)
    70  	c.logger.Log("msg", "index cleaner started")
    71  }
    72  
    73  func (c *Cleaner) Stop() {
    74  	if c.config.CleanupInterval == 0 {
    75  		return
    76  	}
    77  	c.mu.Lock()
    78  	defer c.mu.Unlock()
    79  	if !c.started {
    80  		c.logger.Log("msg", "index cleaner already stopped")
    81  		return
    82  	}
    83  	if c.cancel != nil {
    84  		c.cancel()
    85  	}
    86  	c.started = false
    87  	c.logger.Log("msg", "index cleaner stopped")
    88  }
    89  
    90  func (c *Cleaner) loop(ctx context.Context) {
    91  	ticker := time.NewTicker(c.config.CleanupInterval)
    92  	defer ticker.Stop()
    93  	for {
    94  		select {
    95  		case <-ctx.Done():
    96  			return
    97  		case <-ticker.C:
    98  			rp := retention.NewTimeBasedRetentionPolicy(
    99  				log.With(c.logger, "component", "retention-policy"),
   100  				c.overrides,
   101  				c.config.CleanupMaxPartitions,
   102  				c.config.CleanupGracePeriod,
   103  				time.Now(),
   104  			)
   105  			switch err := c.index.TruncateIndex(ctx, rp); {
   106  			case err == nil:
   107  			case errors.Is(err, context.Canceled):
   108  				return
   109  			case raftnode.IsRaftLeadershipError(err):
   110  				level.Warn(c.logger).Log("msg", "leadership change; cleanup interrupted", "err", err)
   111  			default:
   112  				level.Error(c.logger).Log("msg", "cleanup attempt failed", "err", err)
   113  			}
   114  		}
   115  	}
   116  }