github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/compactor/retention/expiration.go (about)

     1  package retention
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/go-kit/log/level"
     8  	"github.com/prometheus/common/model"
     9  	"github.com/prometheus/prometheus/model/labels"
    10  
    11  	"github.com/grafana/loki/pkg/util/filter"
    12  	util_log "github.com/grafana/loki/pkg/util/log"
    13  	"github.com/grafana/loki/pkg/validation"
    14  )
    15  
    16  // IntervalFilter contains the interval to delete
    17  // and the function that filters lines. These will be
    18  // applied to a chunk.
    19  type IntervalFilter struct {
    20  	Interval model.Interval
    21  	Filter   filter.Func
    22  }
    23  
    24  type ExpirationChecker interface {
    25  	Expired(ref ChunkEntry, now model.Time) (bool, []IntervalFilter)
    26  	IntervalMayHaveExpiredChunks(interval model.Interval, userID string) bool
    27  	MarkPhaseStarted()
    28  	MarkPhaseFailed()
    29  	MarkPhaseTimedOut()
    30  	MarkPhaseFinished()
    31  	DropFromIndex(ref ChunkEntry, tableEndTime model.Time, now model.Time) bool
    32  }
    33  
    34  type expirationChecker struct {
    35  	tenantsRetention         *TenantsRetention
    36  	latestRetentionStartTime latestRetentionStartTime
    37  }
    38  
    39  type Limits interface {
    40  	RetentionPeriod(userID string) time.Duration
    41  	StreamRetention(userID string) []validation.StreamRetention
    42  	AllByUserID() map[string]*validation.Limits
    43  	DefaultLimits() *validation.Limits
    44  }
    45  
    46  func NewExpirationChecker(limits Limits) ExpirationChecker {
    47  	return &expirationChecker{
    48  		tenantsRetention: NewTenantsRetention(limits),
    49  	}
    50  }
    51  
    52  // Expired tells if a ref chunk is expired based on retention rules.
    53  func (e *expirationChecker) Expired(ref ChunkEntry, now model.Time) (bool, []IntervalFilter) {
    54  	userID := unsafeGetString(ref.UserID)
    55  	period := e.tenantsRetention.RetentionPeriodFor(userID, ref.Labels)
    56  	return now.Sub(ref.Through) > period, nil
    57  }
    58  
    59  // DropFromIndex tells if it is okay to drop the chunk entry from index table.
    60  // We check if tableEndTime is out of retention period, calculated using the labels from the chunk.
    61  // If the tableEndTime is out of retention then we can drop the chunk entry without removing the chunk from the store.
    62  func (e *expirationChecker) DropFromIndex(ref ChunkEntry, tableEndTime model.Time, now model.Time) bool {
    63  	userID := unsafeGetString(ref.UserID)
    64  	period := e.tenantsRetention.RetentionPeriodFor(userID, ref.Labels)
    65  	return now.Sub(tableEndTime) > period
    66  }
    67  
    68  func (e *expirationChecker) MarkPhaseStarted() {
    69  	e.latestRetentionStartTime = findLatestRetentionStartTime(model.Now(), e.tenantsRetention.limits)
    70  	level.Info(util_log.Logger).Log("msg", fmt.Sprintf("overall smallest retention period %v, default smallest retention period %v",
    71  		e.latestRetentionStartTime.overall, e.latestRetentionStartTime.defaults))
    72  }
    73  
    74  func (e *expirationChecker) MarkPhaseFailed()   {}
    75  func (e *expirationChecker) MarkPhaseTimedOut() {}
    76  func (e *expirationChecker) MarkPhaseFinished() {}
    77  
    78  func (e *expirationChecker) IntervalMayHaveExpiredChunks(interval model.Interval, userID string) bool {
    79  	// when userID is empty, it means we are checking for common index table. In this case we use e.overallLatestRetentionStartTime.
    80  	latestRetentionStartTime := e.latestRetentionStartTime.overall
    81  	if userID != "" {
    82  		// when userID is not empty, it means we are checking for user index table.
    83  		latestRetentionStartTimeForUser, ok := e.latestRetentionStartTime.byUser[userID]
    84  		if ok {
    85  			// user has custom retention config, let us use user specific latest retention start time.
    86  			latestRetentionStartTime = latestRetentionStartTimeForUser
    87  		} else {
    88  			// user does not have custom retention config, let us use default latest retention start time.
    89  			latestRetentionStartTime = e.latestRetentionStartTime.defaults
    90  		}
    91  	}
    92  	return interval.Start.Before(latestRetentionStartTime)
    93  }
    94  
    95  // NeverExpiringExpirationChecker returns an expiration checker that never expires anything
    96  func NeverExpiringExpirationChecker(limits Limits) ExpirationChecker {
    97  	return &neverExpiringExpirationChecker{}
    98  }
    99  
   100  type neverExpiringExpirationChecker struct{}
   101  
   102  func (e *neverExpiringExpirationChecker) Expired(ref ChunkEntry, now model.Time) (bool, []IntervalFilter) {
   103  	return false, nil
   104  }
   105  func (e *neverExpiringExpirationChecker) IntervalMayHaveExpiredChunks(interval model.Interval, userID string) bool {
   106  	return false
   107  }
   108  func (e *neverExpiringExpirationChecker) MarkPhaseStarted()  {}
   109  func (e *neverExpiringExpirationChecker) MarkPhaseFailed()   {}
   110  func (e *neverExpiringExpirationChecker) MarkPhaseTimedOut() {}
   111  func (e *neverExpiringExpirationChecker) MarkPhaseFinished() {}
   112  func (e *neverExpiringExpirationChecker) DropFromIndex(ref ChunkEntry, tableEndTime model.Time, now model.Time) bool {
   113  	return false
   114  }
   115  
   116  type TenantsRetention struct {
   117  	limits Limits
   118  }
   119  
   120  func NewTenantsRetention(l Limits) *TenantsRetention {
   121  	return &TenantsRetention{
   122  		limits: l,
   123  	}
   124  }
   125  
   126  func (tr *TenantsRetention) RetentionPeriodFor(userID string, lbs labels.Labels) time.Duration {
   127  	streamRetentions := tr.limits.StreamRetention(userID)
   128  	globalRetention := tr.limits.RetentionPeriod(userID)
   129  	var (
   130  		matchedRule validation.StreamRetention
   131  		found       bool
   132  	)
   133  Outer:
   134  	for _, streamRetention := range streamRetentions {
   135  		for _, m := range streamRetention.Matchers {
   136  			if !m.Matches(lbs.Get(m.Name)) {
   137  				continue Outer
   138  			}
   139  		}
   140  		// the rule is matched.
   141  		if found {
   142  			// if the current matched rule has a higher priority we keep it.
   143  			if matchedRule.Priority > streamRetention.Priority {
   144  				continue
   145  			}
   146  			// if priority is equal we keep the lowest retention.
   147  			if matchedRule.Priority == streamRetention.Priority && matchedRule.Period <= streamRetention.Period {
   148  				continue
   149  			}
   150  		}
   151  		found = true
   152  		matchedRule = streamRetention
   153  	}
   154  	if found {
   155  		return time.Duration(matchedRule.Period)
   156  	}
   157  	return globalRetention
   158  }
   159  
   160  type latestRetentionStartTime struct {
   161  	// defaults holds latest retention start time considering only default retention config.
   162  	// It is used to determine if user index table may have any expired chunks when the user does not have any custom retention config set.
   163  	defaults model.Time
   164  	// overall holds latest retention start time for all users considering both default and per user retention config.
   165  	// It is used to determine if common index table may have any expired chunks.
   166  	overall model.Time
   167  	// byUser holds latest retention start time considering only per user retention config.
   168  	// It is used to determine if user index table may have any expired chunks.
   169  	byUser map[string]model.Time
   170  }
   171  
   172  // findLatestRetentionStartTime returns the latest retention start time overall, just default config and by each user.
   173  func findLatestRetentionStartTime(now model.Time, limits Limits) latestRetentionStartTime {
   174  	// find the smallest retention period from default limits
   175  	defaultLimits := limits.DefaultLimits()
   176  	smallestDefaultRetentionPeriod := defaultLimits.RetentionPeriod
   177  	for _, streamRetention := range defaultLimits.StreamRetention {
   178  		if streamRetention.Period < smallestDefaultRetentionPeriod {
   179  			smallestDefaultRetentionPeriod = streamRetention.Period
   180  		}
   181  	}
   182  
   183  	overallSmallestRetentionPeriod := smallestDefaultRetentionPeriod
   184  
   185  	// find the smallest retention period by user
   186  	limitsByUserID := limits.AllByUserID()
   187  	smallestRetentionPeriodByUser := make(map[string]model.Time, len(limitsByUserID))
   188  	for userID, limit := range limitsByUserID {
   189  		smallestRetentionPeriodForUser := limit.RetentionPeriod
   190  		for _, streamRetention := range limit.StreamRetention {
   191  			if streamRetention.Period < smallestRetentionPeriodForUser {
   192  				smallestRetentionPeriodForUser = streamRetention.Period
   193  			}
   194  		}
   195  
   196  		// update the overallSmallestRetentionPeriod if this user has smaller value
   197  		smallestRetentionPeriodByUser[userID] = now.Add(time.Duration(-smallestRetentionPeriodForUser))
   198  		if smallestRetentionPeriodForUser < overallSmallestRetentionPeriod {
   199  			overallSmallestRetentionPeriod = smallestRetentionPeriodForUser
   200  		}
   201  	}
   202  
   203  	return latestRetentionStartTime{
   204  		defaults: now.Add(time.Duration(-smallestDefaultRetentionPeriod)),
   205  		overall:  now.Add(time.Duration(-overallSmallestRetentionPeriod)),
   206  		byUser:   smallestRetentionPeriodByUser,
   207  	}
   208  }