github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ingester/limiter.go

package ingester

import (
	"fmt"
	"math"
	"sync"
	"time"

	"golang.org/x/time/rate"

	"github.com/grafana/loki/pkg/validation"
)

const (
	errMaxStreamsPerUserLimitExceeded = "tenant '%v' per-user streams limit exceeded, streams: %d exceeds calculated limit: %d (local limit: %d, global limit: %d, global/ingesters: %d)"
)

// RingCount is the interface exposed by a ring implementation that
// allows counting its healthy members.
type RingCount interface {
	HealthyInstancesCount() int
}
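
// A minimal RingCount stub for tests or examples; fixedRing and its
// healthy field are hypothetical additions, not part of the original file.
type fixedRing struct{ healthy int }

// HealthyInstancesCount returns the fixed member count the stub was built with.
func (r *fixedRing) HealthyInstancesCount() int { return r.healthy }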

// Limiter implements primitives to get the maximum number of streams
// an ingester can handle for a specific tenant.
type Limiter struct {
	limits            *validation.Overrides
	ring              RingCount
	replicationFactor int
	metrics           *ingesterMetrics

	mtx      sync.RWMutex
	disabled bool
}

// DisableForWALReplay turns limit enforcement off so WAL replay can
// re-create streams that were already accepted before a restart.
func (l *Limiter) DisableForWALReplay() {
	l.mtx.Lock()
	defer l.mtx.Unlock()
	l.disabled = true
	l.metrics.limiterEnabled.Set(0)
}

// Enable turns limit enforcement back on once WAL replay has finished.
func (l *Limiter) Enable() {
	l.mtx.Lock()
	defer l.mtx.Unlock()
	l.disabled = false
	l.metrics.limiterEnabled.Set(1)
}

// NewLimiter makes a new limiter.
func NewLimiter(limits *validation.Overrides, metrics *ingesterMetrics, ring RingCount, replicationFactor int) *Limiter {
	return &Limiter{
		limits:            limits,
		ring:              ring,
		replicationFactor: replicationFactor,
		metrics:           metrics,
	}
}
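
// A hedged construction sketch (not in the original source): overrides and
// metrics would come from the ingester's wiring, the ring stub is the
// hypothetical fixedRing above, and 3 is a typical replication factor.
//
//	limiter := NewLimiter(overrides, metrics, &fixedRing{healthy: 6}, 3)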

// UnorderedWrites reports whether out-of-order writes are allowed for the
// tenant. While the limiter is disabled (WAL replay), they are always
// allowed, since replay should not discard previously ack'd writes; this
// lets unordered WALs be replayed into ordered configurations.
func (l *Limiter) UnorderedWrites(userID string) bool {
	// Guard the read of disabled with the same mutex that guards its writes.
	l.mtx.RLock()
	defer l.mtx.RUnlock()
	if l.disabled {
		return true
	}
	return l.limits.UnorderedWrites(userID)
}

// AssertMaxStreamsPerUser checks whether the given number of streams for a
// tenant exceeds the calculated per-ingester limit and returns an error if
// it does.
func (l *Limiter) AssertMaxStreamsPerUser(userID string, streams int) error {
	// Until the limiter actually starts, all accesses are successful.
	// This is used to disable limits while recovering from the WAL.
	l.mtx.RLock()
	defer l.mtx.RUnlock()
	if l.disabled {
		return nil
	}

	// Start by setting the local limit either from override or default.
	localLimit := l.limits.MaxLocalStreamsPerUser(userID)

	// We can assume that streams are evenly distributed across ingesters,
	// so we convert the global limit into a per-ingester local limit.
	globalLimit := l.limits.MaxGlobalStreamsPerUser(userID)
	adjustedGlobalLimit := l.convertGlobalToLocalLimit(globalLimit)

	// Set the calculated limit to the lesser of the local limit and the
	// adjusted global limit, ignoring whichever is disabled (zero).
	calculatedLimit := l.minNonZero(localLimit, adjustedGlobalLimit)

	// If both the local and global limits are disabled, fall back to the
	// largest int32 value, effectively disabling the check.
	if calculatedLimit == 0 {
		calculatedLimit = math.MaxInt32
	}

	if streams < calculatedLimit {
		return nil
	}

	return fmt.Errorf(errMaxStreamsPerUserLimitExceeded, userID, streams, calculatedLimit, localLimit, globalLimit, adjustedGlobalLimit)
}

func (l *Limiter) convertGlobalToLocalLimit(globalLimit int) int {
	if globalLimit == 0 {
		return 0
	}

	// Given we don't need a super accurate count (i.e. when the ingester
	// topology changes) and we prefer to always be in favor of the tenant,
	// we can use a per-ingester limit equal to:
	// (global limit / number of ingesters) * replication factor
	numIngesters := l.ring.HealthyInstancesCount()

	// numIngesters may be zero because ring membership is updated
	// asynchronously. If that happens, we temporarily ignore the global
	// limit by returning zero.
	if numIngesters > 0 {
		return int((float64(globalLimit) / float64(numIngesters)) * float64(l.replicationFactor))
	}

	return 0
}
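
// A worked example with hypothetical numbers: a global limit of 900 streams,
// 6 healthy ingesters, and replication factor 3 yield a local limit of
// (900 / 6) * 3 = 450 streams per ingester. Since every stream is written to
// 3 ingesters, roughly 6 * 450 / 3 = 900 distinct streams fit cluster-wide,
// matching the global limit.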

// minNonZero returns the smaller of first and second, treating a zero
// value as "limit disabled" and therefore never picking it over a
// non-zero value.
func (l *Limiter) minNonZero(first, second int) int {
	if first == 0 || (second != 0 && first > second) {
		return second
	}

	return first
}
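
// Illustrative values (not in the original source):
//
//	minNonZero(0, 10) == 10 // local limit disabled, global wins
//	minNonZero(5, 0)  == 5  // global limit disabled, local wins
//	minNonZero(5, 10) == 5  // both set, the stricter one wins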

// RateLimiterStrategy returns the per-stream rate limit to apply for a
// given tenant.
type RateLimiterStrategy interface {
	RateLimit(tenant string) validation.RateLimit
}

// RateLimit returns the tenant's per-stream rate limit, or no limit at
// all while the limiter is disabled for WAL replay.
func (l *Limiter) RateLimit(tenant string) validation.RateLimit {
	// Guard the read of disabled with the same mutex that guards its writes.
	l.mtx.RLock()
	defer l.mtx.RUnlock()
	if l.disabled {
		return validation.Unlimited
	}

	return l.limits.PerStreamRateLimit(tenant)
}
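
// A compile-time check, added for illustration: *Limiter satisfies
// RateLimiterStrategy, so it can back a StreamRateLimiter directly.
var _ RateLimiterStrategy = (*Limiter)(nil)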

// StreamRateLimiter enforces a per-stream rate limit, periodically
// re-reading the limit from its strategy so configuration changes are
// picked up without a restart.
type StreamRateLimiter struct {
	recheckPeriod time.Duration
	recheckAt     time.Time
	strategy      RateLimiterStrategy
	tenant        string
	lim           *rate.Limiter
}

// NewStreamRateLimiter builds a StreamRateLimiter for the given tenant,
// initialized from the strategy's current rate limit.
func NewStreamRateLimiter(strategy RateLimiterStrategy, tenant string, recheckPeriod time.Duration) *StreamRateLimiter {
	rl := strategy.RateLimit(tenant)
	return &StreamRateLimiter{
		recheckPeriod: recheckPeriod,
		strategy:      strategy,
		tenant:        tenant,
		lim:           rate.NewLimiter(rl.Limit, rl.Burst),
	}
}

// AllowN reports whether n tokens may be consumed at time at. Every
// recheckPeriod it re-reads the tenant's limit from the strategy and
// rebuilds the underlying limiter if the limit or burst changed.
func (l *StreamRateLimiter) AllowN(at time.Time, n int) bool {
	now := time.Now()
	if now.After(l.recheckAt) {
		l.recheckAt = now.Add(l.recheckPeriod)

		oldLim := l.lim.Limit()
		oldBurst := l.lim.Burst()

		next := l.strategy.RateLimit(l.tenant)

		if oldLim != next.Limit || oldBurst != next.Burst {
			// Edge case: rate.Inf doesn't advance nicely when reconfigured.
			// To simplify, we just create a new limiter after reconfiguration
			// rather than alter the existing one.
			l.lim = rate.NewLimiter(next.Limit, next.Burst)
		}
	}

	return l.lim.AllowN(at, n)
}
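
// A minimal usage sketch, not part of the original file: gating a push with
// a StreamRateLimiter. The tenant name, recheck period, and payloadSize are
// hypothetical stand-ins; the strategy would typically be the Limiter above.
func exampleGatePush(strategy RateLimiterStrategy, payloadSize int) bool {
	limiter := NewStreamRateLimiter(strategy, "tenant-a", 10*time.Second)

	// AllowN charges payloadSize tokens against the current limit and
	// reports whether the push fits within the per-stream rate.
	return limiter.AllowN(time.Now(), payloadSize)
}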