github.com/grafana/pyroscope@v1.18.0/pkg/distributor/ingestlimits/sampler.go (about)

     1  package ingestlimits
     2  
     3  import (
     4  	"context"
     5  	"math/rand"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/grafana/dskit/services"
    10  )
    11  
    12  type tenantTracker struct {
    13  	mu                sync.Mutex
    14  	lastRequestTime   time.Time
    15  	remainingRequests int
    16  }
    17  
    18  // Sampler provides a very simple time-based probabilistic sampling,
    19  // intended to be used when a tenant limit has been reached.
    20  //
    21  // The sampler will allow a number of requests in a time interval.
    22  // Once the interval is over, the number of allowed requests resets.
    23  //
    24  // We introduce a probability function for a request to be allowed defined as 1 / num_replicas,
    25  // to account for the size of the cluster and because tracking is done in memory.
    26  type Sampler struct {
    27  	*services.BasicService
    28  
    29  	mu      sync.RWMutex
    30  	tenants map[string]*tenantTracker
    31  
    32  	// needed for adjusting the probability function with the number of replicas
    33  	instanceCountProvider InstanceCountProvider
    34  
    35  	// cleanup of the tenants map to prevent build-up
    36  	cleanupInterval time.Duration
    37  	maxAge          time.Duration
    38  	closeOnce       sync.Once
    39  	stop            chan struct{}
    40  	done            chan struct{}
    41  }
    42  
    43  type InstanceCountProvider interface {
    44  	InstancesCount() int
    45  }
    46  
    47  func NewSampler(instanceCount InstanceCountProvider) *Sampler {
    48  	s := &Sampler{
    49  		tenants:               make(map[string]*tenantTracker),
    50  		instanceCountProvider: instanceCount,
    51  		cleanupInterval:       1 * time.Hour,
    52  		maxAge:                24 * time.Hour,
    53  		stop:                  make(chan struct{}),
    54  		done:                  make(chan struct{}),
    55  	}
    56  	s.BasicService = services.NewBasicService(
    57  		s.starting,
    58  		s.running,
    59  		s.stopping,
    60  	)
    61  
    62  	return s
    63  }
    64  
    65  func (s *Sampler) starting(_ context.Context) error { return nil }
    66  
    67  func (s *Sampler) stopping(_ error) error {
    68  	s.closeOnce.Do(func() {
    69  		close(s.stop)
    70  		<-s.done
    71  	})
    72  	return nil
    73  }
    74  
    75  func (s *Sampler) running(ctx context.Context) error {
    76  	t := time.NewTicker(s.cleanupInterval)
    77  	defer func() {
    78  		t.Stop()
    79  		close(s.done)
    80  	}()
    81  	for {
    82  		select {
    83  		case <-t.C:
    84  			s.removeStaleTenants()
    85  		case <-s.stop:
    86  			return nil
    87  		case <-ctx.Done():
    88  			return nil
    89  		}
    90  	}
    91  }
    92  
    93  func (s *Sampler) AllowRequest(tenantID string, config SamplingConfig) bool {
    94  	s.mu.Lock()
    95  	tracker, exists := s.tenants[tenantID]
    96  	if !exists {
    97  		tracker = &tenantTracker{
    98  			lastRequestTime:   time.Now(),
    99  			remainingRequests: config.NumRequests,
   100  		}
   101  		s.tenants[tenantID] = tracker
   102  	}
   103  	s.mu.Unlock()
   104  
   105  	return tracker.AllowRequest(s.instanceCountProvider.InstancesCount(), config.Period, config.NumRequests)
   106  }
   107  
   108  func (b *tenantTracker) AllowRequest(replicaCount int, windowDuration time.Duration, maxRequests int) bool {
   109  	b.mu.Lock()
   110  	defer b.mu.Unlock()
   111  
   112  	now := time.Now()
   113  
   114  	// reset tracking data if enough time has passed
   115  	if now.Sub(b.lastRequestTime) >= windowDuration {
   116  		b.lastRequestTime = now
   117  		b.remainingRequests = maxRequests
   118  	}
   119  
   120  	if b.remainingRequests > 0 {
   121  		// random chance of allowing request, adjusting for the number of replicas
   122  		shouldAllow := rand.Float64() < float64(maxRequests)/float64(replicaCount)
   123  
   124  		if shouldAllow {
   125  			b.remainingRequests--
   126  			return true
   127  		}
   128  	}
   129  
   130  	return false
   131  }
   132  
   133  func (s *Sampler) removeStaleTenants() {
   134  	s.mu.Lock()
   135  	cutoff := time.Now().Add(-s.maxAge)
   136  	for tenantID, tracker := range s.tenants {
   137  		if tracker.lastRequestTime.Before(cutoff) {
   138  			delete(s.tenants, tenantID)
   139  		}
   140  	}
   141  	s.mu.Unlock()
   142  }