github.com/grafana/pyroscope@v1.18.0/pkg/distributor/ingestlimits/sampler.go (about) 1 package ingestlimits 2 3 import ( 4 "context" 5 "math/rand" 6 "sync" 7 "time" 8 9 "github.com/grafana/dskit/services" 10 ) 11 12 type tenantTracker struct { 13 mu sync.Mutex 14 lastRequestTime time.Time 15 remainingRequests int 16 } 17 18 // Sampler provides a very simple time-based probabilistic sampling, 19 // intended to be used when a tenant limit has been reached. 20 // 21 // The sampler will allow a number of requests in a time interval. 22 // Once the interval is over, the number of allowed requests resets. 23 // 24 // We introduce a probability function for a request to be allowed defined as 1 / num_replicas, 25 // to account for the size of the cluster and because tracking is done in memory. 26 type Sampler struct { 27 *services.BasicService 28 29 mu sync.RWMutex 30 tenants map[string]*tenantTracker 31 32 // needed for adjusting the probability function with the number of replicas 33 instanceCountProvider InstanceCountProvider 34 35 // cleanup of the tenants map to prevent build-up 36 cleanupInterval time.Duration 37 maxAge time.Duration 38 closeOnce sync.Once 39 stop chan struct{} 40 done chan struct{} 41 } 42 43 type InstanceCountProvider interface { 44 InstancesCount() int 45 } 46 47 func NewSampler(instanceCount InstanceCountProvider) *Sampler { 48 s := &Sampler{ 49 tenants: make(map[string]*tenantTracker), 50 instanceCountProvider: instanceCount, 51 cleanupInterval: 1 * time.Hour, 52 maxAge: 24 * time.Hour, 53 stop: make(chan struct{}), 54 done: make(chan struct{}), 55 } 56 s.BasicService = services.NewBasicService( 57 s.starting, 58 s.running, 59 s.stopping, 60 ) 61 62 return s 63 } 64 65 func (s *Sampler) starting(_ context.Context) error { return nil } 66 67 func (s *Sampler) stopping(_ error) error { 68 s.closeOnce.Do(func() { 69 close(s.stop) 70 <-s.done 71 }) 72 return nil 73 } 74 75 func (s *Sampler) running(ctx context.Context) error { 76 t := time.NewTicker(s.cleanupInterval) 77 defer func() { 78 t.Stop() 79 close(s.done) 80 }() 81 for { 82 select { 83 case <-t.C: 84 s.removeStaleTenants() 85 case <-s.stop: 86 return nil 87 case <-ctx.Done(): 88 return nil 89 } 90 } 91 } 92 93 func (s *Sampler) AllowRequest(tenantID string, config SamplingConfig) bool { 94 s.mu.Lock() 95 tracker, exists := s.tenants[tenantID] 96 if !exists { 97 tracker = &tenantTracker{ 98 lastRequestTime: time.Now(), 99 remainingRequests: config.NumRequests, 100 } 101 s.tenants[tenantID] = tracker 102 } 103 s.mu.Unlock() 104 105 return tracker.AllowRequest(s.instanceCountProvider.InstancesCount(), config.Period, config.NumRequests) 106 } 107 108 func (b *tenantTracker) AllowRequest(replicaCount int, windowDuration time.Duration, maxRequests int) bool { 109 b.mu.Lock() 110 defer b.mu.Unlock() 111 112 now := time.Now() 113 114 // reset tracking data if enough time has passed 115 if now.Sub(b.lastRequestTime) >= windowDuration { 116 b.lastRequestTime = now 117 b.remainingRequests = maxRequests 118 } 119 120 if b.remainingRequests > 0 { 121 // random chance of allowing request, adjusting for the number of replicas 122 shouldAllow := rand.Float64() < float64(maxRequests)/float64(replicaCount) 123 124 if shouldAllow { 125 b.remainingRequests-- 126 return true 127 } 128 } 129 130 return false 131 } 132 133 func (s *Sampler) removeStaleTenants() { 134 s.mu.Lock() 135 cutoff := time.Now().Add(-s.maxAge) 136 for tenantID, tracker := range s.tenants { 137 if tracker.lastRequestTime.Before(cutoff) { 138 delete(s.tenants, tenantID) 139 } 140 } 141 s.mu.Unlock() 142 }