github.com/grafana/pyroscope@v1.18.0/pkg/ingester/limiter.go (about) 1 package ingester 2 3 import ( 4 "context" 5 "sync" 6 "time" 7 8 "github.com/prometheus/common/model" 9 "github.com/samber/lo" 10 11 phlaremodel "github.com/grafana/pyroscope/pkg/model" 12 "github.com/grafana/pyroscope/pkg/util" 13 "github.com/grafana/pyroscope/pkg/validation" 14 ) 15 16 var ( 17 activeSeriesTimeout = 10 * time.Minute 18 activeSeriesCleanup = time.Minute 19 ) 20 21 type RingCount interface { 22 HealthyInstancesCount() int 23 } 24 25 type Limits interface { 26 MaxLocalSeriesPerTenant(tenantID string) int 27 MaxGlobalSeriesPerTenant(tenantID string) int 28 IngestionTenantShardSize(tenantID string) int 29 DistributorUsageGroups(tenantID string) *validation.UsageGroupConfig 30 } 31 32 type Limiter interface { 33 // AllowProfile returns an error if the profile is not allowed to be ingested. 34 // The error is a validation error and can be out of order or max series limit reached. 35 AllowProfile(fp model.Fingerprint, lbs phlaremodel.Labels, tsNano int64) error 36 Stop() 37 } 38 39 type limiter struct { 40 limits Limits 41 ring RingCount 42 replicationFactor int 43 tenantID string 44 45 activeSeries map[model.Fingerprint]int64 46 47 mtx sync.Mutex // todo: may be shard the lock to avoid latency spikes. 48 49 ctx context.Context 50 cancel context.CancelFunc 51 wg sync.WaitGroup 52 } 53 54 func NewLimiter(tenantID string, limits Limits, ring RingCount, replicationFactor int) Limiter { 55 ctx, cancel := context.WithCancel(context.Background()) 56 57 l := &limiter{ 58 tenantID: tenantID, 59 limits: limits, 60 ring: ring, 61 replicationFactor: replicationFactor, 62 activeSeries: map[model.Fingerprint]int64{}, 63 cancel: cancel, 64 ctx: ctx, 65 } 66 67 l.wg.Add(1) 68 go l.loop() 69 70 return l 71 } 72 73 func (l *limiter) Stop() { 74 l.cancel() 75 l.wg.Wait() 76 } 77 78 func (l *limiter) loop() { 79 defer l.wg.Done() 80 81 ticker := time.NewTicker(activeSeriesCleanup) 82 defer ticker.Stop() 83 84 for { 85 select { 86 case <-ticker.C: 87 l.cleanup() 88 case <-l.ctx.Done(): 89 return 90 } 91 } 92 } 93 94 // cleanup removes the series that have not been used for a while. 95 func (l *limiter) cleanup() { 96 now := time.Now().UnixNano() 97 l.mtx.Lock() 98 defer l.mtx.Unlock() 99 100 for fp, lastUsed := range l.activeSeries { 101 if now-lastUsed > int64(activeSeriesTimeout) { 102 delete(l.activeSeries, fp) 103 } 104 } 105 } 106 107 func (l *limiter) AllowProfile(fp model.Fingerprint, lbs phlaremodel.Labels, tsNano int64) error { 108 l.mtx.Lock() 109 defer l.mtx.Unlock() 110 return l.allowNewSeries(fp) 111 } 112 113 func (l *limiter) allowNewSeries(fp model.Fingerprint) error { 114 _, ok := l.activeSeries[fp] 115 series := len(l.activeSeries) 116 if !ok { 117 // can this series be added? 118 if err := l.assertMaxSeriesPerUser(l.tenantID, series); err != nil { 119 return err 120 } 121 } 122 123 // update time or add it 124 l.activeSeries[fp] = time.Now().UnixNano() 125 return nil 126 } 127 128 func (l *limiter) assertMaxSeriesPerUser(tenantID string, series int) error { 129 // Start by setting the local limit either from override or default 130 localLimit := l.limits.MaxLocalSeriesPerTenant(tenantID) 131 132 // We can assume that series are evenly distributed across ingesters 133 // so we do convert the global limit into a local limit 134 globalLimit := l.limits.MaxGlobalSeriesPerTenant(tenantID) 135 adjustedGlobalLimit := l.convertGlobalToLocalLimit(tenantID, globalLimit) 136 137 // Set the calculated limit to the lesser of the local limit or the new calculated global limit 138 calculatedLimit := minNonZero(localLimit, adjustedGlobalLimit) 139 140 // If both the local and global limits are disabled, we just 141 // use the largest int value 142 if calculatedLimit == 0 { 143 return nil 144 } 145 146 if series < calculatedLimit { 147 return nil 148 } 149 return validation.NewErrorf(validation.SeriesLimit, validation.SeriesLimitErrorMsg, series, calculatedLimit) 150 } 151 152 func (l *limiter) convertGlobalToLocalLimit(tenantID string, globalLimit int) int { 153 if globalLimit == 0 { 154 return 0 155 } 156 157 // Given we don't need a super accurate count (ie. when the ingesters 158 // topology changes) and we prefer to always be in favor of the tenant, 159 // we can use a per-ingester limit equal to: 160 // (global limit / number of ingesters) * replication factor 161 numIngesters := l.ring.HealthyInstancesCount() 162 163 // No healthy ingester may happen because the number of ingesters is asynchronously updated. 164 // If happens, we just temporarily ignore the global limit. 165 if numIngesters == 0 { 166 return 0 167 } 168 169 // If the number of available ingesters is greater than the tenant's shard 170 // size, then we should honor the shard size because series/metadata won't 171 // be written to more ingesters than it. 172 if shardSize := l.limits.IngestionTenantShardSize(tenantID); shardSize > 0 { 173 // We use Min() to protect from the case the expected shard size is > available ingesters. 174 numIngesters = lo.Min([]int{numIngesters, util.ShuffleShardExpectedInstances(shardSize, 1)}) 175 } 176 177 return int((float64(globalLimit) / float64(numIngesters)) * float64(l.replicationFactor)) 178 } 179 180 func minNonZero(first, second int) int { 181 if first == 0 || (second != 0 && first > second) { 182 return second 183 } 184 185 return first 186 }