package aggregator

import (
	"sync"
	"sync/atomic"
	"time"
)

// Aggregator aggregates values within
// a time window over a period of time.
//
// Contributors merge values through Aggregate; for each (key, window)
// pair exactly one caller becomes the owner of the aggregated result
// and is responsible for handling it (see AggregationResult).
type Aggregator[T any] struct {
	window int64        // aggregation window length, in nanoseconds
	period int64        // prune period, in nanoseconds; never exceeds window
	now    func() int64 // clock source; a field so tests can inject a fake clock

	m       sync.RWMutex
	tracker *tracker // last-update time per key, used to detect low-rate series
	// aggregates holds the in-flight aggregation per (key, window) pair.
	aggregates map[aggregationKey]*AggregationResult[T]

	close chan struct{} // closed by Stop to terminate the Start loop
	done  chan struct{} // closed by the Start loop on exit
	stats stats
}

// stats holds internal counters; all fields are updated atomically.
type stats struct {
	activeAggregates atomic.Int64  // in-flight aggregation results
	activeSeries     atomic.Uint64 // tracked keys as of the last prune
	aggregated       atomic.Uint64 // successful aggregation calls
	errors           atomic.Uint64 // failed aggregation calls
}

// NewAggregator creates an Aggregator that aggregates values within the
// given window and prunes stale keys every period. If window is shorter
// than period, it is raised to period.
func NewAggregator[T any](window, period time.Duration) *Aggregator[T] {
	if window < period {
		window = period
	}
	return &Aggregator[T]{
		window:  window.Nanoseconds(),
		period:  period.Nanoseconds(),
		now:     timeNow,
		tracker: newTracker(8, 64),
		// NOTE(kolesnikovae): probably should be sharded as well.
		aggregates: make(map[aggregationKey]*AggregationResult[T], 256),
		close:      make(chan struct{}),
		done:       make(chan struct{}),
	}
}

// timeNow is the default clock for Aggregator.now.
func timeNow() int64 { return time.Now().UnixNano() }

// Start runs the prune loop until Stop is called. It is expected to be
// run in its own goroutine; it signals completion by closing a.done.
func (a *Aggregator[T]) Start() {
	t := time.NewTicker(time.Duration(a.period))
	defer func() {
		t.Stop()
		close(a.done)
	}()
	for {
		select {
		case <-a.close:
			return
		case <-t.C:
			a.prune(a.now())
		}
	}
}

// Stop the aggregator. It does not wait for ongoing aggregations
// to complete as no aggregation requests expected during shutdown.
func (a *Aggregator[T]) Stop() {
	close(a.close)
	<-a.done
}

// AggregateFn merges a sample into the accumulated value and returns
// the updated value.
type AggregateFn[T any] func(T) (T, error)

// Aggregate merges a value into the aggregate identified by key and the
// window containing timestamp, applying fn to the accumulated value.
// The second return value reports whether aggregation took place: when
// the key is seen for the first time, or its event rate is too low to
// be worth aggregating, it returns (nil, false, nil) and the caller
// should process the value directly.
func (a *Aggregator[T]) Aggregate(key uint64, timestamp int64, fn AggregateFn[T]) (*AggregationResult[T], bool, error) {
	// Return early if the event rate is too low for aggregation.
	now := a.now()
	lastUpdated := a.tracker.update(key, now)
	delta := now - lastUpdated // Negative delta is possible.
	// Distance between two updates is longer than the aggregation period.
	lowRate := 0 < delta && delta > a.period
	if lastUpdated == 0 || lowRate {
		// First-seen key or low-rate series: skip aggregation.
		return nil, false, nil
	}
	k := a.aggregationKey(key, timestamp)
	a.m.Lock()
	x, ok := a.aggregates[k]
	if !ok {
		a.stats.activeAggregates.Add(1)
		x = &AggregationResult[T]{
			key:   k,
			owner: make(chan struct{}, 1),
			done:  make(chan struct{}),
		}
		a.aggregates[k] = x
		// waitResult retires the aggregate after the period elapses
		// and hands the result over to the owner.
		go a.waitResult(x)
	}
	// Register as an in-flight contributor before releasing the map
	// lock, so that waitResult's wg.Wait observes this call once the
	// aggregate is removed from the map.
	x.wg.Add(1)
	defer x.wg.Done()
	a.m.Unlock()
	// Non-blocking check: bail out if the aggregation already failed.
	select {
	default:
	case <-x.done:
		// Aggregation has failed.
		return x, true, x.err
	}
	var err error
	x.m.Lock()
	x.value, err = fn(x.value)
	x.m.Unlock()
	if err != nil {
		a.stats.errors.Add(1)
		x.Close(err)
	} else {
		a.stats.aggregated.Add(1)
	}
	return x, true, err
}

// aggregationKey aligns timestamp down to the window boundary, so all
// samples of a key within one window share the same map key.
func (a *Aggregator[T]) aggregationKey(key uint64, timestamp int64) aggregationKey {
	return aggregationKey{
		timestamp: (timestamp / a.window) * a.window,
		key:       key,
	}
}

// aggregationKey identifies one aggregate: a series key plus the
// window-aligned timestamp.
type aggregationKey struct {
	key       uint64
	timestamp int64
}

// waitResult retires the aggregate after one period (or immediately on
// failure), removes it from the map, and notifies the owner.
func (a *Aggregator[T]) waitResult(x *AggregationResult[T]) {
	// The value life-time is limited to the aggregation
	// window duration.
	// NOTE(review): the wait below is a.period, not a.window — confirm
	// the comment above matches the intended lifetime.
	var failed bool
	select {
	case <-time.After(time.Duration(a.period)):
	case <-x.done:
		failed = true
	}
	a.m.Lock()
	delete(a.aggregates, x.key)
	a.m.Unlock()
	a.stats.activeAggregates.Add(-1)
	if !failed {
		// Wait for ongoing aggregations to finish.
		x.wg.Wait()
		// Notify the owner: it must handle the aggregate
		// and close it, propagating any error occurred.
		x.owner <- struct{}{}
	}
}

// prune removes keys that have not been updating since
// the beginning of the preceding aggregation period.
func (a *Aggregator[T]) prune(deadline int64) {
	// Drop keys whose last update is older than one period before the
	// deadline, then refresh the active-series gauge.
	a.tracker.prune(deadline - a.period)
	a.stats.activeSeries.Store(uint64(a.tracker.len()))
}

// AggregationResult is the shared state of a single (key, window)
// aggregation. Contributors merge values into it via Aggregate; exactly
// one caller — the owner — collects the final value via Handler or
// Value and must Close the result.
type AggregationResult[T any] struct {
	key     aggregationKey
	handled atomic.Bool   // set once ownership of the result is claimed
	owner   chan struct{} // signals the owner that the aggregate is ready (buffered, cap 1)
	m       sync.Mutex    // guards value while AggregateFn is applied
	value   T

	wg    sync.WaitGroup // in-flight Aggregate calls contributing to value
	close sync.Once      // guards the err assignment and closing of done
	done  chan struct{}  // closed by Close: on failure, or once the owner handled the result
	err   error
}

// Wait blocks until the aggregation finishes.
// The block duration never exceeds aggregation period.
func (r *AggregationResult[T]) Wait() error {
	select {
	case <-r.owner: // this caller has been designated the owner
	case <-r.done: // aggregation failed or was already closed
	}
	return r.err
}

// Close notifies all the contributors about the error
// encountered. Owner of the aggregated result must
// propagate any processing error happened with the value.
func (r *AggregationResult[T]) Close(err error) {
	// sync.Once makes Close idempotent: only the first error wins.
	r.close.Do(func() {
		r.err = err
		close(r.done)
	})
}

// Value returns the aggregated value and indicates
// whether the caller owns it.
//
// Only the first caller observes ok == true; the atomic swap makes the
// ownership claim race-free.
func (r *AggregationResult[T]) Value() (v T, ok bool) {
	return r.value, !r.handled.Swap(true)
}

// Handler returns a handler of the aggregated result.
// The handler is nil, if it has already been acquired.
// The returned function is synchronous and blocks for
// up to the aggregation period duration.
204 func (r *AggregationResult[T]) Handler() func() (T, error) { 205 if !r.handled.Swap(true) { 206 return r.handle 207 } 208 return nil 209 } 210 211 func (r *AggregationResult[T]) handle() (v T, err error) { 212 defer r.Close(err) 213 if err = r.Wait(); err != nil { 214 return v, err 215 } 216 return r.value, r.err 217 } 218 219 type tracker struct{ shards []*shard } 220 221 func newTracker(shards int, shardSize uint32) *tracker { 222 t := tracker{shards: make([]*shard, shards)} 223 for i := range t.shards { 224 t.shards[i] = &shard{v: make(map[uint64]int64, shardSize)} 225 } 226 return &t 227 } 228 229 func (t *tracker) shard(k uint64) *shard { return t.shards[k%uint64(len(t.shards))] } 230 func (t *tracker) update(k uint64, n int64) int64 { return t.shard(k).update(k, n) } 231 232 // prune removes keys with values less than n. 233 func (t *tracker) prune(n int64) { 234 for _, x := range t.shards { 235 x.prune(n) 236 } 237 } 238 239 func (t *tracker) len() int { 240 var n int 241 for _, x := range t.shards { 242 n += x.len() 243 } 244 return n 245 } 246 247 type shard struct { 248 m sync.Mutex 249 v map[uint64]int64 250 s int 251 } 252 253 func (s *shard) update(k uint64, n int64) int64 { 254 s.m.Lock() 255 v := s.v[k] 256 s.v[k] = n 257 s.m.Unlock() 258 return v 259 } 260 261 func (s *shard) prune(n int64) { 262 s.m.Lock() 263 s.s = len(s.v) 264 for k, v := range s.v { 265 if v <= n { 266 delete(s.v, k) 267 s.s-- 268 } 269 } 270 s.m.Unlock() 271 } 272 273 func (s *shard) len() int { 274 s.m.Lock() 275 v := s.s 276 s.m.Unlock() 277 return v 278 }