github.com/grafana/pyroscope@v1.18.0/pkg/distributor/aggregator/aggregator.go

github.com/grafana/pyroscope@v1.18.0/pkg/distributor/aggregator/aggregator.go (about)

     1  package aggregator
     2  
     3  import (
     4  	"sync"
     5  	"sync/atomic"
     6  	"time"
     7  )
     8  
// Aggregator aggregates values within
// a time window over a period of time.
//
// Values contributed through Aggregate are grouped by the caller-provided
// key and by the timestamp truncated to a multiple of the window. Each group
// is kept for one period and then handed over to a single owner.
type Aggregator[T any] struct {
	// window and period are durations in nanoseconds (see NewAggregator).
	// now returns the current time; NewAggregator sets it to timeNow,
	// which reports Unix nanoseconds.
	window int64
	period int64
	now    func() int64

	// m guards aggregates. tracker records the time of the most recent
	// Aggregate call for each key and is used to detect low-rate keys.
	m          sync.RWMutex
	tracker    *tracker
	aggregates map[aggregationKey]*AggregationResult[T]

	// close is closed by Stop to make Start return; done is closed by
	// Start right before it returns. stats holds the internal counters.
	close chan struct{}
	done  chan struct{}
	stats stats
}
    24  
// stats holds the counters maintained by the Aggregator.
type stats struct {
	// activeAggregates is incremented when Aggregate registers a new
	// aggregate and decremented when waitResult removes it.
	// activeSeries is set to the tracker size after every prune.
	// aggregated and errors count the aggregation function calls that
	// succeeded and failed, respectively.
	activeAggregates atomic.Int64
	activeSeries     atomic.Uint64
	aggregated       atomic.Uint64
	errors           atomic.Uint64
}
    31  
    32  func NewAggregator[T any](window, period time.Duration) *Aggregator[T] {
    33  	if window < period {
    34  		window = period
    35  	}
    36  	return &Aggregator[T]{
    37  		window:  window.Nanoseconds(),
    38  		period:  period.Nanoseconds(),
    39  		now:     timeNow,
    40  		tracker: newTracker(8, 64),
    41  		// NOTE(kolesnikovae): probably should be sharded as well.
    42  		aggregates: make(map[aggregationKey]*AggregationResult[T], 256),
    43  		close:      make(chan struct{}),
    44  		done:       make(chan struct{}),
    45  	}
    46  }
    47  
    48  func timeNow() int64 { return time.Now().UnixNano() }
    49  
    50  func (a *Aggregator[T]) Start() {
    51  	t := time.NewTicker(time.Duration(a.period))
    52  	defer func() {
    53  		t.Stop()
    54  		close(a.done)
    55  	}()
    56  	for {
    57  		select {
    58  		case <-a.close:
    59  			return
    60  		case <-t.C:
    61  			a.prune(a.now())
    62  		}
    63  	}
    64  }
    65  
// Stop the aggregator. It does not wait for ongoing aggregations
// to complete as no aggregation requests are expected during shutdown.
//
// Stop signals the loop run by Start and blocks until Start returns:
// done is only closed by Start, so Stop does not return if Start is
// never run. Stop must be called at most once, because closing the
// already closed channel panics.
func (a *Aggregator[T]) Stop() {
	close(a.close)
	<-a.done
}
    72  
// AggregateFn merges a contribution into an aggregate. Aggregate calls it
// with the value accumulated so far (the zero value of T for a newly created
// aggregate) and stores the returned value as the new aggregate. A non-nil
// error closes the aggregate with that error.
type AggregateFn[T any] func(T) (T, error)
    74  
// Aggregate applies fn to the aggregate identified by key and by the
// window the timestamp falls into, creating the aggregate if needed.
//
// The boolean result reports whether the call was aggregated. It is false
// (with a nil result and a nil error) when the key is seen for the first
// time, or when more than one period has passed since the previous call
// for the key: such keys are considered too infrequent to aggregate.
//
// When the call is aggregated, the shared result is returned along with
// the error returned by fn or, if the aggregate was already closed, the
// error it was closed with (which may be nil).
func (a *Aggregator[T]) Aggregate(key uint64, timestamp int64, fn AggregateFn[T]) (*AggregationResult[T], bool, error) {
	// Return early if the event rate is too low for aggregation.
	// The tracker returns 0 for a key it has no record of.
	now := a.now()
	lastUpdated := a.tracker.update(key, now)
	delta := now - lastUpdated // Negative delta is possible.
	// Distance between two updates is longer than the aggregation period.
	lowRate := 0 < delta && delta > a.period
	if lastUpdated == 0 || lowRate {
		return nil, false, nil
	}
	k := a.aggregationKey(key, timestamp)
	a.m.Lock()
	x, ok := a.aggregates[k]
	if !ok {
		// First contribution for this key and window: register the
		// aggregate and start the goroutine that bounds its life-time.
		a.stats.activeAggregates.Add(1)
		x = &AggregationResult[T]{
			key:   k,
			owner: make(chan struct{}, 1),
			done:  make(chan struct{}),
		}
		a.aggregates[k] = x
		go a.waitResult(x)
	}
	// The contribution is registered while the map lock is still held:
	// waitResult removes the aggregate from the map under the same lock
	// before it waits on wg, so it can not miss this contributor.
	x.wg.Add(1)
	defer x.wg.Done()
	a.m.Unlock()
	select {
	default:
	case <-x.done:
		// The aggregate has already been closed, either because
		// of a failed contribution or by its handler: report the
		// error it was closed with instead of calling fn.
		return x, true, x.err
	}
	var err error
	// x.m serializes the contributors of the same aggregate.
	x.m.Lock()
	x.value, err = fn(x.value)
	x.m.Unlock()
	if err != nil {
		// A failed contribution closes the aggregate
		// for all the other contributors as well.
		a.stats.errors.Add(1)
		x.Close(err)
	} else {
		a.stats.aggregated.Add(1)
	}
	return x, true, err
}
   119  
   120  func (a *Aggregator[T]) aggregationKey(key uint64, timestamp int64) aggregationKey {
   121  	return aggregationKey{
   122  		timestamp: (timestamp / a.window) * a.window,
   123  		key:       key,
   124  	}
   125  }
   126  
// aggregationKey identifies an aggregate: key is the caller-provided key,
// and timestamp is the contribution timestamp truncated to a multiple of
// the aggregation window.
type aggregationKey struct {
	key       uint64
	timestamp int64
}
   131  
// waitResult bounds the life-time of an aggregate. Aggregate starts it in
// a separate goroutine for every aggregate it creates. The aggregate is
// removed from the map once one period has elapsed or the aggregate has
// been closed, whichever happens first. Unless it was closed, the owner
// is notified after all the in-flight contributions have finished.
func (a *Aggregator[T]) waitResult(x *AggregationResult[T]) {
	// The value life-time is limited to the aggregation
	// period duration.
	var failed bool
	select {
	case <-time.After(time.Duration(a.period)):
	case <-x.done:
		// The aggregate was closed before the period elapsed.
		failed = true
	}
	// Once the aggregate is removed, no new contributors can find it.
	a.m.Lock()
	delete(a.aggregates, x.key)
	a.m.Unlock()
	a.stats.activeAggregates.Add(-1)
	if !failed {
		// Wait for ongoing aggregations to finish.
		x.wg.Wait()
		// Notify the owner: it must handle the aggregate
		// and close it, propagating any error occurred.
		// Aggregate creates owner with a buffer of one,
		// therefore this single send does not block.
		x.owner <- struct{}{}
	}
}
   153  
   154  // prune removes keys that have not been updating since
   155  // the beginning of the preceding aggregation period.
   156  func (a *Aggregator[T]) prune(deadline int64) {
   157  	a.tracker.prune(deadline - a.period)
   158  	a.stats.activeSeries.Store(uint64(a.tracker.len()))
   159  }
   160  
// AggregationResult is the aggregate shared by all the Aggregate calls
// that fall into the same aggregation key. Exactly one of the contributors
// can claim it with Handler (or Value); the others wait for the outcome
// with Wait.
type AggregationResult[T any] struct {
	// key is the map key the result is registered under. handled is set
	// by the first Value or Handler call. owner receives a single signal
	// from the aggregator when the result is ready to be handled.
	// m guards value while the aggregation function is running.
	key     aggregationKey
	handled atomic.Bool
	owner   chan struct{}
	m       sync.Mutex
	value   T

	// wg counts the in-flight Aggregate calls. close makes sure that
	// only the first Close call takes effect: it sets err and closes done.
	wg    sync.WaitGroup
	close sync.Once
	done  chan struct{}
	err   error
}
   173  
// Wait blocks until the aggregation finishes: either the result is closed,
// or the aggregator signals that the result is ready for its owner.
// The aggregator sends that signal one aggregation period after the result
// was created, once the in-flight aggregations have finished.
// Wait returns the error the result was closed with, if any.
func (r *AggregationResult[T]) Wait() error {
	select {
	case <-r.owner:
		// Only one waiter receives the owner signal.
	case <-r.done:
		// The result has been closed.
	}
	return r.err
}
   183  
// Close notifies all the contributors about the error
// encountered. Owner of the aggregated result must
// propagate any processing error that happened with the value.
//
// Only the first call takes effect: it records err (which may be nil)
// and closes done, which releases the callers blocked in Wait.
// Subsequent calls are no-ops.
func (r *AggregationResult[T]) Close(err error) {
	r.close.Do(func() {
		// The error is stored before the channel is closed, so that
		// whoever observes done closed also observes the error.
		r.err = err
		close(r.done)
	})
}
   193  
   194  // Value returns the aggregated value and indicates
   195  // whether the caller owns it.
   196  func (r *AggregationResult[T]) Value() (v T, ok bool) {
   197  	return r.value, !r.handled.Swap(true)
   198  }
   199  
   200  // Handler returns a handler of the aggregated result.
   201  // The handler is nil, if it has already been acquired.
   202  // The returned function is synchronous and blocks for
   203  // up to the aggregation period duration.
   204  func (r *AggregationResult[T]) Handler() func() (T, error) {
   205  	if !r.handled.Swap(true) {
   206  		return r.handle
   207  	}
   208  	return nil
   209  }
   210  
   211  func (r *AggregationResult[T]) handle() (v T, err error) {
   212  	defer r.Close(err)
   213  	if err = r.Wait(); err != nil {
   214  		return v, err
   215  	}
   216  	return r.value, r.err
   217  }
   218  
// tracker keeps the most recent value recorded for each key; the Aggregator
// uses it to store the time a key was last updated. The keys are spread
// across the shards, each of which has its own lock.
type tracker struct{ shards []*shard }
   220  
   221  func newTracker(shards int, shardSize uint32) *tracker {
   222  	t := tracker{shards: make([]*shard, shards)}
   223  	for i := range t.shards {
   224  		t.shards[i] = &shard{v: make(map[uint64]int64, shardSize)}
   225  	}
   226  	return &t
   227  }
   228  
   229  func (t *tracker) shard(k uint64) *shard          { return t.shards[k%uint64(len(t.shards))] }
   230  func (t *tracker) update(k uint64, n int64) int64 { return t.shard(k).update(k, n) }
   231  
   232  // prune removes keys with values less than n.
   233  func (t *tracker) prune(n int64) {
   234  	for _, x := range t.shards {
   235  		x.prune(n)
   236  	}
   237  }
   238  
   239  func (t *tracker) len() int {
   240  	var n int
   241  	for _, x := range t.shards {
   242  		n += x.len()
   243  	}
   244  	return n
   245  }
   246  
   247  type shard struct {
   248  	m sync.Mutex
   249  	v map[uint64]int64
   250  	s int
   251  }
   252  
   253  func (s *shard) update(k uint64, n int64) int64 {
   254  	s.m.Lock()
   255  	v := s.v[k]
   256  	s.v[k] = n
   257  	s.m.Unlock()
   258  	return v
   259  }
   260  
   261  func (s *shard) prune(n int64) {
   262  	s.m.Lock()
   263  	s.s = len(s.v)
   264  	for k, v := range s.v {
   265  		if v <= n {
   266  			delete(s.v, k)
   267  			s.s--
   268  		}
   269  	}
   270  	s.m.Unlock()
   271  }
   272  
   273  func (s *shard) len() int {
   274  	s.m.Lock()
   275  	v := s.s
   276  	s.m.Unlock()
   277  	return v
   278  }