github.com/grafana/pyroscope@v1.18.0/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/distribution_stats.go (about)

     1  package adaptiveplacement
     2  
     3  import (
     4  	"math"
     5  	"slices"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/grafana/pyroscope/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/adaptive_placementpb"
    11  	"github.com/grafana/pyroscope/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/ewma"
    12  
    13  	"github.com/grafana/pyroscope/pkg/iter"
    14  )
    15  
// DistributionStats is a helper struct that tracks the data rate of each
// dataset within a certain time window. EWMA aggregation function is used
// to calculate the instantaneous rate of the dataset, the time window is
// half-life of the EWMA function.
//
// DistributionStats is safe for concurrent use.
type DistributionStats struct {
	// mu guards counters and every *ewma.Rate stored in it.
	mu sync.Mutex
	// counters holds one rate counter per (tenant, dataset, shard).
	// Keys with a zero shard are dataset-wide counters maintained by build.
	counters map[counterKey]*ewma.Rate
	// window is the EWMA half-life used for newly created counters.
	window time.Duration
}
    27  
    28  func NewDistributionStats(window time.Duration) *DistributionStats {
    29  	return &DistributionStats{
    30  		counters: make(map[counterKey]*ewma.Rate),
    31  		window:   window,
    32  	}
    33  }
    34  
// Sample is a single observation of data written for a tenant's dataset
// to a specific shard. Size is the observed amount of data fed into the
// rate counter (units not established here — presumably bytes; confirm
// against the caller).
type Sample struct {
	TenantID    string
	DatasetName string
	ShardOwner  string
	ShardID     uint32
	Size        uint64
}
    42  
    43  func (d *DistributionStats) RecordStats(samples iter.Iterator[Sample]) {
    44  	d.recordStats(time.Now().UnixNano(), samples)
    45  }
    46  
    47  func (d *DistributionStats) Build() *adaptive_placementpb.DistributionStats {
    48  	return d.build(time.Now().UnixNano())
    49  }
    50  
    51  func (d *DistributionStats) Expire(before time.Time) {
    52  	d.mu.Lock()
    53  	defer d.mu.Unlock()
    54  	for k, v := range d.counters {
    55  		if v.LastUpdate().Before(before) {
    56  			delete(d.counters, k)
    57  		}
    58  	}
    59  }
    60  
    61  func (d *DistributionStats) recordStats(now int64, samples iter.Iterator[Sample]) {
    62  	d.mu.Lock()
    63  	defer d.mu.Unlock()
    64  	for samples.Next() {
    65  		s := samples.At()
    66  		// TODO(kolesnikovae): intern strings with unique (go 1.23)
    67  		c := d.counter(counterKey{
    68  			tenant:  s.TenantID,
    69  			dataset: s.DatasetName,
    70  			shard: shard{
    71  				owner: s.ShardOwner,
    72  				id:    s.ShardID,
    73  			},
    74  		})
    75  		c.UpdateAt(float64(s.Size), now)
    76  	}
    77  }
    78  
    79  func (d *DistributionStats) counter(k counterKey) *ewma.Rate {
    80  	c, ok := d.counters[k]
    81  	if !ok {
    82  		c = ewma.NewHalfLife(d.window)
    83  		d.counters[k] = c
    84  	}
    85  	return c
    86  }
    87  
    88  type counterKey struct {
    89  	tenant  string
    90  	dataset string
    91  	shard   shard
    92  }
    93  
    94  func (k counterKey) compare(x counterKey) int {
    95  	if c := strings.Compare(k.tenant, x.tenant); c != 0 {
    96  		return c
    97  	}
    98  	if c := strings.Compare(k.dataset, x.dataset); c != 0 {
    99  		return c
   100  	}
   101  	if k.shard.id != x.shard.id {
   102  		return int(k.shard.id) - int(x.shard.id)
   103  	}
   104  	return strings.Compare(k.shard.owner, x.shard.owner)
   105  }
   106  
   107  type shard struct {
   108  	owner string
   109  	id    uint32
   110  }
   111  
// build assembles a DistributionStats snapshot from the current counters,
// evaluating every EWMA at the supplied timestamp (nanoseconds). Tenants,
// datasets, and shards are emitted once each and referenced by index.
func (d *DistributionStats) build(now int64) *adaptive_placementpb.DistributionStats {
	d.mu.Lock()
	defer d.mu.Unlock()

	// Indices of entries already appended to the corresponding stats
	// slices, so repeated keys reuse the same entry.
	tenants := make(map[string]int)
	datasets := make(map[string]int)
	shards := make(map[shard]int)

	// Although, not strictly required, we iterate over the keys
	// in a deterministic order to make the output deterministic.
	keys := make([]counterKey, 0, len(d.counters))
	for k := range d.counters {
		keys = append(keys, k)
	}
	slices.SortFunc(keys, func(a, b counterKey) int {
		return a.compare(b)
	})

	stats := &adaptive_placementpb.DistributionStats{CreatedAt: now}
	for _, k := range keys {
		c := d.counters[k]
		// Skip dataset-wide counters.
		if k.shard.id == 0 {
			continue
		}

		ti, ok := tenants[k.tenant]
		if !ok {
			ti = len(stats.Tenants)
			tenants[k.tenant] = ti
			stats.Tenants = append(stats.Tenants, &adaptive_placementpb.TenantStats{
				TenantId: k.tenant,
			})
		}

		// NOTE(review): datasets is keyed by dataset name only, not by
		// (tenant, dataset); if two tenants use the same dataset name,
		// their stats are merged into one entry attributed to the first
		// tenant seen. Confirm dataset names are globally unique here.
		di, ok := datasets[k.dataset]
		if !ok {
			di = len(stats.Datasets)
			datasets[k.dataset] = di
			stats.Datasets = append(stats.Datasets, &adaptive_placementpb.DatasetStats{
				Tenant: uint32(ti),
				Name:   k.dataset,
			})
		}

		si, ok := shards[k.shard]
		if !ok {
			si = len(stats.Shards)
			shards[k.shard] = si
			stats.Shards = append(stats.Shards, &adaptive_placementpb.ShardStats{
				Id:    k.shard.id,
				Owner: k.shard.owner,
			})
		}

		// Parallel slices: Shards[i] is the shard index, Usage[i] the
		// EWMA rate of that shard rounded to the nearest integer.
		ds := stats.Datasets[di]
		ds.Shards = append(ds.Shards, uint32(si))
		ds.Usage = append(ds.Usage, uint64(math.Round(c.ValueAt(now))))
	}

	// Smooth the per-dataset usage standard deviation through a
	// dataset-wide counter (zero-value shard key); counter() may create
	// it on first build, and it persists in d.counters until expired.
	for _, dataset := range stats.Datasets {
		c := d.counter(counterKey{
			tenant:  stats.Tenants[dataset.Tenant].TenantId,
			dataset: dataset.Name,
		})
		// Unlike the shard counters, we update the dataset-wide
		// counters at the build time.
		c.UpdateAt(float64(stdDev(dataset.Usage)), now)
		dataset.StdDev = uint64(math.Round(c.ValueAt(now)))
	}

	return stats
}