github.com/grafana/pyroscope@v1.18.0/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/distribution_stats.go (about) 1 package adaptiveplacement 2 3 import ( 4 "math" 5 "slices" 6 "strings" 7 "sync" 8 "time" 9 10 "github.com/grafana/pyroscope/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/adaptive_placementpb" 11 "github.com/grafana/pyroscope/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/ewma" 12 13 "github.com/grafana/pyroscope/pkg/iter" 14 ) 15 16 // DistributionStats is a helper struct that tracks the data rate of each 17 // dataset within a certain time window. EWMA aggregation function is used 18 // to calculate the instantaneous rate of the dataset, the time window is 19 // half-life of the EWMA function. 20 // 21 // DistributionStats is safe for concurrent use. 22 type DistributionStats struct { 23 mu sync.Mutex 24 counters map[counterKey]*ewma.Rate 25 window time.Duration 26 } 27 28 func NewDistributionStats(window time.Duration) *DistributionStats { 29 return &DistributionStats{ 30 counters: make(map[counterKey]*ewma.Rate), 31 window: window, 32 } 33 } 34 35 type Sample struct { 36 TenantID string 37 DatasetName string 38 ShardOwner string 39 ShardID uint32 40 Size uint64 41 } 42 43 func (d *DistributionStats) RecordStats(samples iter.Iterator[Sample]) { 44 d.recordStats(time.Now().UnixNano(), samples) 45 } 46 47 func (d *DistributionStats) Build() *adaptive_placementpb.DistributionStats { 48 return d.build(time.Now().UnixNano()) 49 } 50 51 func (d *DistributionStats) Expire(before time.Time) { 52 d.mu.Lock() 53 defer d.mu.Unlock() 54 for k, v := range d.counters { 55 if v.LastUpdate().Before(before) { 56 delete(d.counters, k) 57 } 58 } 59 } 60 61 func (d *DistributionStats) recordStats(now int64, samples iter.Iterator[Sample]) { 62 d.mu.Lock() 63 defer d.mu.Unlock() 64 for samples.Next() { 65 s := samples.At() 66 // TODO(kolesnikovae): intern strings with unique (go 1.23) 67 c := d.counter(counterKey{ 68 tenant: s.TenantID, 69 dataset: s.DatasetName, 70 shard: shard{ 71 owner: s.ShardOwner, 72 id: s.ShardID, 73 }, 74 }) 75 c.UpdateAt(float64(s.Size), now) 76 } 77 } 78 79 func (d *DistributionStats) counter(k counterKey) *ewma.Rate { 80 c, ok := d.counters[k] 81 if !ok { 82 c = ewma.NewHalfLife(d.window) 83 d.counters[k] = c 84 } 85 return c 86 } 87 88 type counterKey struct { 89 tenant string 90 dataset string 91 shard shard 92 } 93 94 func (k counterKey) compare(x counterKey) int { 95 if c := strings.Compare(k.tenant, x.tenant); c != 0 { 96 return c 97 } 98 if c := strings.Compare(k.dataset, x.dataset); c != 0 { 99 return c 100 } 101 if k.shard.id != x.shard.id { 102 return int(k.shard.id) - int(x.shard.id) 103 } 104 return strings.Compare(k.shard.owner, x.shard.owner) 105 } 106 107 type shard struct { 108 owner string 109 id uint32 110 } 111 112 func (d *DistributionStats) build(now int64) *adaptive_placementpb.DistributionStats { 113 d.mu.Lock() 114 defer d.mu.Unlock() 115 116 tenants := make(map[string]int) 117 datasets := make(map[string]int) 118 shards := make(map[shard]int) 119 120 // Although, not strictly required, we iterate over the keys 121 // in a deterministic order to make the output deterministic. 122 keys := make([]counterKey, 0, len(d.counters)) 123 for k := range d.counters { 124 keys = append(keys, k) 125 } 126 slices.SortFunc(keys, func(a, b counterKey) int { 127 return a.compare(b) 128 }) 129 130 stats := &adaptive_placementpb.DistributionStats{CreatedAt: now} 131 for _, k := range keys { 132 c := d.counters[k] 133 // Skip dataset-wide counters. 134 if k.shard.id == 0 { 135 continue 136 } 137 138 ti, ok := tenants[k.tenant] 139 if !ok { 140 ti = len(stats.Tenants) 141 tenants[k.tenant] = ti 142 stats.Tenants = append(stats.Tenants, &adaptive_placementpb.TenantStats{ 143 TenantId: k.tenant, 144 }) 145 } 146 147 di, ok := datasets[k.dataset] 148 if !ok { 149 di = len(stats.Datasets) 150 datasets[k.dataset] = di 151 stats.Datasets = append(stats.Datasets, &adaptive_placementpb.DatasetStats{ 152 Tenant: uint32(ti), 153 Name: k.dataset, 154 }) 155 } 156 157 si, ok := shards[k.shard] 158 if !ok { 159 si = len(stats.Shards) 160 shards[k.shard] = si 161 stats.Shards = append(stats.Shards, &adaptive_placementpb.ShardStats{ 162 Id: k.shard.id, 163 Owner: k.shard.owner, 164 }) 165 } 166 167 ds := stats.Datasets[di] 168 ds.Shards = append(ds.Shards, uint32(si)) 169 ds.Usage = append(ds.Usage, uint64(math.Round(c.ValueAt(now)))) 170 } 171 172 for _, dataset := range stats.Datasets { 173 c := d.counter(counterKey{ 174 tenant: stats.Tenants[dataset.Tenant].TenantId, 175 dataset: dataset.Name, 176 }) 177 // Unlike the shard counters, we update the dataset-wide 178 // counters at the build time. 179 c.UpdateAt(float64(stdDev(dataset.Usage)), now) 180 dataset.StdDev = uint64(math.Round(c.ValueAt(now))) 181 } 182 183 return stats 184 }