github.com/grafana/pyroscope@v1.18.0/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/shard_allocator.go (about) 1 package adaptiveplacement 2 3 import ( 4 "math" 5 ) 6 7 // shardAllocator dynamically adjusts the number of shards allocated for a 8 // dataset based on observed data rates. The system is designed to scale out 9 // rapidly in response to increased load while scaling in more conservatively 10 // to prevent unnecessary shard churn. 11 // 12 // The system calculates the total data rate from incoming dataset statistics 13 // and determines the required number of shards based on a fixed unit size 14 // i.e., the portion of the rate that a single shard can handle. Note that 15 // it is expected that the rate values are aggregated over a time window and 16 // are not varying overly frequently. 17 // 18 // When the observed data rate increases, the system aggressively increases the 19 // number of shards. This is achieved using an exponential growth factor that 20 // doubles the shard allocation request on consecutive scale-out events. This 21 // allows preventing "laddering" (slow, step-wise shard increases) when load 22 // is growing steadily. 23 // 24 // To avoid the risk of premature shrinking that could cause oscillations, the 25 // system decreases the number of shards more cautiously. It enforces a minimum 26 // shard count over a configurable time window: the system doesn't allocate 27 // fewer shards than were allocated during the last window. 28 type shardAllocator struct { 29 // Unit size denotes the portion of rate that needs 30 // to be allocated to a single shard. 31 unitSize uint64 32 // Minimum and maximum number of shards allowed. 33 min, max int 34 // Burst window specifies the time interval during which 35 // the shard allocation delta multiplier grows on scale outs. 36 burstWindow int64 37 // Decay window specifies the minimal time interval 38 // before the target number of shards can be decreased. 39 decayWindow int64 40 41 target int // Target number of shards. 42 burstOffset int64 // Timestamp of the burst window start. 43 multiplier float64 44 decayOffset int64 // Timestamp of the decay window start. 45 previousMin int // Minimum number of shards in the previous decay window. 46 currentMin int // Minimum number of shards in the current decay window. 47 } 48 49 func newShardAllocator(limits PlacementLimits) *shardAllocator { 50 a := new(shardAllocator) 51 a.setLimits(limits) 52 return a 53 } 54 55 func (a *shardAllocator) setLimits(limits PlacementLimits) { 56 a.unitSize = limits.UnitSizeBytes 57 a.min = int(limits.MinDatasetShards) 58 a.max = int(limits.MaxDatasetShards) 59 a.burstWindow = limits.BurstWindow.Nanoseconds() 60 a.decayWindow = limits.DecayWindow.Nanoseconds() 61 } 62 63 func (a *shardAllocator) observe(usage uint64, now int64) int { 64 target := int(usage/a.unitSize) + 1 65 delta := target - a.target 66 if delta > 0 { 67 // Scale out. 68 if a.burstOffset == 0 || now-a.burstOffset >= a.burstWindow { 69 // Reset multiplier if burst window has passed. 70 a.multiplier = 1 71 } else { 72 // Increase multiplier on consecutive scale-outs within burst window. 73 // Limiting the multiplier here allow us to not worry about overflows. 74 if a.multiplier < 16 { 75 a.multiplier *= 2 76 } 77 scaled := target + int(math.Ceil(float64(delta)*a.multiplier)) 78 target = min(2*target, scaled) 79 } 80 // Start/prolong burst window. 81 a.burstOffset = now 82 } 83 if a.decayOffset == 0 || now-a.decayOffset >= a.decayWindow { 84 a.previousMin, a.currentMin = a.currentMin, target 85 a.decayOffset = now 86 } 87 a.currentMin = max(a.currentMin, target) 88 a.target = min(a.max, max(a.min, a.previousMin, a.currentMin)) 89 return a.target 90 }