github.com/grafana/pyroscope@v1.18.0/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/ruler.go (about)

     1  package adaptiveplacement
     2  
     3  import (
     4  	"slices"
     5  	"strings"
     6  	"time"
     7  
     8  	"github.com/grafana/pyroscope/pkg/segmentwriter/client/distributor/placement/adaptiveplacement/adaptive_placementpb"
     9  )
    10  
// Ruler builds placement rules based on distribution stats.
//
// Ruler is not safe for concurrent use: the caller should
// ensure synchronization.
type Ruler struct {
	// limits resolves per-tenant placement limits by tenant ID.
	limits   Limits
	// datasets holds per-(tenant, dataset) shard allocation state that
	// persists across BuildRules calls until removed by Expire.
	datasets map[datasetKey]*datasetShards
}
    19  
    20  func NewRuler(limits Limits) *Ruler {
    21  	return &Ruler{
    22  		limits:   limits,
    23  		datasets: make(map[datasetKey]*datasetShards),
    24  	}
    25  }
    26  
    27  func (r *Ruler) Load(rules *adaptive_placementpb.PlacementRules) {
    28  	tenantLimits := make([]PlacementLimits, len(rules.Tenants))
    29  	for i, t := range rules.Tenants {
    30  		tenantLimits[i] = r.limits.PlacementLimits(t.TenantId)
    31  	}
    32  	for _, ds := range rules.Datasets {
    33  		k := datasetKey{
    34  			tenant:  rules.Tenants[ds.Tenant].TenantId,
    35  			dataset: ds.Name,
    36  		}
    37  		limits := tenantLimits[ds.Tenant]
    38  		dataset := &datasetShards{
    39  			allocator:     newShardAllocator(limits),
    40  			lastUpdate:    rules.CreatedAt,
    41  			tenantShards:  ds.TenantShardLimit,
    42  			datasetShards: ds.DatasetShardLimit,
    43  			loadBalancing: ds.LoadBalancing,
    44  		}
    45  		// NOTE(kolesnikovae): We prohibit decreasing the number
    46  		// of shards for the dataset till the expiration of the
    47  		// decay window since the moment rules were created. Thus,
    48  		// if statistics are not available or populated slowly,
    49  		// we won't shrink the dataset prematurely but will be
    50  		// able to scale out if needed.
    51  		dataset.allocator.decayOffset = rules.CreatedAt
    52  		dataset.allocator.currentMin = int(ds.DatasetShardLimit)
    53  		r.datasets[k] = dataset
    54  	}
    55  }
    56  
// BuildRules produces placement rules from the given distribution stats,
// updating the ruler's per-dataset state as a side effect.
//
// Datasets known to the ruler but absent from the current stats are
// carried over into the output with their previously computed limits.
func (r *Ruler) BuildRules(stats *adaptive_placementpb.DistributionStats) *adaptive_placementpb.PlacementRules {
	rules := adaptive_placementpb.PlacementRules{
		Tenants:   make([]*adaptive_placementpb.TenantPlacement, len(stats.Tenants)),
		Datasets:  make([]*adaptive_placementpb.DatasetPlacement, len(stats.Datasets)),
		CreatedAt: stats.CreatedAt,
	}

	// Resolve tenant limits once, and remember each tenant's index in
	// rules.Tenants so the carry-over pass below can reference it.
	tenantLimits := make([]PlacementLimits, len(stats.Tenants))
	tenants := make(map[string]int)
	for i, t := range stats.Tenants {
		tenants[t.TenantId] = i
		tenantLimits[i] = r.limits.PlacementLimits(t.TenantId)
		rules.Tenants[i] = &adaptive_placementpb.TenantPlacement{
			TenantId: t.TenantId,
		}
	}

	for i, datasetStats := range stats.Datasets {
		k := datasetKey{
			tenant:  rules.Tenants[datasetStats.Tenant].TenantId,
			dataset: datasetStats.Name,
		}
		limits := tenantLimits[datasetStats.Tenant]
		dataset, ok := r.datasets[k]
		if !ok {
			// First time this dataset is seen: start from the
			// tenant's configured defaults.
			dataset = &datasetShards{
				allocator:     new(shardAllocator),
				lastUpdate:    stats.CreatedAt,
				tenantShards:  limits.TenantShards,
				datasetShards: limits.DefaultDatasetShards,
				loadBalancing: limits.LoadBalancing.proto(),
			}
			r.datasets[k] = dataset
		}
		// placement updates the dataset state (shard limits, load
		// balancing) and stamps lastUpdate with stats.CreatedAt.
		rules.Datasets[i] = dataset.placement(datasetStats, limits, stats.CreatedAt)
	}

	// Include datasets that were not present in the current stats.
	// Although, not strictly required, we iterate over the keys
	// in a deterministic order to make the output deterministic.
	keys := make([]datasetKey, 0, len(r.datasets))
	for k, dataset := range r.datasets {
		// Datasets handled above were stamped with stats.CreatedAt;
		// anything older was absent from the current stats.
		if dataset.lastUpdate < stats.CreatedAt {
			keys = append(keys, k)
		}
	}
	slices.SortFunc(keys, func(a, b datasetKey) int {
		return a.compare(b)
	})

	for _, k := range keys {
		dataset := r.datasets[k]
		t, ok := tenants[k.tenant]
		if !ok {
			// The tenant is not present in the current stats:
			// append a new tenant placement and index it.
			t = len(rules.Tenants)
			tenants[k.tenant] = t
			rules.Tenants = append(rules.Tenants, &adaptive_placementpb.TenantPlacement{
				TenantId: k.tenant,
			})
		}
		rules.Datasets = append(rules.Datasets, &adaptive_placementpb.DatasetPlacement{
			Tenant:            uint32(t),
			Name:              k.dataset,
			TenantShardLimit:  dataset.tenantShards,
			DatasetShardLimit: dataset.datasetShards,
			LoadBalancing:     dataset.loadBalancing,
		})
	}

	return &rules
}
   128  
   129  func (r *Ruler) Expire(before time.Time) {
   130  	for k, ds := range r.datasets {
   131  		if time.Unix(0, ds.lastUpdate).Before(before) {
   132  			delete(r.datasets, k)
   133  		}
   134  	}
   135  }
   136  
   137  type datasetKey struct{ tenant, dataset string }
   138  
   139  func (k datasetKey) compare(x datasetKey) int {
   140  	if c := strings.Compare(k.tenant, x.tenant); c != 0 {
   141  		return c
   142  	}
   143  	return strings.Compare(k.dataset, x.dataset)
   144  }
   145  
// datasetShards tracks the shard allocation state of a single dataset.
type datasetShards struct {
	// allocator determines the dataset shard count from observed usage.
	allocator *shardAllocator
	// Last time the dataset was updated,
	// according to the stats update time
	// (unix nanoseconds; see Expire).
	lastUpdate int64
	// Limits.
	tenantShards  uint64
	datasetShards uint64
	loadBalancing adaptive_placementpb.LoadBalancing
}
   156  
   157  func (d *datasetShards) placement(
   158  	stats *adaptive_placementpb.DatasetStats,
   159  	limits PlacementLimits,
   160  	now int64,
   161  ) *adaptive_placementpb.DatasetPlacement {
   162  	d.lastUpdate = now
   163  	d.allocator.setLimits(limits)
   164  	d.tenantShards = limits.TenantShards
   165  	d.datasetShards = uint64(d.allocator.observe(sum(stats.Usage), now))
   166  	// Determine whether we need to change the load balancing strategy.
   167  	configured := limits
   168  	if configured.LoadBalancing != DynamicLoadBalancing {
   169  		d.loadBalancing = configured.LoadBalancing.proto()
   170  	} else if configured.LoadBalancing.needsDynamicBalancing(d.loadBalancing) {
   171  		d.loadBalancing = loadBalancingStrategy(stats, d.allocator.unitSize, d.allocator.target).proto()
   172  	}
   173  	return &adaptive_placementpb.DatasetPlacement{
   174  		Tenant:            stats.Tenant,
   175  		Name:              stats.Name,
   176  		TenantShardLimit:  d.tenantShards,
   177  		DatasetShardLimit: d.datasetShards,
   178  		LoadBalancing:     d.loadBalancing,
   179  	}
   180  }