github.com/grafana/pyroscope@v1.18.0/pkg/compactor/split_merge_grouper.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/split_merge_grouper.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package compactor
     7  
     8  import (
     9  	"fmt"
    10  	"math"
    11  	"sort"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/go-kit/log/level"
    15  	"github.com/oklog/ulid/v2"
    16  	"github.com/pkg/errors"
    17  	"github.com/prometheus/prometheus/model/labels"
    18  
    19  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    20  	"github.com/grafana/pyroscope/pkg/phlaredb/sharding"
    21  )
    22  
// SplitAndMergeGrouper plans split-and-merge compaction jobs for a single tenant's blocks.
// It first splits source blocks into shards (when sharding is enabled) and then merges
// blocks belonging to the same shard and time range.
type SplitAndMergeGrouper struct {
	// Tenant whose blocks are being grouped.
	userID string
	// Compaction time ranges, in milliseconds; must be sorted in increasing order.
	ranges []int64
	logger log.Logger

	// Number of shards to split source blocks into. 0 disables the splitting stage.
	shardCount uint32

	// Number of stages to split shards into.
	splitStageSize uint32

	// Number of groups that blocks used for splitting are grouped into.
	splitGroupsCount uint32
}
    37  
    38  // NewSplitAndMergeGrouper makes a new SplitAndMergeGrouper. The provided ranges must be sorted.
    39  // If shardCount is 0, the splitting stage is disabled.
    40  func NewSplitAndMergeGrouper(
    41  	userID string,
    42  	ranges []int64,
    43  	shardCount uint32,
    44  	splitStageSize uint32,
    45  	splitGroupsCount uint32,
    46  	logger log.Logger,
    47  ) *SplitAndMergeGrouper {
    48  	return &SplitAndMergeGrouper{
    49  		userID:           userID,
    50  		ranges:           ranges,
    51  		shardCount:       shardCount,
    52  		splitStageSize:   splitStageSize,
    53  		splitGroupsCount: splitGroupsCount,
    54  		logger:           logger,
    55  	}
    56  }
    57  
    58  func (g *SplitAndMergeGrouper) Groups(blocks map[ulid.ULID]*block.Meta) (res []*Job, err error) {
    59  	flatBlocks := make([]*block.Meta, 0, len(blocks))
    60  	for _, b := range blocks {
    61  		flatBlocks = append(flatBlocks, b)
    62  	}
    63  
    64  	for _, job := range planCompaction(g.userID, flatBlocks, g.ranges, g.shardCount, g.splitGroupsCount) {
    65  		// Sanity check: if splitting is disabled, we don't expect any job for the split stage.
    66  		if g.shardCount <= 0 && job.stage == stageSplit {
    67  			return nil, errors.Errorf("unexpected split stage job because splitting is disabled: %s", job.String())
    68  		}
    69  
    70  		// The group key is used by the compactor as a unique identifier of the compaction job.
    71  		// Its content is not important for the compactor, but uniqueness must be guaranteed.
    72  		groupKey := fmt.Sprintf("%s-%s-%s-%d-%d",
    73  			defaultGroupKeyWithoutShardID(job.blocks[0]),
    74  			job.stage,
    75  			job.shardID,
    76  			job.rangeStart,
    77  			job.rangeEnd)
    78  
    79  		// All the blocks within the same group have the same downsample
    80  		// resolution and external labels.
    81  		resolution := job.blocks[0].Resolution
    82  		externalLabels := labels.FromMap(job.blocks[0].Labels)
    83  
    84  		compactionJob := NewJob(
    85  			g.userID,
    86  			groupKey,
    87  			externalLabels,
    88  			resolution,
    89  			job.stage == stageSplit,
    90  			g.shardCount,
    91  			g.splitStageSize,
    92  			job.shardingKey(),
    93  		)
    94  
    95  		for _, m := range job.blocks {
    96  			if err := compactionJob.AppendMeta(m); err != nil {
    97  				return nil, errors.Wrap(err, "add block to compaction group")
    98  			}
    99  		}
   100  
   101  		res = append(res, compactionJob)
   102  		level.Debug(g.logger).Log("msg", "grouper found a compactable blocks group", "groupKey", groupKey, "job", job.String())
   103  	}
   104  
   105  	return res, nil
   106  }
   107  
   108  // planCompaction analyzes the input blocks and returns a list of compaction jobs that can be
   109  // run concurrently. Each returned job may belong either to this compactor instance or another one
   110  // in the cluster, so the caller should check if they belong to their instance before running them.
// planCompaction analyzes the input blocks and returns a list of compaction jobs that can be
// run concurrently. Each returned job may belong either to this compactor instance or another one
// in the cluster, so the caller should check if they belong to their instance before running them.
//
// Planning proceeds per external-labels group (excluding the shard ID label), then per
// configured time range in increasing order; smaller-range jobs planned earlier take
// precedence over conflicting larger-range jobs.
func planCompaction(userID string, blocks []*block.Meta, ranges []int64, shardCount, splitGroups uint32) (jobs []*job) {
	if len(blocks) == 0 || len(ranges) == 0 {
		return nil
	}

	// First of all we have to group blocks using the default grouping, but not
	// considering the shard ID in the external labels (because will be checked later).
	mainGroups := map[string][]*block.Meta{}
	for _, b := range blocks {
		key := defaultGroupKeyWithoutShardID(b)
		mainGroups[key] = append(mainGroups[key], b)
	}

	for _, mainBlocks := range mainGroups {
		// Sort blocks by min time.
		sortMetasByMinTime(mainBlocks)

		for _, tr := range ranges {
		nextJob:
			// Splitting is only considered for the smallest (first) range.
			for _, job := range planCompactionByRange(userID, mainBlocks, tr, tr == ranges[0], shardCount, splitGroups) {
				// We can plan a job only if it doesn't conflict with other jobs already planned.
				// Since we run the planning for each compaction range in increasing order, we guarantee
				// that a job for the current time range is planned only if there's no other job for the
				// same shard ID and an overlapping smaller time range.
				for _, j := range jobs {
					if job.conflicts(j) {
						continue nextJob
					}
				}

				jobs = append(jobs, job)
			}
		}
	}

	// Ensure we don't compact the most recent blocks prematurely when another one of
	// the same size still fits in the range. To do it, we consider a job valid only
	// if its range is before the most recent block or if it fully covers the range.
	highestMaxTime := getMaxTime(blocks)

	// In-place filter: idx is only advanced when the current job is kept, because
	// removing an element shifts the next candidate into position idx.
	for idx := 0; idx < len(jobs); {
		job := jobs[idx]

		// If the job covers a range before the most recent block, it's fine.
		if job.rangeEnd <= highestMaxTime {
			idx++
			continue
		}

		// If the job covers the full range, it's fine.
		if job.maxTime()-job.minTime() == job.rangeLength() {
			idx++
			continue
		}

		// We have found a job which would compact recent blocks prematurely,
		// so we need to filter it out.
		jobs = append(jobs[:idx], jobs[idx+1:]...)
	}

	// Jobs will be sorted later using configured job sorting algorithm.
	// Here we sort them by sharding key, to keep the output stable for testing.
	sort.SliceStable(jobs, func(i, j int) bool {
		if iKey, jKey := jobs[i].shardingKey(), jobs[j].shardingKey(); iKey != jKey {
			return iKey < jKey
		}

		// The sharding key could be equal but external labels can still be different.
		return defaultGroupKeyWithoutShardID(jobs[i].blocks[0]) < defaultGroupKeyWithoutShardID(jobs[j].blocks[0])
	})

	return jobs
}
   184  
   185  // planCompactionByRange analyze the input blocks and returns a list of compaction jobs to
   186  // compact blocks for the given compaction time range. Input blocks MUST be sorted by MinTime.
   187  func planCompactionByRange(userID string, blocks []*block.Meta, tr int64, isSmallestRange bool, shardCount, splitGroups uint32) (jobs []*job) {
   188  	groups := groupBlocksByRange(blocks, tr)
   189  
   190  	for _, group := range groups {
   191  		// If this is the smallest time range and there's any non-split block,
   192  		// then we should plan a job to split blocks.
   193  		if shardCount > 0 && isSmallestRange {
   194  			if splitJobs := planSplitting(userID, group, splitGroups); len(splitJobs) > 0 {
   195  				jobs = append(jobs, splitJobs...)
   196  				continue
   197  			}
   198  		}
   199  
   200  		// If we reach this point, all blocks for this time range have already been split
   201  		// (or we're not processing the smallest time range, or splitting is disabled).
   202  		// Then, we can check if there's any group of blocks to be merged together for each shard.
   203  		for shardID, shardBlocks := range groupBlocksByShardID(group.blocks) {
   204  			// No merging to do if there are less than 2 blocks.
   205  			if len(shardBlocks) < 2 {
   206  				continue
   207  			}
   208  
   209  			jobs = append(jobs, &job{
   210  				userID:  userID,
   211  				stage:   stageMerge,
   212  				shardID: shardID,
   213  				blocksGroup: blocksGroup{
   214  					rangeStart: group.rangeStart,
   215  					rangeEnd:   group.rangeEnd,
   216  					blocks:     shardBlocks,
   217  				},
   218  			})
   219  		}
   220  	}
   221  
   222  	return jobs
   223  }
   224  
   225  // planSplitting returns a job to split the blocks in the input group or nil if there's nothing to do because
   226  // all blocks in the group have already been split.
   227  func planSplitting(userID string, group blocksGroup, splitGroups uint32) []*job {
   228  	blocks := group.getNonShardedBlocks()
   229  	if len(blocks) == 0 {
   230  		return nil
   231  	}
   232  
   233  	jobs := map[uint32]*job{}
   234  
   235  	if splitGroups == 0 {
   236  		splitGroups = 1
   237  	}
   238  
   239  	// The number of source blocks could be very large so, to have a better horizontal scaling, we should group
   240  	// the source blocks into N groups (where N = number of shards) and create a job for each group of blocks to
   241  	// merge and split.
   242  	for _, blk := range blocks {
   243  		splitGroup := block.HashBlockID(blk.ULID) % splitGroups
   244  
   245  		if jobs[splitGroup] == nil {
   246  			jobs[splitGroup] = &job{
   247  				userID:  userID,
   248  				stage:   stageSplit,
   249  				shardID: sharding.FormatShardIDLabelValue(uint64(splitGroup), uint64(splitGroups)),
   250  				blocksGroup: blocksGroup{
   251  					rangeStart: group.rangeStart,
   252  					rangeEnd:   group.rangeEnd,
   253  				},
   254  			}
   255  		}
   256  
   257  		jobs[splitGroup].blocks = append(jobs[splitGroup].blocks, blk)
   258  	}
   259  
   260  	// Convert the output.
   261  	out := make([]*job, 0, len(jobs))
   262  	for _, job := range jobs {
   263  		out = append(out, job)
   264  	}
   265  
   266  	return out
   267  }
   268  
   269  // groupBlocksByShardID groups the blocks by shard ID (read from the block external labels).
   270  // If a block doesn't have any shard ID in the external labels, it will be grouped with the
   271  // shard ID set to an empty string.
   272  func groupBlocksByShardID(blocks []*block.Meta) map[string][]*block.Meta {
   273  	groups := map[string][]*block.Meta{}
   274  
   275  	for _, block := range blocks {
   276  		// If the label doesn't exist, we'll group together such blocks using an
   277  		// empty string as shard ID.
   278  		shardID := block.Labels[sharding.CompactorShardIDLabel]
   279  		groups[shardID] = append(groups[shardID], block)
   280  	}
   281  
   282  	return groups
   283  }
   284  
   285  // groupBlocksByRange groups the blocks by the time range. The range sequence starts at 0.
   286  // Input blocks MUST be sorted by MinTime.
   287  //
   288  // For example, if we have blocks [0-10, 10-20, 50-60, 90-100] and the split range tr is 30
   289  // it returns [0-10, 10-20], [50-60], [90-100].
// groupBlocksByRange groups the blocks by the time range. The range sequence starts at 0.
// Input blocks MUST be sorted by MinTime.
//
// For example, if we have blocks [0-10, 10-20, 50-60, 90-100] and the split range tr is 30
// it returns [0-10, 10-20], [50-60], [90-100].
func groupBlocksByRange(blocks []*block.Meta, tr int64) []blocksGroup {
	var ret []blocksGroup

	// Outer loop advances i only via the inner loop (or the skip branch), so each
	// block is examined exactly once.
	for i := 0; i < len(blocks); {
		var (
			group blocksGroup
			m     = blocks[i]
		)
		// The candidate range is the tr-aligned window containing this block's MinTime.
		group.rangeStart = getRangeStart(m, tr)
		group.rangeEnd = group.rangeStart + tr

		// Skip blocks that don't fall into the range. This can happen via mis-alignment or
		// by being the multiple of the intended range.
		if int64(m.MaxTime) > group.rangeEnd {
			i++
			continue
		}

		// Add all blocks to the current group that are within [t0, t0+tr].
		for ; i < len(blocks); i++ {
			// If the block does not start within this group, then we should break the iteration
			// and move it to the next group.
			if int64(blocks[i].MinTime) >= group.rangeEnd {
				break
			}

			// If the block doesn't fall into this group, but it started within this group then it
			// means it spans across multiple ranges and we should skip it.
			if int64(blocks[i].MaxTime) > group.rangeEnd {
				continue
			}

			group.blocks = append(group.blocks, blocks[i])
		}

		// Only emit groups that actually collected at least one block.
		if len(group.blocks) > 0 {
			ret = append(ret, group)
		}
	}

	return ret
}
   332  
   333  func getRangeStart(m *block.Meta, tr int64) int64 {
   334  	// Compute start of aligned time range of size tr closest to the current block's start.
   335  	if m.MinTime >= 0 {
   336  		return tr * (int64(m.MinTime) / tr)
   337  	}
   338  	return tr * ((int64(m.MinTime) - tr + 1) / tr)
   339  }
   340  
   341  func sortMetasByMinTime(metas []*block.Meta) []*block.Meta {
   342  	sort.Slice(metas, func(i, j int) bool {
   343  		if metas[i].MinTime != metas[j].MinTime {
   344  			return metas[i].MinTime < metas[j].MinTime
   345  		}
   346  
   347  		// Compare labels in case of same MinTime to get stable results.
   348  		return labels.Compare(labels.FromMap(metas[i].Labels), labels.FromMap(metas[j].Labels)) < 0
   349  	})
   350  
   351  	return metas
   352  }
   353  
   354  // getMaxTime returns the highest max time across all input blocks.
   355  func getMaxTime(blocks []*block.Meta) int64 {
   356  	maxTime := int64(math.MinInt64)
   357  
   358  	for _, block := range blocks {
   359  		if int64(block.MaxTime) > maxTime {
   360  			maxTime = int64(block.MaxTime)
   361  		}
   362  	}
   363  
   364  	return maxTime
   365  }
   366  
   367  // defaultGroupKeyWithoutShardID returns the default group key excluding ShardIDLabelName
   368  // when computing it.
   369  func defaultGroupKeyWithoutShardID(meta *block.Meta) string {
   370  	return defaultGroupKey(meta.Resolution, labelsWithout(meta.Labels, sharding.CompactorShardIDLabel, block.HostnameLabel))
   371  }
   372  
   373  // labelsWithout returns a copy of the input labels without the given labels.
   374  func labelsWithout(base map[string]string, without ...string) labels.Labels {
   375  	b := labels.NewScratchBuilder(len(base))
   376  Outer:
   377  	for k, v := range base {
   378  		for _, w := range without {
   379  			if k == w {
   380  				continue Outer
   381  			}
   382  		}
   383  		b.Add(k, v)
   384  	}
   385  	b.Sort()
   386  	return b.Labels()
   387  }