github.com/grafana/pyroscope@v1.18.0/pkg/compactor/split_merge_job.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/split_merge_job.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package compactor
     7  
     8  import (
     9  	"fmt"
    10  	"slices"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/prometheus/prometheus/model/labels"
    15  
    16  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    17  	"github.com/grafana/pyroscope/pkg/phlaredb/sharding"
    18  )
    19  
// compactionStage identifies the phase of the split-and-merge algorithm a
// planned job belongs to. See job.shardID for how each stage interprets the
// shard ID.
type compactionStage string

const (
	stageSplit compactionStage = "split"
	stageMerge compactionStage = "merge"
)
    26  
// job holds a compaction job planned by the split merge compactor: a group of
// source blocks paired with the stage (split or merge) to apply to them.
type job struct {
	// Tenant the source blocks belong to.
	userID string

	// Source blocks that should be compacted together when running this job.
	blocksGroup

	// The compaction stage of this job.
	stage compactionStage

	// The shard blocks in this job belong to. Its exact value depends on the stage:
	//
	// - split: identifier of the group of blocks that are going to be merged together
	// when splitting their series into multiple output blocks.
	//
	// - merge: value of the ShardIDLabelName of all blocks in this job (all blocks in
	// the job share the same label value).
	shardID string
}
    46  
    47  func (j *job) shardingKey() string {
    48  	return fmt.Sprintf("%s-%s-%d-%d-%s", j.userID, j.stage, j.rangeStart, j.rangeEnd, j.shardID)
    49  }
    50  
    51  // conflicts returns true if the two jobs cannot be planned at the same time.
    52  func (j *job) conflicts(other *job) bool {
    53  	// Never conflict if related to different users or if time ranges don't overlap.
    54  	if j.userID != other.userID || !j.overlaps(other.blocksGroup) {
    55  		return false
    56  	}
    57  
    58  	// Blocks with different downsample resolution or external labels (excluding the shard ID)
    59  	// are never merged together, so they can't conflict. Since all blocks within the same job are expected to have the same
    60  	// downsample resolution and external labels, we just check the 1st block of each job.
    61  	if len(j.blocks) > 0 && len(other.blocks) > 0 {
    62  		myLabels := labelsWithout(j.blocksGroup.blocks[0].Labels, block.HostnameLabel, sharding.CompactorShardIDLabel)
    63  		otherLabels := labelsWithout(other.blocksGroup.blocks[0].Labels, block.HostnameLabel, sharding.CompactorShardIDLabel)
    64  		if !labels.Equal(myLabels, otherLabels) {
    65  			return false
    66  		}
    67  		if j.blocksGroup.blocks[0].Downsample != other.blocksGroup.blocks[0].Downsample {
    68  			return false
    69  		}
    70  	}
    71  
    72  	// We should merge after all splitting has been done, so two overlapping jobs
    73  	// for different stages shouldn't coexist.
    74  	if j.stage != other.stage {
    75  		return true
    76  	}
    77  
    78  	// At this point we have two overlapping jobs for the same stage. They conflict if
    79  	// belonging to the same shard.
    80  	return j.shardID == other.shardID
    81  }
    82  
    83  func (j *job) String() string {
    84  	blocks := make([]string, 0, len(j.blocks))
    85  	for _, block := range j.blocks {
    86  		minT := block.MinTime.Time().UTC()
    87  		maxT := block.MaxTime.Time().UTC()
    88  		blocks = append(blocks, fmt.Sprintf("%s (min time: %s, max time: %s)", block.ULID.String(), minT.Format(time.RFC3339Nano), maxT.Format(time.RFC3339Nano)))
    89  	}
    90  
    91  	// Keep the output stable for tests.
    92  	slices.Sort(blocks)
    93  
    94  	return fmt.Sprintf("stage: %s, range start: %d, range end: %d, shard: %s, blocks: %s",
    95  		j.stage, j.rangeStart, j.rangeEnd, j.shardID, strings.Join(blocks, ","))
    96  }
    97  
// blocksGroup holds a group of blocks within the same time range.
type blocksGroup struct {
	rangeStart int64         // Included.
	rangeEnd   int64         // Excluded: overlaps() treats a group ending at T and a group starting at T as disjoint.
	blocks     []*block.Meta // Sorted by MinTime.
}
   104  
   105  // overlaps returns whether the group range overlaps with the input group.
   106  func (g blocksGroup) overlaps(other blocksGroup) bool {
   107  	if g.rangeStart >= other.rangeEnd || other.rangeStart >= g.rangeEnd {
   108  		return false
   109  	}
   110  
   111  	return true
   112  }
   113  
// rangeLength returns the length of the time range covered by the group.
func (g blocksGroup) rangeLength() int64 {
	return g.rangeEnd - g.rangeStart
}
   117  
// minTime returns the lowest MinTime across all blocks in the group.
func (g blocksGroup) minTime() int64 {
	// Blocks are expected to be sorted by MinTime, so the first block holds
	// the minimum.
	return int64(g.blocks[0].MinTime)
}
   123  
   124  // maxTime returns the highest MaxTime across all blocks in the group.
   125  func (g blocksGroup) maxTime() int64 {
   126  	max := g.blocks[0].MaxTime
   127  
   128  	for _, b := range g.blocks[1:] {
   129  		if b.MaxTime > max {
   130  			max = b.MaxTime
   131  		}
   132  	}
   133  
   134  	return int64(max)
   135  }
   136  
   137  // getNonShardedBlocks returns the list of non-sharded blocks.
   138  func (g blocksGroup) getNonShardedBlocks() []*block.Meta {
   139  	var out []*block.Meta
   140  
   141  	for _, b := range g.blocks {
   142  		if value, ok := b.Labels[sharding.CompactorShardIDLabel]; !ok || value == "" {
   143  			out = append(out, b)
   144  		}
   145  	}
   146  
   147  	return out
   148  }