github.com/grafana/pyroscope@v1.18.0/pkg/compactor/split_merge_job.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/split_merge_job.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package compactor 7 8 import ( 9 "fmt" 10 "slices" 11 "strings" 12 "time" 13 14 "github.com/prometheus/prometheus/model/labels" 15 16 "github.com/grafana/pyroscope/pkg/phlaredb/block" 17 "github.com/grafana/pyroscope/pkg/phlaredb/sharding" 18 ) 19 20 type compactionStage string 21 22 const ( 23 stageSplit compactionStage = "split" 24 stageMerge compactionStage = "merge" 25 ) 26 27 // job holds a compaction job planned by the split merge compactor. 28 type job struct { 29 userID string 30 31 // Source blocks that should be compacted together when running this job. 32 blocksGroup 33 34 // The compaction stage of this job. 35 stage compactionStage 36 37 // The shard blocks in this job belong to. Its exact value depends on the stage: 38 // 39 // - split: identifier of the group of blocks that are going to be merged together 40 // when splitting their series into multiple output blocks. 41 // 42 // - merge: value of the ShardIDLabelName of all blocks in this job (all blocks in 43 // the job share the same label value). 44 shardID string 45 } 46 47 func (j *job) shardingKey() string { 48 return fmt.Sprintf("%s-%s-%d-%d-%s", j.userID, j.stage, j.rangeStart, j.rangeEnd, j.shardID) 49 } 50 51 // conflicts returns true if the two jobs cannot be planned at the same time. 52 func (j *job) conflicts(other *job) bool { 53 // Never conflict if related to different users or if time ranges don't overlap. 54 if j.userID != other.userID || !j.overlaps(other.blocksGroup) { 55 return false 56 } 57 58 // Blocks with different downsample resolution or external labels (excluding the shard ID) 59 // are never merged together, so they can't conflict. Since all blocks within the same job are expected to have the same 60 // downsample resolution and external labels, we just check the 1st block of each job. 61 if len(j.blocks) > 0 && len(other.blocks) > 0 { 62 myLabels := labelsWithout(j.blocksGroup.blocks[0].Labels, block.HostnameLabel, sharding.CompactorShardIDLabel) 63 otherLabels := labelsWithout(other.blocksGroup.blocks[0].Labels, block.HostnameLabel, sharding.CompactorShardIDLabel) 64 if !labels.Equal(myLabels, otherLabels) { 65 return false 66 } 67 if j.blocksGroup.blocks[0].Downsample != other.blocksGroup.blocks[0].Downsample { 68 return false 69 } 70 } 71 72 // We should merge after all splitting has been done, so two overlapping jobs 73 // for different stages shouldn't coexist. 74 if j.stage != other.stage { 75 return true 76 } 77 78 // At this point we have two overlapping jobs for the same stage. They conflict if 79 // belonging to the same shard. 80 return j.shardID == other.shardID 81 } 82 83 func (j *job) String() string { 84 blocks := make([]string, 0, len(j.blocks)) 85 for _, block := range j.blocks { 86 minT := block.MinTime.Time().UTC() 87 maxT := block.MaxTime.Time().UTC() 88 blocks = append(blocks, fmt.Sprintf("%s (min time: %s, max time: %s)", block.ULID.String(), minT.Format(time.RFC3339Nano), maxT.Format(time.RFC3339Nano))) 89 } 90 91 // Keep the output stable for tests. 92 slices.Sort(blocks) 93 94 return fmt.Sprintf("stage: %s, range start: %d, range end: %d, shard: %s, blocks: %s", 95 j.stage, j.rangeStart, j.rangeEnd, j.shardID, strings.Join(blocks, ",")) 96 } 97 98 // blocksGroup holds a group of blocks within the same time range. 99 type blocksGroup struct { 100 rangeStart int64 // Included. 101 rangeEnd int64 // Included. 102 blocks []*block.Meta // Sorted by MinTime. 103 } 104 105 // overlaps returns whether the group range overlaps with the input group. 106 func (g blocksGroup) overlaps(other blocksGroup) bool { 107 if g.rangeStart >= other.rangeEnd || other.rangeStart >= g.rangeEnd { 108 return false 109 } 110 111 return true 112 } 113 114 func (g blocksGroup) rangeLength() int64 { 115 return g.rangeEnd - g.rangeStart 116 } 117 118 // minTime returns the lowest MinTime across all blocks in the group. 119 func (g blocksGroup) minTime() int64 { 120 // Blocks are expected to be sorted by MinTime. 121 return int64(g.blocks[0].MinTime) 122 } 123 124 // maxTime returns the highest MaxTime across all blocks in the group. 125 func (g blocksGroup) maxTime() int64 { 126 max := g.blocks[0].MaxTime 127 128 for _, b := range g.blocks[1:] { 129 if b.MaxTime > max { 130 max = b.MaxTime 131 } 132 } 133 134 return int64(max) 135 } 136 137 // getNonShardedBlocks returns the list of non-sharded blocks. 138 func (g blocksGroup) getNonShardedBlocks() []*block.Meta { 139 var out []*block.Meta 140 141 for _, b := range g.blocks { 142 if value, ok := b.Labels[sharding.CompactorShardIDLabel]; !ok || value == "" { 143 out = append(out, b) 144 } 145 } 146 147 return out 148 }