github.com/grafana/pyroscope@v1.18.0/pkg/compactor/split_merge_grouper.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/split_merge_grouper.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package compactor 7 8 import ( 9 "fmt" 10 "math" 11 "sort" 12 13 "github.com/go-kit/log" 14 "github.com/go-kit/log/level" 15 "github.com/oklog/ulid/v2" 16 "github.com/pkg/errors" 17 "github.com/prometheus/prometheus/model/labels" 18 19 "github.com/grafana/pyroscope/pkg/phlaredb/block" 20 "github.com/grafana/pyroscope/pkg/phlaredb/sharding" 21 ) 22 23 type SplitAndMergeGrouper struct { 24 userID string 25 ranges []int64 26 logger log.Logger 27 28 // Number of shards to split source blocks into. 29 shardCount uint32 30 31 // Number of stages to split shards into. 32 splitStageSize uint32 33 34 // Number of groups that blocks used for splitting are grouped into. 35 splitGroupsCount uint32 36 } 37 38 // NewSplitAndMergeGrouper makes a new SplitAndMergeGrouper. The provided ranges must be sorted. 39 // If shardCount is 0, the splitting stage is disabled. 40 func NewSplitAndMergeGrouper( 41 userID string, 42 ranges []int64, 43 shardCount uint32, 44 splitStageSize uint32, 45 splitGroupsCount uint32, 46 logger log.Logger, 47 ) *SplitAndMergeGrouper { 48 return &SplitAndMergeGrouper{ 49 userID: userID, 50 ranges: ranges, 51 shardCount: shardCount, 52 splitStageSize: splitStageSize, 53 splitGroupsCount: splitGroupsCount, 54 logger: logger, 55 } 56 } 57 58 func (g *SplitAndMergeGrouper) Groups(blocks map[ulid.ULID]*block.Meta) (res []*Job, err error) { 59 flatBlocks := make([]*block.Meta, 0, len(blocks)) 60 for _, b := range blocks { 61 flatBlocks = append(flatBlocks, b) 62 } 63 64 for _, job := range planCompaction(g.userID, flatBlocks, g.ranges, g.shardCount, g.splitGroupsCount) { 65 // Sanity check: if splitting is disabled, we don't expect any job for the split stage. 66 if g.shardCount <= 0 && job.stage == stageSplit { 67 return nil, errors.Errorf("unexpected split stage job because splitting is disabled: %s", job.String()) 68 } 69 70 // The group key is used by the compactor as a unique identifier of the compaction job. 71 // Its content is not important for the compactor, but uniqueness must be guaranteed. 72 groupKey := fmt.Sprintf("%s-%s-%s-%d-%d", 73 defaultGroupKeyWithoutShardID(job.blocks[0]), 74 job.stage, 75 job.shardID, 76 job.rangeStart, 77 job.rangeEnd) 78 79 // All the blocks within the same group have the same downsample 80 // resolution and external labels. 81 resolution := job.blocks[0].Resolution 82 externalLabels := labels.FromMap(job.blocks[0].Labels) 83 84 compactionJob := NewJob( 85 g.userID, 86 groupKey, 87 externalLabels, 88 resolution, 89 job.stage == stageSplit, 90 g.shardCount, 91 g.splitStageSize, 92 job.shardingKey(), 93 ) 94 95 for _, m := range job.blocks { 96 if err := compactionJob.AppendMeta(m); err != nil { 97 return nil, errors.Wrap(err, "add block to compaction group") 98 } 99 } 100 101 res = append(res, compactionJob) 102 level.Debug(g.logger).Log("msg", "grouper found a compactable blocks group", "groupKey", groupKey, "job", job.String()) 103 } 104 105 return res, nil 106 } 107 108 // planCompaction analyzes the input blocks and returns a list of compaction jobs that can be 109 // run concurrently. Each returned job may belong either to this compactor instance or another one 110 // in the cluster, so the caller should check if they belong to their instance before running them. 111 func planCompaction(userID string, blocks []*block.Meta, ranges []int64, shardCount, splitGroups uint32) (jobs []*job) { 112 if len(blocks) == 0 || len(ranges) == 0 { 113 return nil 114 } 115 116 // First of all we have to group blocks using the default grouping, but not 117 // considering the shard ID in the external labels (because will be checked later). 118 mainGroups := map[string][]*block.Meta{} 119 for _, b := range blocks { 120 key := defaultGroupKeyWithoutShardID(b) 121 mainGroups[key] = append(mainGroups[key], b) 122 } 123 124 for _, mainBlocks := range mainGroups { 125 // Sort blocks by min time. 126 sortMetasByMinTime(mainBlocks) 127 128 for _, tr := range ranges { 129 nextJob: 130 for _, job := range planCompactionByRange(userID, mainBlocks, tr, tr == ranges[0], shardCount, splitGroups) { 131 // We can plan a job only if it doesn't conflict with other jobs already planned. 132 // Since we run the planning for each compaction range in increasing order, we guarantee 133 // that a job for the current time range is planned only if there's no other job for the 134 // same shard ID and an overlapping smaller time range. 135 for _, j := range jobs { 136 if job.conflicts(j) { 137 continue nextJob 138 } 139 } 140 141 jobs = append(jobs, job) 142 } 143 } 144 } 145 146 // Ensure we don't compact the most recent blocks prematurely when another one of 147 // the same size still fits in the range. To do it, we consider a job valid only 148 // if its range is before the most recent block or if it fully covers the range. 149 highestMaxTime := getMaxTime(blocks) 150 151 for idx := 0; idx < len(jobs); { 152 job := jobs[idx] 153 154 // If the job covers a range before the most recent block, it's fine. 155 if job.rangeEnd <= highestMaxTime { 156 idx++ 157 continue 158 } 159 160 // If the job covers the full range, it's fine. 161 if job.maxTime()-job.minTime() == job.rangeLength() { 162 idx++ 163 continue 164 } 165 166 // We have found a job which would compact recent blocks prematurely, 167 // so we need to filter it out. 168 jobs = append(jobs[:idx], jobs[idx+1:]...) 169 } 170 171 // Jobs will be sorted later using configured job sorting algorithm. 172 // Here we sort them by sharding key, to keep the output stable for testing. 173 sort.SliceStable(jobs, func(i, j int) bool { 174 if iKey, jKey := jobs[i].shardingKey(), jobs[j].shardingKey(); iKey != jKey { 175 return iKey < jKey 176 } 177 178 // The sharding key could be equal but external labels can still be different. 179 return defaultGroupKeyWithoutShardID(jobs[i].blocks[0]) < defaultGroupKeyWithoutShardID(jobs[j].blocks[0]) 180 }) 181 182 return jobs 183 } 184 185 // planCompactionByRange analyze the input blocks and returns a list of compaction jobs to 186 // compact blocks for the given compaction time range. Input blocks MUST be sorted by MinTime. 187 func planCompactionByRange(userID string, blocks []*block.Meta, tr int64, isSmallestRange bool, shardCount, splitGroups uint32) (jobs []*job) { 188 groups := groupBlocksByRange(blocks, tr) 189 190 for _, group := range groups { 191 // If this is the smallest time range and there's any non-split block, 192 // then we should plan a job to split blocks. 193 if shardCount > 0 && isSmallestRange { 194 if splitJobs := planSplitting(userID, group, splitGroups); len(splitJobs) > 0 { 195 jobs = append(jobs, splitJobs...) 196 continue 197 } 198 } 199 200 // If we reach this point, all blocks for this time range have already been split 201 // (or we're not processing the smallest time range, or splitting is disabled). 202 // Then, we can check if there's any group of blocks to be merged together for each shard. 203 for shardID, shardBlocks := range groupBlocksByShardID(group.blocks) { 204 // No merging to do if there are less than 2 blocks. 205 if len(shardBlocks) < 2 { 206 continue 207 } 208 209 jobs = append(jobs, &job{ 210 userID: userID, 211 stage: stageMerge, 212 shardID: shardID, 213 blocksGroup: blocksGroup{ 214 rangeStart: group.rangeStart, 215 rangeEnd: group.rangeEnd, 216 blocks: shardBlocks, 217 }, 218 }) 219 } 220 } 221 222 return jobs 223 } 224 225 // planSplitting returns a job to split the blocks in the input group or nil if there's nothing to do because 226 // all blocks in the group have already been split. 227 func planSplitting(userID string, group blocksGroup, splitGroups uint32) []*job { 228 blocks := group.getNonShardedBlocks() 229 if len(blocks) == 0 { 230 return nil 231 } 232 233 jobs := map[uint32]*job{} 234 235 if splitGroups == 0 { 236 splitGroups = 1 237 } 238 239 // The number of source blocks could be very large so, to have a better horizontal scaling, we should group 240 // the source blocks into N groups (where N = number of shards) and create a job for each group of blocks to 241 // merge and split. 242 for _, blk := range blocks { 243 splitGroup := block.HashBlockID(blk.ULID) % splitGroups 244 245 if jobs[splitGroup] == nil { 246 jobs[splitGroup] = &job{ 247 userID: userID, 248 stage: stageSplit, 249 shardID: sharding.FormatShardIDLabelValue(uint64(splitGroup), uint64(splitGroups)), 250 blocksGroup: blocksGroup{ 251 rangeStart: group.rangeStart, 252 rangeEnd: group.rangeEnd, 253 }, 254 } 255 } 256 257 jobs[splitGroup].blocks = append(jobs[splitGroup].blocks, blk) 258 } 259 260 // Convert the output. 261 out := make([]*job, 0, len(jobs)) 262 for _, job := range jobs { 263 out = append(out, job) 264 } 265 266 return out 267 } 268 269 // groupBlocksByShardID groups the blocks by shard ID (read from the block external labels). 270 // If a block doesn't have any shard ID in the external labels, it will be grouped with the 271 // shard ID set to an empty string. 272 func groupBlocksByShardID(blocks []*block.Meta) map[string][]*block.Meta { 273 groups := map[string][]*block.Meta{} 274 275 for _, block := range blocks { 276 // If the label doesn't exist, we'll group together such blocks using an 277 // empty string as shard ID. 278 shardID := block.Labels[sharding.CompactorShardIDLabel] 279 groups[shardID] = append(groups[shardID], block) 280 } 281 282 return groups 283 } 284 285 // groupBlocksByRange groups the blocks by the time range. The range sequence starts at 0. 286 // Input blocks MUST be sorted by MinTime. 287 // 288 // For example, if we have blocks [0-10, 10-20, 50-60, 90-100] and the split range tr is 30 289 // it returns [0-10, 10-20], [50-60], [90-100]. 290 func groupBlocksByRange(blocks []*block.Meta, tr int64) []blocksGroup { 291 var ret []blocksGroup 292 293 for i := 0; i < len(blocks); { 294 var ( 295 group blocksGroup 296 m = blocks[i] 297 ) 298 group.rangeStart = getRangeStart(m, tr) 299 group.rangeEnd = group.rangeStart + tr 300 301 // Skip blocks that don't fall into the range. This can happen via mis-alignment or 302 // by being the multiple of the intended range. 303 if int64(m.MaxTime) > group.rangeEnd { 304 i++ 305 continue 306 } 307 308 // Add all blocks to the current group that are within [t0, t0+tr]. 309 for ; i < len(blocks); i++ { 310 // If the block does not start within this group, then we should break the iteration 311 // and move it to the next group. 312 if int64(blocks[i].MinTime) >= group.rangeEnd { 313 break 314 } 315 316 // If the block doesn't fall into this group, but it started within this group then it 317 // means it spans across multiple ranges and we should skip it. 318 if int64(blocks[i].MaxTime) > group.rangeEnd { 319 continue 320 } 321 322 group.blocks = append(group.blocks, blocks[i]) 323 } 324 325 if len(group.blocks) > 0 { 326 ret = append(ret, group) 327 } 328 } 329 330 return ret 331 } 332 333 func getRangeStart(m *block.Meta, tr int64) int64 { 334 // Compute start of aligned time range of size tr closest to the current block's start. 335 if m.MinTime >= 0 { 336 return tr * (int64(m.MinTime) / tr) 337 } 338 return tr * ((int64(m.MinTime) - tr + 1) / tr) 339 } 340 341 func sortMetasByMinTime(metas []*block.Meta) []*block.Meta { 342 sort.Slice(metas, func(i, j int) bool { 343 if metas[i].MinTime != metas[j].MinTime { 344 return metas[i].MinTime < metas[j].MinTime 345 } 346 347 // Compare labels in case of same MinTime to get stable results. 348 return labels.Compare(labels.FromMap(metas[i].Labels), labels.FromMap(metas[j].Labels)) < 0 349 }) 350 351 return metas 352 } 353 354 // getMaxTime returns the highest max time across all input blocks. 355 func getMaxTime(blocks []*block.Meta) int64 { 356 maxTime := int64(math.MinInt64) 357 358 for _, block := range blocks { 359 if int64(block.MaxTime) > maxTime { 360 maxTime = int64(block.MaxTime) 361 } 362 } 363 364 return maxTime 365 } 366 367 // defaultGroupKeyWithoutShardID returns the default group key excluding ShardIDLabelName 368 // when computing it. 369 func defaultGroupKeyWithoutShardID(meta *block.Meta) string { 370 return defaultGroupKey(meta.Resolution, labelsWithout(meta.Labels, sharding.CompactorShardIDLabel, block.HostnameLabel)) 371 } 372 373 // labelsWithout returns a copy of the input labels without the given labels. 374 func labelsWithout(base map[string]string, without ...string) labels.Labels { 375 b := labels.NewScratchBuilder(len(base)) 376 Outer: 377 for k, v := range base { 378 for _, w := range without { 379 if k == w { 380 continue Outer 381 } 382 } 383 b.Add(k, v) 384 } 385 b.Sort() 386 return b.Labels() 387 }