github.com/grafana/pyroscope@v1.18.0/pkg/compactor/job_sorting.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/job_sorting.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package compactor 7 8 import ( 9 "sort" 10 ) 11 12 const ( 13 CompactionOrderOldestFirst = "smallest-range-oldest-blocks-first" 14 CompactionOrderNewestFirst = "newest-blocks-first" 15 ) 16 17 var CompactionOrders = []string{CompactionOrderOldestFirst, CompactionOrderNewestFirst} 18 19 type JobsOrderFunc func(jobs []*Job) []*Job 20 21 // GetJobsOrderFunction returns jobs ordering function, or nil, if name doesn't refer to any function. 22 func GetJobsOrderFunction(name string) JobsOrderFunc { 23 switch name { 24 case CompactionOrderNewestFirst: 25 return sortJobsByNewestBlocksFirst 26 case CompactionOrderOldestFirst: 27 return sortJobsBySmallestRangeOldestBlocksFirst 28 default: 29 return nil 30 } 31 } 32 33 // sortJobsBySmallestRangeOldestBlocksFirst returns input jobs sorted by smallest range, oldest min time first. 34 // The rationale of this sorting is that we may want to favor smaller ranges first (ie. to deduplicate samples 35 // sooner than later) and older ones are more likely to be "complete" (no missing block still to be uploaded). 36 // Split jobs are moved to the beginning of the output, because merge jobs are only generated if there are no split jobs in the 37 // same time range, so finishing split jobs first unblocks more jobs and gives opportunity to more compactors 38 // to work on them. 39 func sortJobsBySmallestRangeOldestBlocksFirst(jobs []*Job) []*Job { 40 sort.SliceStable(jobs, func(i, j int) bool { 41 // Move split jobs to the front. 42 if jobs[i].UseSplitting() && !jobs[j].UseSplitting() { 43 return true 44 } 45 46 if !jobs[i].UseSplitting() && jobs[j].UseSplitting() { 47 return false 48 } 49 50 checkLength := !jobs[i].UseSplitting() || !jobs[j].UseSplitting() 51 // Don't check length for splitting jobs. We want to the oldest split blocks to be first, no matter the length. 52 53 if checkLength { 54 iLength := jobs[i].MaxTime() - jobs[i].MinTime() 55 jLength := jobs[j].MaxTime() - jobs[j].MinTime() 56 57 if iLength != jLength { 58 return iLength < jLength 59 } 60 } 61 62 if jobs[i].MinTime() != jobs[j].MinTime() { 63 return jobs[i].MinTime() < jobs[j].MinTime() 64 } 65 66 // Guarantee stable sort for tests. 67 return jobs[i].Key() < jobs[j].Key() 68 }) 69 70 return jobs 71 } 72 73 // sortJobsByNewestBlocksFirst returns input jobs sorted by most recent time ranges first 74 // (regardless of their compaction level). The rationale of this sorting is that in case the 75 // compactor is lagging behind, we compact up to the largest range (eg. 24h) the most recent 76 // blocks first and the move to older ones. Most recent blocks are the one more likely to be queried. 77 func sortJobsByNewestBlocksFirst(jobs []*Job) []*Job { 78 sort.SliceStable(jobs, func(i, j int) bool { 79 iMaxTime := jobs[i].MaxTime() 80 jMaxTime := jobs[j].MaxTime() 81 if iMaxTime != jMaxTime { 82 return iMaxTime > jMaxTime 83 } 84 85 iLength := iMaxTime - jobs[i].MinTime() 86 jLength := jMaxTime - jobs[j].MinTime() 87 if iLength != jLength { 88 return iLength < jLength 89 } 90 91 // Guarantee stable sort for tests. 92 return jobs[i].Key() < jobs[j].Key() 93 }) 94 95 return jobs 96 }