github.com/grafana/pyroscope@v1.18.0/pkg/compactor/job_sorting.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/job_sorting.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package compactor
     7  
     8  import (
     9  	"sort"
    10  )
    11  
const (
	// CompactionOrderOldestFirst is the name of the jobs order that
	// GetJobsOrderFunction resolves to sortJobsBySmallestRangeOldestBlocksFirst.
	CompactionOrderOldestFirst = "smallest-range-oldest-blocks-first"
	// CompactionOrderNewestFirst is the name of the jobs order that
	// GetJobsOrderFunction resolves to sortJobsByNewestBlocksFirst.
	CompactionOrderNewestFirst = "newest-blocks-first"
)

// CompactionOrders lists the order names declared in this file; each of them
// is recognized by GetJobsOrderFunction.
var CompactionOrders = []string{CompactionOrderOldestFirst, CompactionOrderNewestFirst}

// JobsOrderFunc takes a list of jobs and returns them in the order defined by
// the function. The implementations in this file sort the input slice in place
// and return that same slice.
type JobsOrderFunc func(jobs []*Job) []*Job
    20  
    21  // GetJobsOrderFunction returns jobs ordering function, or nil, if name doesn't refer to any function.
    22  func GetJobsOrderFunction(name string) JobsOrderFunc {
    23  	switch name {
    24  	case CompactionOrderNewestFirst:
    25  		return sortJobsByNewestBlocksFirst
    26  	case CompactionOrderOldestFirst:
    27  		return sortJobsBySmallestRangeOldestBlocksFirst
    28  	default:
    29  		return nil
    30  	}
    31  }
    32  
    33  // sortJobsBySmallestRangeOldestBlocksFirst returns input jobs sorted by smallest range, oldest min time first.
    34  // The rationale of this sorting is that we may want to favor smaller ranges first (ie. to deduplicate samples
    35  // sooner than later) and older ones are more likely to be "complete" (no missing block still to be uploaded).
    36  // Split jobs are moved to the beginning of the output, because merge jobs are only generated if there are no split jobs in the
    37  // same time range, so finishing split jobs first unblocks more jobs and gives opportunity to more compactors
    38  // to work on them.
    39  func sortJobsBySmallestRangeOldestBlocksFirst(jobs []*Job) []*Job {
    40  	sort.SliceStable(jobs, func(i, j int) bool {
    41  		// Move split jobs to the front.
    42  		if jobs[i].UseSplitting() && !jobs[j].UseSplitting() {
    43  			return true
    44  		}
    45  
    46  		if !jobs[i].UseSplitting() && jobs[j].UseSplitting() {
    47  			return false
    48  		}
    49  
    50  		checkLength := !jobs[i].UseSplitting() || !jobs[j].UseSplitting()
    51  		// Don't check length for splitting jobs. We want to the oldest split blocks to be first, no matter the length.
    52  
    53  		if checkLength {
    54  			iLength := jobs[i].MaxTime() - jobs[i].MinTime()
    55  			jLength := jobs[j].MaxTime() - jobs[j].MinTime()
    56  
    57  			if iLength != jLength {
    58  				return iLength < jLength
    59  			}
    60  		}
    61  
    62  		if jobs[i].MinTime() != jobs[j].MinTime() {
    63  			return jobs[i].MinTime() < jobs[j].MinTime()
    64  		}
    65  
    66  		// Guarantee stable sort for tests.
    67  		return jobs[i].Key() < jobs[j].Key()
    68  	})
    69  
    70  	return jobs
    71  }
    72  
    73  // sortJobsByNewestBlocksFirst returns input jobs sorted by most recent time ranges first
    74  // (regardless of their compaction level). The rationale of this sorting is that in case the
    75  // compactor is lagging behind, we compact up to the largest range (eg. 24h) the most recent
    76  // blocks first and the move to older ones. Most recent blocks are the one more likely to be queried.
    77  func sortJobsByNewestBlocksFirst(jobs []*Job) []*Job {
    78  	sort.SliceStable(jobs, func(i, j int) bool {
    79  		iMaxTime := jobs[i].MaxTime()
    80  		jMaxTime := jobs[j].MaxTime()
    81  		if iMaxTime != jMaxTime {
    82  			return iMaxTime > jMaxTime
    83  		}
    84  
    85  		iLength := iMaxTime - jobs[i].MinTime()
    86  		jLength := jMaxTime - jobs[j].MinTime()
    87  		if iLength != jLength {
    88  			return iLength < jLength
    89  		}
    90  
    91  		// Guarantee stable sort for tests.
    92  		return jobs[i].Key() < jobs[j].Key()
    93  	})
    94  
    95  	return jobs
    96  }