// SPDX-License-Identifier: AGPL-3.0-only
// Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/job.go
// Provenance-includes-license: Apache-2.0
// Provenance-includes-copyright: The Cortex Authors.
package compactor

import (
	"context"
	"fmt"
	"math"
	"path"
	"sort"
	"time"

	"github.com/oklog/ulid/v2"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/model/labels"

	"github.com/grafana/pyroscope/pkg/objstore"
	"github.com/grafana/pyroscope/pkg/phlaredb/block"
)

// Job holds a compaction job, which consists of a group of blocks that should be compacted together.
// Not goroutine safe.
type Job struct {
	userID     string
	key        string
	labels     labels.Labels
	resolution int64
	// metasByMinTime is kept sorted by each block's MinTime (ascending); see AppendMeta.
	metasByMinTime []*block.Meta
	useSplitting   bool
	shardingKey    string

	// The number of shards to split compacted block into. Not used if splitting is disabled.
	splitNumShards uint32
	splitStageSize uint32
}

// NewJob returns a new compaction Job.
func NewJob(userID string, key string, lset labels.Labels, resolution int64, useSplitting bool, splitNumShards, splitStageSize uint32, shardingKey string) *Job {
	return &Job{
		userID:         userID,
		key:            key,
		labels:         lset,
		resolution:     resolution,
		useSplitting:   useSplitting,
		splitNumShards: splitNumShards,
		splitStageSize: splitStageSize,
		shardingKey:    shardingKey,
	}
}

// UserID returns the user/tenant to which this job belongs to.
func (job *Job) UserID() string {
	return job.userID
}

// Key returns an identifier for the job.
func (job *Job) Key() string {
	return job.key
}

// AppendMeta appends the block with the given meta to the job.
64 func (job *Job) AppendMeta(meta *block.Meta) error { 65 if !labels.Equal(labelsWithout(job.labels.Map(), block.HostnameLabel), labelsWithout(meta.Labels, block.HostnameLabel)) { 66 return errors.New("block and group labels do not match") 67 } 68 if job.resolution != meta.Resolution { 69 return errors.New("block and group resolution do not match") 70 } 71 72 job.metasByMinTime = append(job.metasByMinTime, meta) 73 sort.Slice(job.metasByMinTime, func(i, j int) bool { 74 return job.metasByMinTime[i].MinTime < job.metasByMinTime[j].MinTime 75 }) 76 return nil 77 } 78 79 // IDs returns all sorted IDs of blocks in the job. 80 func (job *Job) IDs() (ids []ulid.ULID) { 81 for _, m := range job.metasByMinTime { 82 ids = append(ids, m.ULID) 83 } 84 sort.Slice(ids, func(i, j int) bool { 85 return ids[i].Compare(ids[j]) < 0 86 }) 87 return ids 88 } 89 90 // MinTime returns the min time across all job's blocks. 91 func (job *Job) MinTime() int64 { 92 if len(job.metasByMinTime) > 0 { 93 return int64(job.metasByMinTime[0].MinTime) 94 } 95 return math.MaxInt64 96 } 97 98 // MaxTime returns the max time across all job's blocks. 99 func (job *Job) MaxTime() int64 { 100 max := int64(math.MinInt64) 101 for _, m := range job.metasByMinTime { 102 if int64(m.MaxTime) > max { 103 max = int64(m.MaxTime) 104 } 105 } 106 return max 107 } 108 109 // MinCompactionLevel returns the minimum compaction level across all source blocks 110 // in this job. 
111 func (job *Job) MinCompactionLevel() int { 112 min := math.MaxInt 113 114 for _, m := range job.metasByMinTime { 115 if m.Compaction.Level < min { 116 min = m.Compaction.Level 117 } 118 } 119 120 return min 121 } 122 123 // Metas returns the metadata for each block that is part of this job, ordered by the block's MinTime 124 func (job *Job) Metas() []*block.Meta { 125 out := make([]*block.Meta, len(job.metasByMinTime)) 126 copy(out, job.metasByMinTime) 127 return out 128 } 129 130 // Labels returns the external labels for the output block(s) of this job. 131 func (job *Job) Labels() labels.Labels { 132 return job.labels 133 } 134 135 // Resolution returns the common downsampling resolution of blocks in the job. 136 func (job *Job) Resolution() int64 { 137 return job.resolution 138 } 139 140 // UseSplitting returns whether blocks should be split into multiple shards when compacted. 141 func (job *Job) UseSplitting() bool { 142 return job.useSplitting 143 } 144 145 // SplittingShards returns the number of output shards to build if splitting is enabled. 146 func (job *Job) SplittingShards() uint32 { 147 return job.splitNumShards 148 } 149 150 // SplitStageSize returns the number of stages split shards will be written to. 151 func (job *Job) SplitStageSize() uint32 { 152 return job.splitStageSize 153 } 154 155 // ShardingKey returns the key used to shard this job across multiple instances. 156 func (job *Job) ShardingKey() string { 157 return job.shardingKey 158 } 159 160 func (job *Job) String() string { 161 return fmt.Sprintf("%s (minTime: %d maxTime: %d)", job.Key(), job.MinTime(), job.MaxTime()) 162 } 163 164 // jobWaitPeriodElapsed returns whether the 1st level compaction wait period has 165 // elapsed for the input job. If the wait period has not elapsed, then this function 166 // also returns the Meta of the first source block encountered for which the wait 167 // period has not elapsed yet. 
168 func jobWaitPeriodElapsed(ctx context.Context, job *Job, waitPeriod time.Duration, userBucket objstore.Bucket) (bool, *block.Meta, error) { 169 if waitPeriod <= 0 { 170 return true, nil, nil 171 } 172 173 if job.MinCompactionLevel() > 1 { 174 return true, nil, nil 175 } 176 177 // Check if the job contains any source block uploaded more recently 178 // than "wait period" ago. 179 threshold := time.Now().Add(-waitPeriod) 180 181 for _, meta := range job.Metas() { 182 metaPath := path.Join(meta.ULID.String(), block.MetaFilename) 183 184 attrs, err := userBucket.Attributes(ctx, metaPath) 185 if err != nil { 186 return false, meta, errors.Wrapf(err, "unable to get object attributes for %s", metaPath) 187 } 188 189 if attrs.LastModified.After(threshold) { 190 return false, meta, nil 191 } 192 } 193 194 return true, nil, nil 195 }