github.com/grafana/pyroscope@v1.18.0/pkg/metastore/compaction/compactor/plan.go (about) 1 package compactor 2 3 import ( 4 "fmt" 5 "math" 6 "strconv" 7 "strings" 8 9 "github.com/cespare/xxhash/v2" 10 11 metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" 12 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log" 13 "github.com/grafana/pyroscope/pkg/iter" 14 "github.com/grafana/pyroscope/pkg/util" 15 ) 16 17 // plan should be used to prepare the compaction plan update. 18 // The implementation must have no side effects or alter the 19 // Compactor in any way. 20 type plan struct { 21 level uint32 22 // Read-only. 23 tombstones iter.Iterator[*metastorev1.Tombstones] 24 compactor *Compactor 25 batches *batchIter 26 blocks *blockIter 27 now int64 28 } 29 30 func (p *plan) CreateJob() (*raft_log.CompactionJobPlan, error) { 31 planned := p.nextJob() 32 if planned == nil { 33 return nil, nil 34 } 35 job := raft_log.CompactionJobPlan{ 36 Name: planned.name, 37 Shard: planned.shard, 38 Tenant: planned.tenant, 39 CompactionLevel: planned.level, 40 SourceBlocks: planned.blocks, 41 Tombstones: planned.tombstones, 42 } 43 return &job, nil 44 } 45 46 type jobPlan struct { 47 compactionKey 48 config *Config 49 name string 50 minT int64 51 maxT int64 52 tombstones []*metastorev1.Tombstones 53 blocks []string 54 } 55 56 // Plan compaction of the queued blocks. The algorithm is simple: 57 // - Iterate block queues from low levels to higher ones. 58 // - Find the oldest batch in the order of arrival and try to compact it. 59 // - A batch may not translate into a job (e.g., if some blocks have been 60 // removed). Therefore, we navigate to the next batch with the same 61 // compaction key in this case. 
func (p *plan) nextJob() *jobPlan {
	job := p.newJob()
	for p.level < uint32(len(p.compactor.queue.levels)) {
		if p.batches == nil {
			// Lazily create the batch iterator for the current level;
			// a nil level has no queued batches.
			level := p.compactor.queue.levels[p.level]
			if level == nil {
				p.level++
				continue
			}
			p.batches = newBatchIter(level)
		}

		b, ok := p.batches.next()
		if !ok {
			// We're done with the current level: no more batches
			// in the in-order queue. Move to the next level.
			p.batches = nil
			p.level++
			continue
		}

		// We've found the oldest batch, it's time to plan a job.
		// Job levels are zero based: L0 job means that it includes blocks
		// with compaction level 0. This can be altered (1-based levels):
		// job.level++
		job.reset(b.staged.key)
		p.blocks.setBatch(b)

		var force bool
		for {
			// Peek the next block in the compaction queue with the same
			// compaction key as the current batch. If there are no blocks
			// in the current batch, or we have already visited all of them
			// previously, the iterator will proceed to the next batch with
			// this compaction key. The call to peek() will return false, if
			// only no blocks eligible for compaction are left in the queue.
			block, found := p.blocks.peek()
			if !found {
				// No more blocks with this compaction key at the level.
				// We may want to force compaction even if the current job
				// is incomplete: e.g., if the blocks remain in the queue for
				// too long. Note that we do not check the block timestamps:
				// we only care when the first (oldest) batch was created.
				// We do want to check the _oldest_, not the _current_ batch
				// here, because it could be relatively young.
				force = p.compactor.config.exceedsMaxAge(b, p.now)
				break
			}
			if !job.tryAdd(block) {
				// We may not want to add a block to the job if it extends the
				// compacted block time range beyond the desired limit.
				// In this case, we need to force compaction of incomplete job.
				force = true
				break
			}
			// If the block was added to the job, we advance the block iterator
			// to the next block within the batch, remembering the current block
			// as a visited one.
			p.blocks.advance()
			if job.isComplete() {
				break
			}
		}

		if len(job.blocks) > 0 && (job.isComplete() || force) {
			// Typically, we want to proceed to the next compaction key,
			// but if the batch is not empty (i.e., we could not put all
			// the blocks into the job), we must finish it first.
			if p.blocks.more() {
				// There are more blocks in the current batch: p.blocks.peek()
				// reported a block is found, but we could not add it to the job.
				//
				// We need to reset the batch iterator to continue from the oldest
				// batch that still has blocks to process: basically, we want
				// p.batches.next() to return b.
				//
				// This ensures we don't skip blocks or process them out of order.
				// Block iterator ensures that each block is only accessed once.
				//
				// If the queue that b points to has any unvisited blocks,
				// p.blocks.peek() will return them. Otherwise, we continue
				// iterating over the in-order queue of batches (different
				// compaction queues have distinct compaction keys).
				//
				// We assume that we can re-iterate over the batch blocks next time,
				// skipping the ones that have already been visited (it's done by
				// iterator internally).
				p.batches.reset(b)
			}
			p.getTombstones(job)
			job.finalize()
			return job
		}

		// The job plan is canceled for the compaction key, and we need to
		// continue with the next compaction key, or level.
	}

	return nil
}

// getTombstones attaches up to CleanupBatchSize tombstones from the
// plan's read-only tombstone iterator to the job, but only when the
// current level lies within the configured
// [CleanupJobMinLevel, CleanupJobMaxLevel] range.
func (p *plan) getTombstones(job *jobPlan) {
	if int32(p.level) > p.compactor.config.CleanupJobMaxLevel {
		return
	}
	if int32(p.level) < p.compactor.config.CleanupJobMinLevel {
		return
	}
	s := int(p.compactor.config.CleanupBatchSize)
	for i := 0; i < s && p.tombstones.Next(); i++ {
		job.tombstones = append(job.tombstones, p.tombstones.At())
	}
}

// newJob allocates an empty job plan. minT/maxT are initialized to the
// extreme values so that the first block added always narrows the range.
func (p *plan) newJob() *jobPlan {
	return &jobPlan{
		config: &p.compactor.config,
		blocks: make([]string, 0, defaultBlockBatchSize),
		minT:   math.MaxInt64,
		maxT:   math.MinInt64,
	}
}

// reset prepares the job plan for reuse with a new compaction key,
// retaining the blocks slice capacity to avoid reallocation.
func (job *jobPlan) reset(k compactionKey) {
	job.compactionKey = k
	job.blocks = job.blocks[:0]
	job.minT = math.MaxInt64
	job.maxT = math.MinInt64
}

// tryAdd appends the block to the job if doing so keeps the job's time
// span within the allowed range; the block timestamp is derived from
// its ULID string. It reports whether the block was added. The first
// block is always accepted.
func (job *jobPlan) tryAdd(block string) bool {
	t := util.ULIDStringUnixNano(block)
	if len(job.blocks) > 0 && !job.isInAllowedTimeRange(t) {
		return false
	}
	job.blocks = append(job.blocks, block)
	job.maxT = max(job.maxT, t)
	job.minT = min(job.minT, t)
	return true
}

// isInAllowedTimeRange reports whether a block with timestamp t can be
// added without stretching the job's [minT, maxT] span beyond the
// configured max age. A non-positive max age disables the check.
func (job *jobPlan) isInAllowedTimeRange(t int64) bool {
	if age := job.config.maxAge(job.config.maxLevel()); age > 0 {
		//          minT        maxT
		// --t------|===========|------t--
		//   |---------a--------|
		//           |---------b--------|
		a := t - job.minT
		b := job.maxT - t
		if a > age || b > age {
			return false
		}
	}
	return true
}

// isComplete reports whether the job has reached the per-level limit on
// the number of source blocks.
func (job *jobPlan) isComplete() bool {
	return uint(len(job.blocks)) >= job.config.maxBlocks(job.level)
}

// finalize computes the job name and clears fields that are not part of
// the resulting plan (config is released; the time range is zeroed).
func (job *jobPlan) finalize() {
	nameJob(job)
	job.minT = 0
	job.maxT = 0
	job.config = nil
}

// Job name is a variable length string that should be globally unique
// and is used as a tiebreaker in the compaction job queue ordering.
// The name is "<xxhash-of-block-IDs>-T<tenant>-S<shard>-L<level>".
func nameJob(plan *jobPlan) {
	// Should be on stack; 16b per block; expected ~20 blocks.
	// NOTE(review): canonical ULID strings are 26 bytes, so ~20 blocks
	// may exceed 512 and spill to the heap — harmless, just a size hint.
	buf := make([]byte, 0, 512)
	for _, b := range plan.blocks {
		buf = append(buf, b...)
	}
	var name strings.Builder
	name.WriteString(fmt.Sprintf("%x", xxhash.Sum64(buf)))
	name.WriteByte('-')
	name.WriteByte('T')
	name.WriteString(plan.tenant)
	name.WriteByte('-')
	name.WriteByte('S')
	name.WriteString(strconv.FormatUint(uint64(plan.shard), 10))
	name.WriteByte('-')
	name.WriteByte('L')
	name.WriteString(strconv.FormatUint(uint64(plan.level), 10))
	plan.name = name.String()
}