github.com/grafana/pyroscope@v1.18.0/pkg/metastore/compaction/compactor/plan.go (about)

     1  package compactor
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"strconv"
     7  	"strings"
     8  
     9  	"github.com/cespare/xxhash/v2"
    10  
    11  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    12  	"github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log"
    13  	"github.com/grafana/pyroscope/pkg/iter"
    14  	"github.com/grafana/pyroscope/pkg/util"
    15  )
    16  
// plan should be used to prepare the compaction plan update.
// The implementation must have no side effects or alter the
// Compactor in any way.
type plan struct {
	// level is the compaction level currently being scanned by nextJob.
	level uint32
	// Read-only.
	tombstones iter.Iterator[*metastorev1.Tombstones]
	compactor  *Compactor
	// batches iterates over the in-order batch queue of the current level.
	// nil means the iterator for the level has not been created yet (it is
	// built lazily in nextJob and dropped when the level is exhausted).
	batches *batchIter
	// blocks iterates over the blocks of the batch currently being planned.
	blocks *blockIter
	// now is the planning timestamp used for the max-age check in nextJob
	// (units must match Config.exceedsMaxAge — presumably unix nanos,
	// consistent with util.ULIDStringUnixNano; confirm against callers).
	now int64
}
    29  
    30  func (p *plan) CreateJob() (*raft_log.CompactionJobPlan, error) {
    31  	planned := p.nextJob()
    32  	if planned == nil {
    33  		return nil, nil
    34  	}
    35  	job := raft_log.CompactionJobPlan{
    36  		Name:            planned.name,
    37  		Shard:           planned.shard,
    38  		Tenant:          planned.tenant,
    39  		CompactionLevel: planned.level,
    40  		SourceBlocks:    planned.blocks,
    41  		Tombstones:      planned.tombstones,
    42  	}
    43  	return &job, nil
    44  }
    45  
// jobPlan is the mutable, in-progress representation of a compaction job
// assembled by plan.nextJob; plan.CreateJob converts the finished plan
// into the raft log representation.
type jobPlan struct {
	compactionKey
	// config is borrowed from the Compactor while planning; finalize sets
	// it back to nil.
	config *Config
	// name is the globally unique job name produced by nameJob (finalize).
	name string
	// minT and maxT track the minimum/maximum block timestamps added so
	// far (derived from block ULIDs in tryAdd); zeroed by finalize.
	minT int64
	maxT int64
	// tombstones to attach to the job; populated by plan.getTombstones.
	tombstones []*metastorev1.Tombstones
	// blocks are the source block IDs in the order they were added.
	blocks []string
}
    55  
// Plan compaction of the queued blocks. The algorithm is simple:
//   - Iterate block queues from low levels to higher ones.
//   - Find the oldest batch in the order of arrival and try to compact it.
//   - A batch may not translate into a job (e.g., if some blocks have been
//     removed). Therefore, we navigate to the next batch with the same
//     compaction key in this case.
//
// nextJob returns nil when no level can produce a job; the returned plan
// has already been finalized (named, tombstones attached).
func (p *plan) nextJob() *jobPlan {
	job := p.newJob()
	for p.level < uint32(len(p.compactor.queue.levels)) {
		if p.batches == nil {
			// Lazily build the batch iterator for the current level,
			// skipping levels that have no queue at all.
			level := p.compactor.queue.levels[p.level]
			if level == nil {
				p.level++
				continue
			}
			p.batches = newBatchIter(level)
		}

		b, ok := p.batches.next()
		if !ok {
			// We've done with the current level: no more batches
			// in the in-order queue. Move to the next level.
			p.batches = nil
			p.level++
			continue
		}

		// We've found the oldest batch, it's time to plan a job.
		// Job levels are zero based: L0 job means that it includes blocks
		// with compaction level 0. This can be altered (1-based levels):
		// job.level++
		job.reset(b.staged.key)
		p.blocks.setBatch(b)

		var force bool
		for {
			// Peek the next block in the compaction queue with the same
			// compaction key as the current batch. If there are no blocks
			// in the current batch, or we have already visited all of them
			// previously, the iterator will proceed to the next batch with
			// this compaction key. The call to peek() will return false, if
			// only no blocks eligible for compaction are left in the queue.
			block, found := p.blocks.peek()
			if !found {
				// No more blocks with this compaction key at the level.
				// We may want to force compaction even if the current job
				// is incomplete: e.g., if the blocks remain in the queue for
				// too long. Note that we do not check the block timestamps:
				// we only care when the first (oldest) batch was created.
				// We do want to check the _oldest_, not the _current_ batch
				// here, because it could be relatively young.
				force = p.compactor.config.exceedsMaxAge(b, p.now)
				break
			}
			if !job.tryAdd(block) {
				// We may not want to add a bock to the job if it extends the
				// compacted block time range beyond the desired limit.
				// In this case, we need to force compaction of incomplete job.
				force = true
				break
			}
			// If the block was added to the job, we advance the block iterator
			// to the next block within the batch, remembering the current block
			// as a visited one.
			p.blocks.advance()
			if job.isComplete() {
				break
			}
		}

		if len(job.blocks) > 0 && (job.isComplete() || force) {
			// Typically, we want to proceed to the next compaction key,
			// but if the batch is not empty (i.e., we could not put all
			// the blocks into the job), we must finish it first.
			if p.blocks.more() {
				// There are more blocks in the current batch: p.blocks.peek()
				// reported a block is found, but we could not add it to the job.
				//
				// We need to reset the batch iterator to continue from the oldest
				// batch that still has blocks to process: basically, we want
				// p.batches.next() to return b.
				//
				// This ensures we don't skip blocks or process them out of order.
				// Block iterator ensures that each block is only accessed once.
				//
				// If the queue that b points to has any unvisited blocks,
				// p.blocks.peek() will return them. Otherwise, we continue
				// iterating over the in-order queue of batches (different
				// compaction queues have distinct compaction keys).
				//
				// We assume that we can re-iterate over the batch blocks next time,
				// skipping the ones that have already been visited (it's done by
				// iterator internally).
				p.batches.reset(b)
			}
			p.getTombstones(job)
			job.finalize()
			return job
		}

		// The job plan is canceled for the compaction key, and we need to
		// continue with the next compaction key, or level.
	}

	return nil
}
   162  
   163  func (p *plan) getTombstones(job *jobPlan) {
   164  	if int32(p.level) > p.compactor.config.CleanupJobMaxLevel {
   165  		return
   166  	}
   167  	if int32(p.level) < p.compactor.config.CleanupJobMinLevel {
   168  		return
   169  	}
   170  	s := int(p.compactor.config.CleanupBatchSize)
   171  	for i := 0; i < s && p.tombstones.Next(); i++ {
   172  		job.tombstones = append(job.tombstones, p.tombstones.At())
   173  	}
   174  }
   175  
   176  func (p *plan) newJob() *jobPlan {
   177  	return &jobPlan{
   178  		config: &p.compactor.config,
   179  		blocks: make([]string, 0, defaultBlockBatchSize),
   180  		minT:   math.MaxInt64,
   181  		maxT:   math.MinInt64,
   182  	}
   183  }
   184  
   185  func (job *jobPlan) reset(k compactionKey) {
   186  	job.compactionKey = k
   187  	job.blocks = job.blocks[:0]
   188  	job.minT = math.MaxInt64
   189  	job.maxT = math.MinInt64
   190  }
   191  
   192  func (job *jobPlan) tryAdd(block string) bool {
   193  	t := util.ULIDStringUnixNano(block)
   194  	if len(job.blocks) > 0 && !job.isInAllowedTimeRange(t) {
   195  		return false
   196  	}
   197  	job.blocks = append(job.blocks, block)
   198  	job.maxT = max(job.maxT, t)
   199  	job.minT = min(job.minT, t)
   200  	return true
   201  }
   202  
   203  func (job *jobPlan) isInAllowedTimeRange(t int64) bool {
   204  	if age := job.config.maxAge(job.config.maxLevel()); age > 0 {
   205  		//          minT        maxT
   206  		// --t------|===========|------t--
   207  		//   |      |---------a--------|
   208  		//   |---------b--------|
   209  		a := t - job.minT
   210  		b := job.maxT - t
   211  		if a > age || b > age {
   212  			return false
   213  		}
   214  	}
   215  	return true
   216  }
   217  
   218  func (job *jobPlan) isComplete() bool {
   219  	return uint(len(job.blocks)) >= job.config.maxBlocks(job.level)
   220  }
   221  
   222  func (job *jobPlan) finalize() {
   223  	nameJob(job)
   224  	job.minT = 0
   225  	job.maxT = 0
   226  	job.config = nil
   227  }
   228  
   229  // Job name is a variable length string that should be globally unique
   230  // and is used as a tiebreaker in the compaction job queue ordering.
   231  func nameJob(plan *jobPlan) {
   232  	// Should be on stack; 16b per block; expected ~20 blocks.
   233  	buf := make([]byte, 0, 512)
   234  	for _, b := range plan.blocks {
   235  		buf = append(buf, b...)
   236  	}
   237  	var name strings.Builder
   238  	name.WriteString(fmt.Sprintf("%x", xxhash.Sum64(buf)))
   239  	name.WriteByte('-')
   240  	name.WriteByte('T')
   241  	name.WriteString(plan.tenant)
   242  	name.WriteByte('-')
   243  	name.WriteByte('S')
   244  	name.WriteString(strconv.FormatUint(uint64(plan.shard), 10))
   245  	name.WriteByte('-')
   246  	name.WriteByte('L')
   247  	name.WriteString(strconv.FormatUint(uint64(plan.level), 10))
   248  	plan.name = name.String()
   249  }