github.com/grafana/pyroscope@v1.18.0/pkg/compactor/job.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/job.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  package compactor
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"math"
    11  	"path"
    12  	"sort"
    13  	"time"
    14  
    15  	"github.com/oklog/ulid/v2"
    16  	"github.com/pkg/errors"
    17  	"github.com/prometheus/prometheus/model/labels"
    18  
    19  	"github.com/grafana/pyroscope/pkg/objstore"
    20  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    21  )
    22  
// Job holds a compaction job, which consists of a group of blocks that should be compacted together.
// Not goroutine safe.
type Job struct {
	// userID is the tenant the blocks belong to.
	userID string
	// key is an opaque identifier for this job (returned by Key()).
	key string
	// labels are the external labels shared by the job's blocks;
	// AppendMeta enforces that appended blocks match them (hostname label excluded).
	labels labels.Labels
	// resolution is the common downsampling resolution of the job's blocks;
	// AppendMeta rejects blocks with a different resolution.
	resolution int64
	// metasByMinTime holds the source block metas, kept sorted by MinTime
	// ascending (re-sorted on every AppendMeta call).
	metasByMinTime []*block.Meta
	// useSplitting indicates whether the compacted output should be split
	// into multiple shards.
	useSplitting bool
	// shardingKey is the key used to shard this job across compactor instances.
	shardingKey string

	// The number of shards to split compacted block into. Not used if splitting is disabled.
	splitNumShards uint32
	splitStageSize uint32
}
    38  
    39  // NewJob returns a new compaction Job.
    40  func NewJob(userID string, key string, lset labels.Labels, resolution int64, useSplitting bool, splitNumShards, splitStageSize uint32, shardingKey string) *Job {
    41  	return &Job{
    42  		userID:         userID,
    43  		key:            key,
    44  		labels:         lset,
    45  		resolution:     resolution,
    46  		useSplitting:   useSplitting,
    47  		splitNumShards: splitNumShards,
    48  		splitStageSize: splitStageSize,
    49  		shardingKey:    shardingKey,
    50  	}
    51  }
    52  
    53  // UserID returns the user/tenant to which this job belongs to.
    54  func (job *Job) UserID() string {
    55  	return job.userID
    56  }
    57  
    58  // Key returns an identifier for the job.
    59  func (job *Job) Key() string {
    60  	return job.key
    61  }
    62  
    63  // AppendMeta the block with the given meta to the job.
    64  func (job *Job) AppendMeta(meta *block.Meta) error {
    65  	if !labels.Equal(labelsWithout(job.labels.Map(), block.HostnameLabel), labelsWithout(meta.Labels, block.HostnameLabel)) {
    66  		return errors.New("block and group labels do not match")
    67  	}
    68  	if job.resolution != meta.Resolution {
    69  		return errors.New("block and group resolution do not match")
    70  	}
    71  
    72  	job.metasByMinTime = append(job.metasByMinTime, meta)
    73  	sort.Slice(job.metasByMinTime, func(i, j int) bool {
    74  		return job.metasByMinTime[i].MinTime < job.metasByMinTime[j].MinTime
    75  	})
    76  	return nil
    77  }
    78  
    79  // IDs returns all sorted IDs of blocks in the job.
    80  func (job *Job) IDs() (ids []ulid.ULID) {
    81  	for _, m := range job.metasByMinTime {
    82  		ids = append(ids, m.ULID)
    83  	}
    84  	sort.Slice(ids, func(i, j int) bool {
    85  		return ids[i].Compare(ids[j]) < 0
    86  	})
    87  	return ids
    88  }
    89  
    90  // MinTime returns the min time across all job's blocks.
    91  func (job *Job) MinTime() int64 {
    92  	if len(job.metasByMinTime) > 0 {
    93  		return int64(job.metasByMinTime[0].MinTime)
    94  	}
    95  	return math.MaxInt64
    96  }
    97  
    98  // MaxTime returns the max time across all job's blocks.
    99  func (job *Job) MaxTime() int64 {
   100  	max := int64(math.MinInt64)
   101  	for _, m := range job.metasByMinTime {
   102  		if int64(m.MaxTime) > max {
   103  			max = int64(m.MaxTime)
   104  		}
   105  	}
   106  	return max
   107  }
   108  
   109  // MinCompactionLevel returns the minimum compaction level across all source blocks
   110  // in this job.
   111  func (job *Job) MinCompactionLevel() int {
   112  	min := math.MaxInt
   113  
   114  	for _, m := range job.metasByMinTime {
   115  		if m.Compaction.Level < min {
   116  			min = m.Compaction.Level
   117  		}
   118  	}
   119  
   120  	return min
   121  }
   122  
   123  // Metas returns the metadata for each block that is part of this job, ordered by the block's MinTime
   124  func (job *Job) Metas() []*block.Meta {
   125  	out := make([]*block.Meta, len(job.metasByMinTime))
   126  	copy(out, job.metasByMinTime)
   127  	return out
   128  }
   129  
   130  // Labels returns the external labels for the output block(s) of this job.
   131  func (job *Job) Labels() labels.Labels {
   132  	return job.labels
   133  }
   134  
   135  // Resolution returns the common downsampling resolution of blocks in the job.
   136  func (job *Job) Resolution() int64 {
   137  	return job.resolution
   138  }
   139  
   140  // UseSplitting returns whether blocks should be split into multiple shards when compacted.
   141  func (job *Job) UseSplitting() bool {
   142  	return job.useSplitting
   143  }
   144  
   145  // SplittingShards returns the number of output shards to build if splitting is enabled.
   146  func (job *Job) SplittingShards() uint32 {
   147  	return job.splitNumShards
   148  }
   149  
   150  // SplitStageSize returns the number of stages split shards will be written to.
   151  func (job *Job) SplitStageSize() uint32 {
   152  	return job.splitStageSize
   153  }
   154  
   155  // ShardingKey returns the key used to shard this job across multiple instances.
   156  func (job *Job) ShardingKey() string {
   157  	return job.shardingKey
   158  }
   159  
   160  func (job *Job) String() string {
   161  	return fmt.Sprintf("%s (minTime: %d maxTime: %d)", job.Key(), job.MinTime(), job.MaxTime())
   162  }
   163  
   164  // jobWaitPeriodElapsed returns whether the 1st level compaction wait period has
   165  // elapsed for the input job. If the wait period has not elapsed, then this function
   166  // also returns the Meta of the first source block encountered for which the wait
   167  // period has not elapsed yet.
   168  func jobWaitPeriodElapsed(ctx context.Context, job *Job, waitPeriod time.Duration, userBucket objstore.Bucket) (bool, *block.Meta, error) {
   169  	if waitPeriod <= 0 {
   170  		return true, nil, nil
   171  	}
   172  
   173  	if job.MinCompactionLevel() > 1 {
   174  		return true, nil, nil
   175  	}
   176  
   177  	// Check if the job contains any source block uploaded more recently
   178  	// than "wait period" ago.
   179  	threshold := time.Now().Add(-waitPeriod)
   180  
   181  	for _, meta := range job.Metas() {
   182  		metaPath := path.Join(meta.ULID.String(), block.MetaFilename)
   183  
   184  		attrs, err := userBucket.Attributes(ctx, metaPath)
   185  		if err != nil {
   186  			return false, meta, errors.Wrapf(err, "unable to get object attributes for %s", metaPath)
   187  		}
   188  
   189  		if attrs.LastModified.After(threshold) {
   190  			return false, meta, nil
   191  		}
   192  	}
   193  
   194  	return true, nil, nil
   195  }