github.com/grafana/pyroscope@v1.18.0/pkg/metastore/compaction/scheduler/scheduler.go (about)

     1  package scheduler
     2  
     3  import (
     4  	"flag"
     5  	"sync"
     6  	"time"
     7  
     8  	"github.com/hashicorp/raft"
     9  	"github.com/prometheus/client_golang/prometheus"
    10  	"go.etcd.io/bbolt"
    11  
    12  	"github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log"
    13  	"github.com/grafana/pyroscope/pkg/iter"
    14  	"github.com/grafana/pyroscope/pkg/metastore/compaction"
    15  	"github.com/grafana/pyroscope/pkg/metastore/compaction/scheduler/store"
    16  	"github.com/grafana/pyroscope/pkg/util"
    17  )
    18  
// Compile-time check: *Scheduler must satisfy the compaction.Scheduler interface.
var _ compaction.Scheduler = (*Scheduler)(nil)

// Compaction job scheduler. Jobs are prioritized by the compaction level, and
// the deadline time.
//
// Compaction workers own jobs while they are in progress. Ownership handling is
// implemented using lease deadlines and fencing tokens:
// https://martin.kleppmann.com/2016/02/08/how-to-do-distributed-locking.html
    27  
    28  // JobStore does not really store jobs as they are: it explicitly
    29  // distinguishes between the job and the job state.
    30  //
    31  // Implementation note: block metadata should never be stored in StoreJob:
    32  // those are already stored in the metadata index.
    33  type JobStore interface {
    34  	StoreJobPlan(*bbolt.Tx, *raft_log.CompactionJobPlan) error
    35  	GetJobPlan(tx *bbolt.Tx, name string) (*raft_log.CompactionJobPlan, error)
    36  	DeleteJobPlan(tx *bbolt.Tx, name string) error
    37  
    38  	StoreJobState(*bbolt.Tx, *raft_log.CompactionJobState) error
    39  	DeleteJobState(tx *bbolt.Tx, name string) error
    40  	ListEntries(*bbolt.Tx) iter.Iterator[*raft_log.CompactionJobState]
    41  
    42  	CreateBuckets(*bbolt.Tx) error
    43  }
    44  
    45  type Config struct {
    46  	MaxFailures   uint64        `yaml:"compaction_max_failures" doc:""`
    47  	LeaseDuration time.Duration `yaml:"compaction_job_lease_duration" doc:""`
    48  	MaxQueueSize  uint64        `yaml:"compaction_max_job_queue_size" doc:""`
    49  }
    50  
    51  func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
    52  	f.Uint64Var(&c.MaxFailures, prefix+"compaction-max-failures", 3, "")
    53  	f.DurationVar(&c.LeaseDuration, prefix+"compaction-job-lease-duration", 15*time.Second, "")
    54  	f.Uint64Var(&c.MaxQueueSize, prefix+"compaction-max-job-queue-size", 10000, "")
    55  }
    56  
// Scheduler is the compaction job scheduler: it keeps the job queue in
// memory and persists job plans and states through the JobStore.
type Scheduler struct {
	config Config
	store  JobStore
	// Although the job queue is only accessed synchronously,
	// the mutex is needed to collect stats.
	mu    sync.Mutex
	queue *schedulerQueue
}
    65  
    66  // NewScheduler creates a scheduler with the given lease duration.
    67  // Typically, callers should update jobs at the interval not exceeding
    68  // the half of the lease duration.
    69  func NewScheduler(config Config, store JobStore, reg prometheus.Registerer) *Scheduler {
    70  	s := &Scheduler{
    71  		config: config,
    72  		store:  store,
    73  		queue:  newJobQueue(),
    74  	}
    75  	collector := newStatsCollector(s)
    76  	util.RegisterOrGet(reg, collector)
    77  	return s
    78  }
    79  
// NewStore returns the default JobStore implementation.
func NewStore() *store.JobStore {
	return store.NewJobStore()
}
    83  
    84  func (sc *Scheduler) NewSchedule(tx *bbolt.Tx, cmd *raft.Log) compaction.Schedule {
    85  	return &schedule{
    86  		tx:        tx,
    87  		token:     cmd.Index,
    88  		now:       cmd.AppendedAt,
    89  		scheduler: sc,
    90  		updates:   make(map[string]*raft_log.CompactionJobState),
    91  	}
    92  }
    93  
    94  func (sc *Scheduler) UpdateSchedule(tx *bbolt.Tx, update *raft_log.CompactionPlanUpdate) error {
    95  	sc.mu.Lock()
    96  	defer sc.mu.Unlock()
    97  
    98  	for _, job := range update.EvictedJobs {
    99  		name := job.State.Name
   100  		if err := sc.store.DeleteJobPlan(tx, name); err != nil {
   101  			return err
   102  		}
   103  		if err := sc.store.DeleteJobState(tx, name); err != nil {
   104  			return err
   105  		}
   106  		sc.queue.evict(name)
   107  	}
   108  
   109  	for _, job := range update.NewJobs {
   110  		if err := sc.store.StoreJobPlan(tx, job.Plan); err != nil {
   111  			return err
   112  		}
   113  		if err := sc.store.StoreJobState(tx, job.State); err != nil {
   114  			return err
   115  		}
   116  		sc.queue.put(job.State)
   117  	}
   118  
   119  	for _, job := range update.UpdatedJobs {
   120  		if err := sc.store.StoreJobState(tx, job.State); err != nil {
   121  			return err
   122  		}
   123  		sc.queue.put(job.State)
   124  	}
   125  
   126  	for _, job := range update.AssignedJobs {
   127  		if err := sc.store.StoreJobState(tx, job.State); err != nil {
   128  			return err
   129  		}
   130  		sc.queue.put(job.State)
   131  	}
   132  
   133  	for _, job := range update.CompletedJobs {
   134  		name := job.State.Name
   135  		if err := sc.store.DeleteJobPlan(tx, name); err != nil {
   136  			return err
   137  		}
   138  		if err := sc.store.DeleteJobState(tx, name); err != nil {
   139  			return err
   140  		}
   141  		sc.queue.delete(name)
   142  	}
   143  
   144  	return nil
   145  }
   146  
// Init creates the store buckets within the given transaction.
func (sc *Scheduler) Init(tx *bbolt.Tx) error {
	return sc.store.CreateBuckets(tx)
}
   150  
   151  func (sc *Scheduler) Restore(tx *bbolt.Tx) error {
   152  	sc.mu.Lock()
   153  	defer sc.mu.Unlock()
   154  	// Reset in-memory state before loading entries from the store.
   155  	sc.queue.reset()
   156  	entries := sc.store.ListEntries(tx)
   157  	defer func() {
   158  		_ = entries.Close()
   159  	}()
   160  	for entries.Next() {
   161  		sc.queue.put(entries.At())
   162  	}
   163  	// Zero all stats updated during Restore.
   164  	sc.queue.resetStats()
   165  	return entries.Err()
   166  }