github.com/grafana/pyroscope@v1.18.0/pkg/metastore/compaction_raft_handler.go (about)

package metastore

import (
	"context"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/hashicorp/raft"
	"github.com/opentracing/opentracing-go/ext"
	"go.etcd.io/bbolt"

	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
	"github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log"
	"github.com/grafana/pyroscope/pkg/metastore/compaction"
	"github.com/grafana/pyroscope/pkg/metastore/tracing"
)

type IndexReplacer interface {
	ReplaceBlocks(*bbolt.Tx, *metastorev1.CompactedBlocks) error
}
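
// A compile-time assertion is a common way to tie such an interface to its
// concrete implementation. Assuming the metastore index service (an
// index.Index type from pkg/metastore/index) is the intended implementer,
// the check would look like this; the assertion below is illustrative and
// not present in the original file:
//
//	var _ IndexReplacer = (*index.Index)(nil)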

type CompactionCommandHandler struct {
	logger     log.Logger
	index      IndexReplacer
	compactor  compaction.Compactor
	planner    compaction.Planner
	scheduler  compaction.Scheduler
	tombstones Tombstones
}

func NewCompactionCommandHandler(
	logger log.Logger,
	index IndexReplacer,
	compactor compaction.Compactor,
	planner compaction.Planner,
	scheduler compaction.Scheduler,
	tombstones Tombstones,
) *CompactionCommandHandler {
	return &CompactionCommandHandler{
		logger:     logger,
		index:      index,
		compactor:  compactor,
		planner:    planner,
		scheduler:  scheduler,
		tombstones: tombstones,
	}
}

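// GetCompactionPlanUpdate prepares a compaction plan update in response to a
// worker's status report: it refreshes or completes the reported jobs, assigns
// vacant jobs up to the worker's capacity, and plans new jobs to keep the
// queue filled. The update is only prepared here; it is applied later via
// UpdateCompactionPlan, once Raft consensus is reached.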
func (h *CompactionCommandHandler) GetCompactionPlanUpdate(
	ctx context.Context, tx *bbolt.Tx, cmd *raft.Log, req *raft_log.GetCompactionPlanUpdateRequest,
) (resp *raft_log.GetCompactionPlanUpdateResponse, err error) {
	span, _ := tracing.StartSpanFromContext(ctx, "raft.GetCompactionPlanUpdate")
	span.SetTag("status_updates", len(req.StatusUpdates))
	span.SetTag("assign_jobs_max", req.AssignJobsMax)
	span.SetTag("raft_log_index", cmd.Index)
	span.SetTag("raft_log_term", cmd.Term)
	defer func() {
		if err != nil {
			ext.LogError(span, err)
		}
		span.Finish()
	}()

	// We need to generate a plan of the update caused by the new status
	// report from the worker. The plan will be used to update the schedule
	// after the Raft consensus is reached.
	planner := h.planner.NewPlan(cmd)
	schedule := h.scheduler.NewSchedule(tx, cmd)
	p := new(raft_log.CompactionPlanUpdate)

	// Any status update may translate to either a job lease refresh or a
	// completed job. A status update might be rejected if the worker has
	// lost the job. We treat revoked jobs as vacant slots for new
	// assignments, and therefore update job statuses first.
	var revoked int
	for _, status := range req.StatusUpdates {
		switch state := schedule.UpdateJob(status); {
		case state == nil:
			// Nil state indicates that the job has been abandoned and
			// reassigned, or the request is not valid. This may happen
			// from time to time, and we should just ignore such requests.
			revoked++

		case state.Status == metastorev1.CompactionJobStatus_COMPACTION_STATUS_SUCCESS:
			p.CompletedJobs = append(p.CompletedJobs, &raft_log.CompletedCompactionJob{State: state})

		case state.Status == metastorev1.CompactionJobStatus_COMPACTION_STATUS_IN_PROGRESS:
			p.UpdatedJobs = append(p.UpdatedJobs, &raft_log.UpdatedCompactionJob{State: state})

		default:
			// Unknown statuses are ignored. From the worker's perspective,
			// the job is reassigned.
		}
	}

	// AssignJobsMax tells us how many free slots the worker has. We need to
	// account for revoked jobs, as they free up worker slots.
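	// For example, a worker reporting AssignJobsMax=2 with one revoked job
	// can take three assignments in this round.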
	capacity := int(req.AssignJobsMax) + revoked

	// Next, we need to create new jobs and assign existing ones to the worker.
	//
	// NOTE(kolesnikovae): On one hand, if we assign first, we may violate the
	// SJF principle. On the other hand, if we plan new jobs first, it may
	// cause starvation of lower-priority jobs when the compaction worker does
	// not keep up with the influx of high-priority jobs. As of now, we assign
	// jobs before creating new ones. If we change this, we need to make sure
	// that the Schedule implementation allows it.
	for assigned := 0; assigned < capacity; assigned++ {
		job, err := schedule.AssignJob()
		if err != nil {
			level.Error(h.logger).Log("msg", "failed to assign compaction job", "err", err)
			return nil, err
		}
		if job != nil {
			p.AssignedJobs = append(p.AssignedJobs, job)
		}
	}

	for created := 0; created < capacity; created++ {
		// Evict jobs that cannot be assigned to workers.
		if evicted := schedule.EvictJob(); evicted != nil {
			level.Debug(h.logger).Log("msg", "planning to evict failed job", "job", evicted.Name)
			p.EvictedJobs = append(p.EvictedJobs, &raft_log.EvictedCompactionJob{
				State: evicted,
			})
		}
		plan, err := planner.CreateJob()
		if err != nil {
			level.Error(h.logger).Log("msg", "failed to create compaction job", "err", err)
			return nil, err
		}
		if plan == nil {
			// No more jobs to create.
			break
		}
		state := schedule.AddJob(plan)
		if state == nil {
			// The scheduler declined the job. The only case when this may
			// happen is when the scheduler queue is full. Theoretically, this
			// should not occur, because we evict jobs before creating new
			// ones; however, if all the jobs in the queue are healthy, we may
			// still end up here.
			level.Warn(h.logger).Log("msg", "compaction job rejected by scheduler")
			break
		}
		p.NewJobs = append(p.NewJobs, &raft_log.NewCompactionJob{
			State: state,
			Plan:  plan,
		})
	}

	span.SetTag("assigned_jobs", len(p.AssignedJobs))
	span.SetTag("new_jobs", len(p.NewJobs))
	span.SetTag("evicted_jobs", len(p.EvictedJobs))
	return &raft_log.GetCompactionPlanUpdateResponse{Term: cmd.Term, PlanUpdate: p}, nil
}
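
// The two handlers in this file form a prepare/commit pair. Below is a
// minimal sketch of the calling side, assuming hypothetical proposeGet and
// proposeUpdate helpers that replicate the respective commands through Raft
// and return the result of applying them; the helper names and the function
// itself are illustrative and not part of this package.
func proposeCompactionPlanUpdateSketch(
	req *raft_log.GetCompactionPlanUpdateRequest,
	proposeGet func(*raft_log.GetCompactionPlanUpdateRequest) (*raft_log.GetCompactionPlanUpdateResponse, error),
	proposeUpdate func(*raft_log.UpdateCompactionPlanRequest) (*raft_log.UpdateCompactionPlanResponse, error),
) (*raft_log.CompactionPlanUpdate, error) {
	prepared, err := proposeGet(req)
	if err != nil {
		return nil, err
	}
	// The term observed at preparation time is carried along so that
	// UpdateCompactionPlan can reject the commit if leadership changed
	// between the two commands.
	accepted, err := proposeUpdate(&raft_log.UpdateCompactionPlanRequest{
		Term:       prepared.Term,
		PlanUpdate: prepared.PlanUpdate,
	})
	if err != nil {
		return nil, err
	}
	// A response with a nil plan update means the request was rejected.
	return accepted.GetPlanUpdate(), nil
}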
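// UpdateCompactionPlan applies a prepared plan update: it updates the planner
// and scheduler state, deletes the tombstones consumed by new jobs, and, for
// each completed job, tombstones the source blocks, enqueues the resulting
// blocks for further compaction, and replaces the source blocks in the index.
// Everything runs in the scope of the caller's bbolt transaction.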
func (h *CompactionCommandHandler) UpdateCompactionPlan(
	ctx context.Context, tx *bbolt.Tx, cmd *raft.Log, req *raft_log.UpdateCompactionPlanRequest,
) (resp *raft_log.UpdateCompactionPlanResponse, err error) {
	span, _ := tracing.StartSpanFromContext(ctx, "raft.UpdateCompactionPlan")
	span.SetTag("raft_log_index", cmd.Index)
	span.SetTag("raft_log_term", cmd.Term)
	span.SetTag("request_term", req.Term)
	defer func() {
		if err != nil {
			ext.LogError(span, err)
		}
		span.Finish()
	}()

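	// A stale term or a missing plan indicates that leadership has changed
	// since the update was prepared; the command is ignored, and the empty
	// response signals the rejection to the proposer.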
	if req.Term != cmd.Term || req.GetPlanUpdate() == nil {
		level.Warn(h.logger).Log(
			"msg", "rejecting compaction plan update; term mismatch: leader has changed",
			"current_term", cmd.Term,
			"request_term", req.Term,
		)
		return new(raft_log.UpdateCompactionPlanResponse), nil
	}

	if err = h.planner.UpdatePlan(tx, req.PlanUpdate); err != nil {
		level.Error(h.logger).Log("msg", "failed to update compaction planner", "err", err)
		return nil, err
	}

	if err = h.scheduler.UpdateSchedule(tx, req.PlanUpdate); err != nil {
		level.Error(h.logger).Log("msg", "failed to update compaction schedule", "err", err)
		return nil, err
	}

	for _, job := range req.PlanUpdate.NewJobs {
		if err = h.tombstones.DeleteTombstones(tx, cmd, job.Plan.Tombstones...); err != nil {
			level.Error(h.logger).Log("msg", "failed to delete tombstones", "err", err)
			return nil, err
		}
	}

	for _, job := range req.PlanUpdate.CompletedJobs {
		compacted := job.GetCompactedBlocks()
		if compacted == nil || compacted.SourceBlocks == nil || len(compacted.NewBlocks) == 0 {
			level.Warn(h.logger).Log("msg", "compacted blocks are missing; skipping", "job", job.State.Name)
			continue
		}
		if err = h.tombstones.AddTombstones(tx, cmd, blockTombstonesForCompletedJob(job)); err != nil {
			level.Error(h.logger).Log("msg", "failed to add tombstones", "err", err)
			return nil, err
		}
		for _, block := range compacted.NewBlocks {
			if err = h.compactor.Compact(tx, compaction.NewBlockEntry(cmd, block)); err != nil {
				level.Error(h.logger).Log("msg", "failed to compact block", "err", err)
				return nil, err
			}
		}
		if err = h.index.ReplaceBlocks(tx, compacted); err != nil {
			level.Error(h.logger).Log("msg", "failed to replace blocks", "err", err)
			return nil, err
		}
	}

	span.SetTag("new_jobs", len(req.PlanUpdate.NewJobs))
	span.SetTag("completed_jobs", len(req.PlanUpdate.CompletedJobs))
	span.SetTag("updated_jobs", len(req.PlanUpdate.UpdatedJobs))
	return &raft_log.UpdateCompactionPlanResponse{PlanUpdate: req.PlanUpdate}, nil
}

func blockTombstonesForCompletedJob(job *raft_log.CompletedCompactionJob) *metastorev1.Tombstones {
	source := job.CompactedBlocks.SourceBlocks
	return &metastorev1.Tombstones{
		Blocks: &metastorev1.BlockTombstones{
			Name:            job.State.Name,
			Shard:           source.Shard,
			Tenant:          source.Tenant,
			CompactionLevel: job.State.CompactionLevel,
			Blocks:          source.Blocks,
		},
	}
}
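
// A minimal sketch of the mapping above, with made-up values. The BlockList
// literal for SourceBlocks and its field names are assumptions about the
// metastorev1 schema, inferred from the accessors used in this file.
func exampleBlockTombstones() *metastorev1.Tombstones {
	job := &raft_log.CompletedCompactionJob{
		State: &raft_log.CompactionJobState{Name: "job-1", CompactionLevel: 1},
		CompactedBlocks: &metastorev1.CompactedBlocks{
			SourceBlocks: &metastorev1.BlockList{
				Tenant: "tenant-a",
				Shard:  4,
				Blocks: []string{"block-1", "block-2"},
			},
		},
	}
	// The tombstones take the job name and compaction level from the job
	// state, and the tenant, shard, and block IDs from the source blocks.
	return blockTombstonesForCompletedJob(job)
}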