github.com/grafana/pyroscope@v1.18.0/pkg/metastore/compaction_service.go

github.com/grafana/pyroscope@v1.18.0/pkg/metastore/compaction_service.go (about)

     1  package metastore
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  
     7  	"github.com/go-kit/log"
     8  	"github.com/go-kit/log/level"
     9  	"github.com/opentracing/opentracing-go"
    10  	"github.com/opentracing/opentracing-go/ext"
    11  	"google.golang.org/grpc/codes"
    12  	"google.golang.org/grpc/status"
    13  
    14  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    15  	"github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log"
    16  	"github.com/grafana/pyroscope/pkg/metastore/fsm"
    17  	"github.com/grafana/pyroscope/pkg/metastore/raftnode"
    18  )
    19  
    20  type CompactionService struct {
    21  	metastorev1.CompactionServiceServer
    22  
    23  	logger log.Logger
    24  	mu     sync.Mutex
    25  	raft   Raft
    26  }
    27  
    28  func NewCompactionService(
    29  	logger log.Logger,
    30  	raft Raft,
    31  ) *CompactionService {
    32  	return &CompactionService{
    33  		logger: logger,
    34  		raft:   raft,
    35  	}
    36  }
    37  
    38  func (svc *CompactionService) PollCompactionJobs(
    39  	ctx context.Context,
    40  	req *metastorev1.PollCompactionJobsRequest,
    41  ) (resp *metastorev1.PollCompactionJobsResponse, err error) {
    42  	span, ctx := opentracing.StartSpanFromContext(ctx, "CompactionService.PollCompactionJobs")
    43  	defer func() {
    44  		if err != nil {
    45  			ext.LogError(span, err)
    46  		}
    47  		span.Finish()
    48  	}()
    49  
    50  	span.SetTag("status_updates", len(req.GetStatusUpdates()))
    51  	span.SetTag("job_capacity", req.GetJobCapacity())
    52  
    53  	// This is a two-step process. To commit changes to the compaction plan,
    54  	// we need to ensure that all replicas apply exactly the same changes.
    55  	// Instead of relying on identical behavior across replicas and a
    56  	// reproducible compaction plan, we explicitly replicate the change.
    57  	//
    58  	// NOTE(kolesnikovae): We can use Leader Read optimization here. However,
    59  	// we would need to ensure synchronization between the compactor and the
    60  	// index, and unsure isolation at the data level. For now, we're using
    61  	// the raft log to guarantee serializable isolation level.
    62  	//
    63  	// Make sure that only one compaction plan update is in progress at a time.
    64  	// This lock does not introduce contention, as the raft log is synchronous.
    65  	svc.mu.Lock()
    66  	defer svc.mu.Unlock()
    67  
    68  	// First, we ask the current leader to prepare the change. This is a read
    69  	// operation conducted through the raft log: at this stage, we only
    70  	// prepare changes; the command handler does not alter the state.
    71  	request := &raft_log.GetCompactionPlanUpdateRequest{
    72  		StatusUpdates: make([]*raft_log.CompactionJobStatusUpdate, 0, len(req.StatusUpdates)),
    73  		AssignJobsMax: req.JobCapacity,
    74  	}
    75  
    76  	// We only send the status updates (without job results) to minimize the
    77  	// traffic, but we want to include the results of compaction in the final
    78  	// proposal. If the status update is accepted, we trust the worker and
    79  	// don't need to load our own copy of the job.
    80  	compacted := make(map[string]*metastorev1.CompactionJobStatusUpdate, len(req.StatusUpdates))
    81  	for _, update := range req.StatusUpdates {
    82  		if update.CompactedBlocks != nil {
    83  			compacted[update.Name] = update
    84  		}
    85  		request.StatusUpdates = append(request.StatusUpdates, &raft_log.CompactionJobStatusUpdate{
    86  			Name:   update.Name,
    87  			Token:  update.Token,
    88  			Status: update.Status,
    89  		})
    90  	}
    91  
    92  	cmd := fsm.RaftLogEntryType(raft_log.RaftCommand_RAFT_COMMAND_GET_COMPACTION_PLAN_UPDATE)
    93  	proposeResp, err := svc.raft.Propose(ctx, cmd, req)
    94  	if err != nil {
    95  		if !raftnode.IsRaftLeadershipError(err) {
    96  			level.Error(svc.logger).Log("msg", "failed to prepare compaction plan", "err", err)
    97  		}
    98  		return nil, err
    99  	}
   100  	prepared := proposeResp.(*raft_log.GetCompactionPlanUpdateResponse)
   101  	planUpdate := prepared.GetPlanUpdate()
   102  
   103  	// Copy plan updates to the worker response. The job plan is only sent for
   104  	// newly assigned jobs. Lease renewals do not require the plan to be sent.
   105  	workerResp := &metastorev1.PollCompactionJobsResponse{
   106  		CompactionJobs: make([]*metastorev1.CompactionJob, 0, len(planUpdate.AssignedJobs)),
   107  		Assignments:    make([]*metastorev1.CompactionJobAssignment, 0, len(planUpdate.UpdatedJobs)),
   108  	}
   109  	for _, updated := range planUpdate.UpdatedJobs {
   110  		update := updated.State
   111  		workerResp.Assignments = append(workerResp.Assignments, &metastorev1.CompactionJobAssignment{
   112  			Name:           update.Name,
   113  			Token:          update.Token,
   114  			LeaseExpiresAt: update.LeaseExpiresAt,
   115  		})
   116  	}
   117  	for _, assigned := range planUpdate.AssignedJobs {
   118  		assignment := assigned.State
   119  		workerResp.Assignments = append(workerResp.Assignments, &metastorev1.CompactionJobAssignment{
   120  			Name:           assignment.Name,
   121  			Token:          assignment.Token,
   122  			LeaseExpiresAt: assignment.LeaseExpiresAt,
   123  		})
   124  		job := assigned.Plan
   125  		workerResp.CompactionJobs = append(workerResp.CompactionJobs, &metastorev1.CompactionJob{
   126  			Name:            job.Name,
   127  			Shard:           job.Shard,
   128  			Tenant:          job.Tenant,
   129  			CompactionLevel: job.CompactionLevel,
   130  			SourceBlocks:    job.SourceBlocks,
   131  			Tombstones:      job.Tombstones,
   132  		})
   133  		// Assigned jobs are not written to the raft log (only the assignments):
   134  		// from our perspective (scheduler and planner) these are just job updates.
   135  		assigned.Plan = nil
   136  	}
   137  
   138  	// Include the compacted blocks in the final proposal.
   139  	for _, job := range planUpdate.CompletedJobs {
   140  		if update := compacted[job.State.Name]; update != nil {
   141  			job.CompactedBlocks = update.CompactedBlocks
   142  		}
   143  	}
   144  
   145  	// Now that we have the plan, we need to propagate it through the
   146  	// raft log to ensure it is applied consistently across all replicas,
   147  	// regardless of their individual state or view of the plan.
   148  	cmd = fsm.RaftLogEntryType(raft_log.RaftCommand_RAFT_COMMAND_UPDATE_COMPACTION_PLAN)
   149  
   150  	// We also include the current term of the planning step so that later
   151  	// we can verify that the leader has not changed, and the plan is still
   152  	// up-to-date. Otherwise, e.g., in the ABA case, when the current node
   153  	// loses leadership and gains is back in-between these two steps, we
   154  	// cannot guarantee that the proposed plan is still valid and up-to-date.
   155  	// The raft handler cannot return an error here (because this is a valid
   156  	// scenario, and we don't want to stop the node/cluster). Instead, an
   157  	// empty response would indicate that the plan is rejected.
   158  	proposal := &raft_log.UpdateCompactionPlanRequest{Term: prepared.Term, PlanUpdate: planUpdate}
   159  	if proposeResp, err = svc.raft.Propose(ctx, cmd, proposal); err != nil {
   160  		if !raftnode.IsRaftLeadershipError(err) {
   161  			level.Error(svc.logger).Log("msg", "failed to update compaction plan", "err", err)
   162  		}
   163  		return nil, err
   164  	}
   165  	accepted := proposeResp.(*raft_log.UpdateCompactionPlanResponse).GetPlanUpdate()
   166  	if accepted == nil {
   167  		level.Warn(svc.logger).Log("msg", "compaction plan update rejected")
   168  		return nil, status.Error(codes.FailedPrecondition, "failed to update compaction plan")
   169  	}
   170  
   171  	// As of now, accepted plan always matches the proposed one,
   172  	// so our prepared worker response is still valid.
   173  
   174  	span.SetTag("assigned_jobs", len(workerResp.GetCompactionJobs()))
   175  	span.SetTag("assignment_updates", len(workerResp.GetAssignments()))
   176  	return workerResp, nil
   177  }