package metastore

import (
	"context"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/hashicorp/raft"
	"github.com/opentracing/opentracing-go/ext"
	"go.etcd.io/bbolt"

	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
	"github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1/raft_log"
	"github.com/grafana/pyroscope/pkg/metastore/compaction"
	"github.com/grafana/pyroscope/pkg/metastore/tracing"
)

// IndexReplacer replaces compacted source blocks with the blocks they were
// compacted into, within the scope of the given transaction.
type IndexReplacer interface {
	ReplaceBlocks(*bbolt.Tx, *metastorev1.CompactedBlocks) error
}

// CompactionCommandHandler handles compaction-related Raft commands: it
// prepares compaction plan updates in response to worker status reports,
// and applies agreed-upon plan updates to the planner, scheduler, index,
// and tombstone state.
type CompactionCommandHandler struct {
	logger     log.Logger
	index      IndexReplacer
	compactor  compaction.Compactor
	planner    compaction.Planner
	scheduler  compaction.Scheduler
	tombstones Tombstones
}

// NewCompactionCommandHandler creates a CompactionCommandHandler with the
// given dependencies.
func NewCompactionCommandHandler(
	logger log.Logger,
	index IndexReplacer,
	compactor compaction.Compactor,
	planner compaction.Planner,
	scheduler compaction.Scheduler,
	tombstones Tombstones,
) *CompactionCommandHandler {
	return &CompactionCommandHandler{
		logger:     logger,
		index:      index,
		compactor:  compactor,
		planner:    planner,
		scheduler:  scheduler,
		tombstones: tombstones,
	}
}

// GetCompactionPlanUpdate builds a compaction plan update in response to a
// worker status report: completed and refreshed jobs, revocations, new
// assignments, evictions, and newly created jobs. The returned update is a
// proposal only — it takes effect when it is applied via UpdateCompactionPlan
// after Raft consensus is reached. Any error is recorded on the tracing span
// before it is returned.
func (h *CompactionCommandHandler) GetCompactionPlanUpdate(
	ctx context.Context, tx *bbolt.Tx, cmd *raft.Log, req *raft_log.GetCompactionPlanUpdateRequest,
) (resp *raft_log.GetCompactionPlanUpdateResponse, err error) {
	span, _ := tracing.StartSpanFromContext(ctx, "raft.GetCompactionPlanUpdate")
	span.SetTag("status_updates", len(req.StatusUpdates))
	span.SetTag("assign_jobs_max", req.AssignJobsMax)
	span.SetTag("raft_log_index", cmd.Index)
	span.SetTag("raft_log_term", cmd.Term)
	defer func() {
		if err != nil {
			ext.LogError(span, err)
		}
		span.Finish()
	}()

	// We need to generate a plan of the update caused by the new status
	// report from the worker. The plan will be used to update the schedule
	// after the Raft consensus is reached.
	planner := h.planner.NewPlan(cmd)
	schedule := h.scheduler.NewSchedule(tx, cmd)
	p := new(raft_log.CompactionPlanUpdate)

	// Any status update may translate to either a job lease refresh, or a
	// completed job. Status update might be rejected, if the worker has
	// lost the job. We treat revoked jobs as vacant slots for new
	// assignments, therefore we try to update jobs' status first.
	var revoked int
	for _, status := range req.StatusUpdates {
		switch state := schedule.UpdateJob(status); {
		case state == nil:
			// Nil state indicates that the job has been abandoned and
			// reassigned, or the request is not valid. This may happen
			// from time to time, and we should just ignore such requests.
			revoked++

		case state.Status == metastorev1.CompactionJobStatus_COMPACTION_STATUS_SUCCESS:
			p.CompletedJobs = append(p.CompletedJobs, &raft_log.CompletedCompactionJob{State: state})

		case state.Status == metastorev1.CompactionJobStatus_COMPACTION_STATUS_IN_PROGRESS:
			p.UpdatedJobs = append(p.UpdatedJobs, &raft_log.UpdatedCompactionJob{State: state})

		default:
			// Unknown statuses are ignored. From the worker perspective,
			// the job is re-assigned.
		}
	}

	// AssignJobsMax tells us how many free slots the worker has. We need to
	// account for the revoked jobs, as they are freeing the worker slots.
	capacity := int(req.AssignJobsMax) + revoked

	// Next, we need to create new jobs and assign existing ones.
	//
	// NOTE(kolesnikovae): On one hand, if we assign first, we may violate the
	// SJF principle. If we plan new jobs first, it may cause starvation of
	// lower-priority jobs, when the compaction worker does not keep up with
	// the high-priority job influx. As of now, we assign jobs before creating
	// ones. If we change it, we need to make sure that the Schedule
	// implementation allows doing this.
	for assigned := 0; assigned < capacity; assigned++ {
		// NOTE(review): a nil job with a nil error means nothing was
		// assignable; the loop keeps polling up to capacity times rather
		// than breaking early — presumably cheap, but worth confirming.
		job, err := schedule.AssignJob()
		if err != nil {
			level.Error(h.logger).Log("msg", "failed to assign compaction job", "err", err)
			return nil, err
		}
		if job != nil {
			p.AssignedJobs = append(p.AssignedJobs, job)
		}
	}

	for created := 0; created < capacity; created++ {
		// Evict jobs that cannot be assigned to workers.
		if evicted := schedule.EvictJob(); evicted != nil {
			level.Debug(h.logger).Log("msg", "planning to evict failed job", "job", evicted.Name)
			p.EvictedJobs = append(p.EvictedJobs, &raft_log.EvictedCompactionJob{
				State: evicted,
			})
		}
		plan, err := planner.CreateJob()
		if err != nil {
			level.Error(h.logger).Log("msg", "failed to create compaction job", "err", err)
			return nil, err
		}
		if plan == nil {
			// No more jobs to create.
			break
		}
		state := schedule.AddJob(plan)
		if state == nil {
			// Scheduler declined the job. The only case when this may happen
			// is when the scheduler queue is full; theoretically, this should
			// not happen, because we evicted jobs before creating new ones.
			// However, if all the jobs are healthy, we may end up here.
			level.Warn(h.logger).Log("msg", "compaction job rejected by scheduler")
			break
		}
		p.NewJobs = append(p.NewJobs, &raft_log.NewCompactionJob{
			State: state,
			Plan:  plan,
		})
	}

	span.SetTag("assigned_jobs", len(p.AssignedJobs))
	span.SetTag("new_jobs", len(p.NewJobs))
	span.SetTag("evicted_jobs", len(p.EvictedJobs))
	return &raft_log.GetCompactionPlanUpdateResponse{Term: cmd.Term, PlanUpdate: p}, nil
}

// UpdateCompactionPlan applies a compaction plan update agreed on through
// Raft. It rejects the update if the request term does not match the current
// command term (leader change) or if the update is empty, returning an empty
// response with a nil error in that case. Otherwise it updates the planner
// and scheduler, deletes tombstones referenced by new jobs, and, for each
// completed job with valid compacted blocks: adds tombstones for the source
// blocks, feeds the new blocks to the compactor, and replaces the blocks in
// the index. Any error is recorded on the tracing span before it is returned.
func (h *CompactionCommandHandler) UpdateCompactionPlan(
	ctx context.Context, tx *bbolt.Tx, cmd *raft.Log, req *raft_log.UpdateCompactionPlanRequest,
) (resp *raft_log.UpdateCompactionPlanResponse, err error) {
	span, _ := tracing.StartSpanFromContext(ctx, "raft.UpdateCompactionPlan")
	span.SetTag("raft_log_index", cmd.Index)
	span.SetTag("raft_log_term", cmd.Term)
	span.SetTag("request_term", req.Term)
	defer func() {
		if err != nil {
			ext.LogError(span, err)
		}
		span.Finish()
	}()

	if req.Term != cmd.Term || req.GetPlanUpdate() == nil {
		level.Warn(h.logger).Log(
			"msg", "rejecting compaction plan update; term mismatch: leader has changed",
			"current_term", cmd.Term,
			"request_term", req.Term,
		)
		return new(raft_log.UpdateCompactionPlanResponse), nil
	}

	if err = h.planner.UpdatePlan(tx, req.PlanUpdate); err != nil {
		level.Error(h.logger).Log("msg", "failed to update compaction planner", "err", err)
		return nil, err
	}

	if err = h.scheduler.UpdateSchedule(tx, req.PlanUpdate); err != nil {
		level.Error(h.logger).Log("msg", "failed to update compaction schedule", "err", err)
		return nil, err
	}

	for _, job := range req.PlanUpdate.NewJobs {
		if err = h.tombstones.DeleteTombstones(tx, cmd, job.Plan.Tombstones...); err != nil {
			level.Error(h.logger).Log("msg", "failed to delete tombstones", "err", err)
			return nil, err
		}
	}

	for _, job := range req.PlanUpdate.CompletedJobs {
		compacted := job.GetCompactedBlocks()
		// Defensive check: a completed job must carry both its source blocks
		// and at least one new block; otherwise it is skipped with a warning.
		if compacted == nil || compacted.SourceBlocks == nil || len(compacted.NewBlocks) == 0 {
			level.Warn(h.logger).Log("msg", "compacted blocks are missing; skipping", "job", job.State.Name)
			continue
		}
		if err = h.tombstones.AddTombstones(tx, cmd, blockTombstonesForCompletedJob(job)); err != nil {
			level.Error(h.logger).Log("msg", "failed to add tombstones", "err", err)
			return nil, err
		}
		for _, block := range compacted.NewBlocks {
			if err = h.compactor.Compact(tx, compaction.NewBlockEntry(cmd, block)); err != nil {
				level.Error(h.logger).Log("msg", "failed to compact block", "err", err)
				return nil, err
			}
		}
		if err = h.index.ReplaceBlocks(tx, compacted); err != nil {
			level.Error(h.logger).Log("msg", "failed to replace blocks", "err", err)
			return nil, err
		}
	}

	span.SetTag("new_jobs", len(req.PlanUpdate.NewJobs))
	span.SetTag("completed_jobs", len(req.PlanUpdate.CompletedJobs))
	span.SetTag("updated_jobs", len(req.PlanUpdate.UpdatedJobs))
	return &raft_log.UpdateCompactionPlanResponse{PlanUpdate: req.PlanUpdate}, nil
}

// blockTombstonesForCompletedJob builds a tombstone record for the source
// blocks of a completed compaction job, keyed by the job name and carrying
// the shard, tenant, and compaction level the source blocks belong to.
func blockTombstonesForCompletedJob(job *raft_log.CompletedCompactionJob) *metastorev1.Tombstones {
	source := job.CompactedBlocks.SourceBlocks
	return &metastorev1.Tombstones{
		Blocks: &metastorev1.BlockTombstones{
			Name:            job.State.Name,
			Shard:           source.Shard,
			Tenant:          source.Tenant,
			CompactionLevel: job.State.CompactionLevel,
			Blocks:          source.Blocks,
		},
	}
}