github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/raft_snapshot_queue.go (about) 1 // Copyright 2015 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "context" 15 "time" 16 17 "github.com/cockroachdb/cockroach/pkg/config" 18 "github.com/cockroachdb/cockroach/pkg/gossip" 19 "github.com/cockroachdb/cockroach/pkg/roachpb" 20 "github.com/cockroachdb/cockroach/pkg/util/hlc" 21 "github.com/cockroachdb/cockroach/pkg/util/log" 22 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 23 "github.com/cockroachdb/errors" 24 "go.etcd.io/etcd/raft/tracker" 25 ) 26 27 const ( 28 // raftSnapshotQueueTimerDuration is the duration between Raft snapshot of 29 // queued replicas. 30 raftSnapshotQueueTimerDuration = 0 // zero duration to process Raft snapshots greedily 31 32 raftSnapshotPriority float64 = 0 33 ) 34 35 // raftSnapshotQueue manages a queue of replicas which may need to catch a 36 // replica up with a snapshot to their range. 37 type raftSnapshotQueue struct { 38 *baseQueue 39 } 40 41 // newRaftSnapshotQueue returns a new instance of raftSnapshotQueue. 42 func newRaftSnapshotQueue(store *Store, g *gossip.Gossip) *raftSnapshotQueue { 43 rq := &raftSnapshotQueue{} 44 rq.baseQueue = newBaseQueue( 45 "raftsnapshot", rq, store, g, 46 queueConfig{ 47 maxSize: defaultQueueMaxSize, 48 // The Raft leader (which sends Raft snapshots) may not be the 49 // leaseholder. Operating on a replica without holding the lease is the 50 // reason Raft snapshots cannot be performed by the replicateQueue. 51 needsLease: false, 52 needsSystemConfig: false, 53 acceptsUnsplitRanges: true, 54 processTimeoutFunc: makeQueueSnapshotTimeoutFunc(recoverySnapshotRate), 55 successes: store.metrics.RaftSnapshotQueueSuccesses, 56 failures: store.metrics.RaftSnapshotQueueFailures, 57 pending: store.metrics.RaftSnapshotQueuePending, 58 processingNanos: store.metrics.RaftSnapshotQueueProcessingNanos, 59 }, 60 ) 61 return rq 62 } 63 64 func (rq *raftSnapshotQueue) shouldQueue( 65 ctx context.Context, now hlc.Timestamp, repl *Replica, _ *config.SystemConfig, 66 ) (shouldQ bool, priority float64) { 67 // If a follower needs a snapshot, enqueue at the highest priority. 68 if status := repl.RaftStatus(); status != nil { 69 // raft.Status.Progress is only populated on the Raft group leader. 70 for _, p := range status.Progress { 71 if p.State == tracker.StateSnapshot { 72 if log.V(2) { 73 log.Infof(ctx, "raft snapshot needed, enqueuing") 74 } 75 return true, raftSnapshotPriority 76 } 77 } 78 } 79 return false, 0 80 } 81 82 func (rq *raftSnapshotQueue) process( 83 ctx context.Context, repl *Replica, _ *config.SystemConfig, 84 ) error { 85 // If a follower requires a Raft snapshot, perform it. 86 if status := repl.RaftStatus(); status != nil { 87 // raft.Status.Progress is only populated on the Raft group leader. 88 for id, p := range status.Progress { 89 if p.State == tracker.StateSnapshot { 90 if log.V(1) { 91 log.Infof(ctx, "sending raft snapshot") 92 } 93 if err := rq.processRaftSnapshot(ctx, repl, roachpb.ReplicaID(id)); err != nil { 94 return err 95 } 96 } 97 } 98 } 99 return nil 100 } 101 102 func (rq *raftSnapshotQueue) processRaftSnapshot( 103 ctx context.Context, repl *Replica, id roachpb.ReplicaID, 104 ) error { 105 desc := repl.Desc() 106 repDesc, ok := desc.GetReplicaDescriptorByID(id) 107 if !ok { 108 return errors.Errorf("%s: replica %d not present in %v", repl, id, desc.Replicas()) 109 } 110 snapType := SnapshotRequest_RAFT 111 112 // A learner replica is either getting a snapshot of type LEARNER by the node 113 // that's adding it or it's been orphaned and it's about to be cleaned up by 114 // the replicate queue. Either way, no point in also sending it a snapshot of 115 // type RAFT. 116 if repDesc.GetType() == roachpb.LEARNER { 117 if fn := repl.store.cfg.TestingKnobs.ReplicaSkipLearnerSnapshot; fn != nil && fn() { 118 return nil 119 } 120 snapType = SnapshotRequest_LEARNER 121 if index := repl.getAndGCSnapshotLogTruncationConstraints(timeutil.Now(), repDesc.StoreID); index > 0 { 122 // There is a snapshot being transferred. It's probably a LEARNER snap, so 123 // bail for now and try again later. 124 err := errors.Errorf( 125 "skipping snapshot; replica is likely a learner in the process of being added: %s", repDesc) 126 log.Infof(ctx, "%v", err) 127 // TODO(dan): This is super brittle and non-obvious. In the common case, 128 // this check avoids duplicate work, but in rare cases, we send the 129 // learner snap at an index before the one raft wanted here. The raft 130 // leader should be able to use logs to get the rest of the way, but it 131 // doesn't try. In this case, skipping the raft snapshot would mean that 132 // we have to wait for the next scanner cycle of the raft snapshot queue 133 // to pick it up again. So, punt the responsibility back to raft by 134 // telling it that the snapshot failed. If the learner snap ends up being 135 // sufficient, this message will be ignored, but if we hit the case 136 // described above, this will cause raft to keep asking for a snap and at 137 // some point the snapshot lock above will be released and we'll fall 138 // through to the below. 139 repl.reportSnapshotStatus(ctx, repDesc.ReplicaID, err) 140 return nil 141 } 142 } 143 144 err := repl.sendSnapshot(ctx, repDesc, snapType, SnapshotRequest_RECOVERY) 145 146 // NB: if the snapshot fails because of an overlapping replica on the 147 // recipient which is also waiting for a snapshot, the "smart" thing is to 148 // send that other snapshot with higher priority. The problem is that the 149 // leader for the overlapping range may not be this node. This happens 150 // typically during splits and merges when overly aggressive log truncations 151 // occur. 152 // 153 // For splits, the overlapping range will be a replica of the pre-split 154 // range that needs a snapshot to catch it up across the split trigger. 155 // 156 // For merges, the overlapping replicas belong to ranges since subsumed by 157 // this range. In particular, there can be many of them if merges apply in 158 // rapid succession. The leftmost replica is the most important one to catch 159 // up, as it will absorb all of the overlapping replicas when caught up past 160 // all of the merges. 161 // 162 // We're currently not handling this and instead rely on the quota pool to 163 // make sure that log truncations won't require snapshots for healthy 164 // followers. 165 return err 166 } 167 168 func (*raftSnapshotQueue) timer(_ time.Duration) time.Duration { 169 return raftSnapshotQueueTimerDuration 170 } 171 172 func (rq *raftSnapshotQueue) purgatoryChan() <-chan time.Time { 173 return nil 174 }