github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/split_delay_helper.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "context" 15 "fmt" 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/roachpb" 19 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 20 "go.etcd.io/etcd/raft" 21 "go.etcd.io/etcd/raft/tracker" 22 ) 23 24 type splitDelayHelperI interface { 25 RaftStatus(context.Context) (roachpb.RangeID, *raft.Status) 26 ProposeEmptyCommand(ctx context.Context) 27 NumAttempts() int 28 Sleep(context.Context) time.Duration 29 } 30 31 type splitDelayHelper Replica 32 33 func (sdh *splitDelayHelper) RaftStatus(ctx context.Context) (roachpb.RangeID, *raft.Status) { 34 r := (*Replica)(sdh) 35 r.mu.RLock() 36 raftStatus := r.raftStatusRLocked() 37 if raftStatus != nil { 38 updateRaftProgressFromActivity( 39 ctx, raftStatus.Progress, r.descRLocked().Replicas().All(), 40 func(replicaID roachpb.ReplicaID) bool { 41 return r.mu.lastUpdateTimes.isFollowerActiveSince( 42 ctx, replicaID, timeutil.Now(), r.store.cfg.RangeLeaseActiveDuration()) 43 }, 44 ) 45 } 46 r.mu.RUnlock() 47 return r.RangeID, raftStatus 48 } 49 50 func (sdh *splitDelayHelper) ProposeEmptyCommand(ctx context.Context) { 51 r := (*Replica)(sdh) 52 r.raftMu.Lock() 53 _ = r.withRaftGroup(true /* campaignOnWake */, func(rawNode *raft.RawNode) (bool, error) { 54 // NB: intentionally ignore the error (which can be ErrProposalDropped 55 // when there's an SST inflight). 56 data := encodeRaftCommand(raftVersionStandard, makeIDKey(), nil) 57 _ = rawNode.Propose(data) 58 // NB: we need to unquiesce as the group might be quiesced. 59 return true /* unquiesceAndWakeLeader */, nil 60 }) 61 r.raftMu.Unlock() 62 } 63 64 func (sdh *splitDelayHelper) NumAttempts() int { 65 // There is a related mechanism regarding snapshots and splits that is worth 66 // pointing out here: Incoming MsgApp (see the _ assignment below) are 67 // dropped if they are addressed to uninitialized replicas likely to become 68 // initialized via a split trigger. These MsgApp are sent approximately once 69 // per heartbeat interval, but sometimes there's an additional delay thanks 70 // to having to wait for a GC run. In effect, it shouldn't take more than a 71 // small number of heartbeats until the follower leaves probing status, so 72 // NumAttempts should at least match that. 73 _ = maybeDropMsgApp // guru assignment 74 // Snapshots can come up for other reasons and at the end of the day, the 75 // delay introduced here needs to make sure that the snapshot queue 76 // processes at a higher rate than splits happen, so the number of attempts 77 // will typically be much higher than what's suggested by maybeDropMsgApp. 78 return (*Replica)(sdh).store.cfg.RaftDelaySplitToSuppressSnapshotTicks 79 } 80 81 func (sdh *splitDelayHelper) Sleep(ctx context.Context) time.Duration { 82 tBegin := timeutil.Now() 83 84 r := (*Replica)(sdh) 85 select { 86 case <-time.After(r.store.cfg.RaftTickInterval): 87 case <-ctx.Done(): 88 } 89 90 return timeutil.Since(tBegin) 91 } 92 93 func maybeDelaySplitToAvoidSnapshot(ctx context.Context, sdh splitDelayHelperI) string { 94 maxDelaySplitToAvoidSnapshotTicks := sdh.NumAttempts() 95 96 var slept time.Duration 97 var extra string 98 var succeeded bool 99 for ticks := 0; ticks < maxDelaySplitToAvoidSnapshotTicks; ticks++ { 100 succeeded = false 101 extra = "" 102 rangeID, raftStatus := sdh.RaftStatus(ctx) 103 104 if raftStatus == nil { 105 // Don't delay on followers (we don't know when to stop). This case 106 // is hit rarely enough to not matter. 107 extra += "; not Raft leader" 108 succeeded = true 109 break 110 } 111 112 done := true 113 for replicaID, pr := range raftStatus.Progress { 114 if pr.State != tracker.StateReplicate { 115 if !pr.RecentActive { 116 if ticks == 0 { 117 // Having set done = false, we make sure we're not exiting early. 118 // This is important because we sometimes need that Raft proposal 119 // below to make the followers active as there's no chatter on an 120 // idle range. (Note that there's a theoretical race in which the 121 // follower becomes inactive again during the sleep, but the 122 // inactivity interval is much larger than a tick). 123 // 124 // Don't do this more than once though: if a follower is down, 125 // we don't want to delay splits for it. 126 done = false 127 } 128 extra += fmt.Sprintf("; r%d/%d inactive", rangeID, replicaID) 129 continue 130 } 131 done = false 132 extra += fmt.Sprintf("; replica r%d/%d not caught up: %+v", rangeID, replicaID, &pr) 133 } 134 } 135 if done { 136 succeeded = true 137 break 138 } 139 // Propose an empty command which works around a Raft bug that can 140 // leave a follower in ProgressStateProbe even though it has caught 141 // up. 142 sdh.ProposeEmptyCommand(ctx) 143 slept += sdh.Sleep(ctx) 144 145 if ctx.Err() != nil { 146 return "" 147 } 148 } 149 150 if slept != 0 { 151 extra += fmt.Sprintf("; delayed split for %.1fs to avoid Raft snapshot", slept.Seconds()) 152 if !succeeded { 153 extra += " (without success)" 154 } 155 } 156 157 return extra 158 }