go.etcd.io/etcd@v3.3.27+incompatible/raft/progress.go (about) 1 // Copyright 2015 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package raft 16 17 import "fmt" 18 19 const ( 20 ProgressStateProbe ProgressStateType = iota 21 ProgressStateReplicate 22 ProgressStateSnapshot 23 ) 24 25 type ProgressStateType uint64 26 27 var prstmap = [...]string{ 28 "ProgressStateProbe", 29 "ProgressStateReplicate", 30 "ProgressStateSnapshot", 31 } 32 33 func (st ProgressStateType) String() string { return prstmap[uint64(st)] } 34 35 // Progress represents a follower’s progress in the view of the leader. Leader maintains 36 // progresses of all followers, and sends entries to the follower based on its progress. 37 type Progress struct { 38 Match, Next uint64 39 // State defines how the leader should interact with the follower. 40 // 41 // When in ProgressStateProbe, leader sends at most one replication message 42 // per heartbeat interval. It also probes actual progress of the follower. 43 // 44 // When in ProgressStateReplicate, leader optimistically increases next 45 // to the latest entry sent after sending replication message. This is 46 // an optimized state for fast replicating log entries to the follower. 47 // 48 // When in ProgressStateSnapshot, leader should have sent out snapshot 49 // before and stops sending any replication message. 50 State ProgressStateType 51 52 // Paused is used in ProgressStateProbe. 53 // When Paused is true, raft should pause sending replication message to this peer. 54 Paused bool 55 // PendingSnapshot is used in ProgressStateSnapshot. 56 // If there is a pending snapshot, the pendingSnapshot will be set to the 57 // index of the snapshot. If pendingSnapshot is set, the replication process of 58 // this Progress will be paused. raft will not resend snapshot until the pending one 59 // is reported to be failed. 60 PendingSnapshot uint64 61 62 // RecentActive is true if the progress is recently active. Receiving any messages 63 // from the corresponding follower indicates the progress is active. 64 // RecentActive can be reset to false after an election timeout. 65 RecentActive bool 66 67 // inflights is a sliding window for the inflight messages. 68 // Each inflight message contains one or more log entries. 69 // The max number of entries per message is defined in raft config as MaxSizePerMsg. 70 // Thus inflight effectively limits both the number of inflight messages 71 // and the bandwidth each Progress can use. 72 // When inflights is full, no more message should be sent. 73 // When a leader sends out a message, the index of the last 74 // entry should be added to inflights. The index MUST be added 75 // into inflights in order. 76 // When a leader receives a reply, the previous inflights should 77 // be freed by calling inflights.freeTo with the index of the last 78 // received entry. 79 ins *inflights 80 81 // IsLearner is true if this progress is tracked for a learner. 82 IsLearner bool 83 } 84 85 func (pr *Progress) resetState(state ProgressStateType) { 86 pr.Paused = false 87 pr.PendingSnapshot = 0 88 pr.State = state 89 pr.ins.reset() 90 } 91 92 func (pr *Progress) becomeProbe() { 93 // If the original state is ProgressStateSnapshot, progress knows that 94 // the pending snapshot has been sent to this peer successfully, then 95 // probes from pendingSnapshot + 1. 96 if pr.State == ProgressStateSnapshot { 97 pendingSnapshot := pr.PendingSnapshot 98 pr.resetState(ProgressStateProbe) 99 pr.Next = max(pr.Match+1, pendingSnapshot+1) 100 } else { 101 pr.resetState(ProgressStateProbe) 102 pr.Next = pr.Match + 1 103 } 104 } 105 106 func (pr *Progress) becomeReplicate() { 107 pr.resetState(ProgressStateReplicate) 108 pr.Next = pr.Match + 1 109 } 110 111 func (pr *Progress) becomeSnapshot(snapshoti uint64) { 112 pr.resetState(ProgressStateSnapshot) 113 pr.PendingSnapshot = snapshoti 114 } 115 116 // maybeUpdate returns false if the given n index comes from an outdated message. 117 // Otherwise it updates the progress and returns true. 118 func (pr *Progress) maybeUpdate(n uint64) bool { 119 var updated bool 120 if pr.Match < n { 121 pr.Match = n 122 updated = true 123 pr.resume() 124 } 125 if pr.Next < n+1 { 126 pr.Next = n + 1 127 } 128 return updated 129 } 130 131 func (pr *Progress) optimisticUpdate(n uint64) { pr.Next = n + 1 } 132 133 // maybeDecrTo returns false if the given to index comes from an out of order message. 134 // Otherwise it decreases the progress next index to min(rejected, last) and returns true. 135 func (pr *Progress) maybeDecrTo(rejected, last uint64) bool { 136 if pr.State == ProgressStateReplicate { 137 // the rejection must be stale if the progress has matched and "rejected" 138 // is smaller than "match". 139 if rejected <= pr.Match { 140 return false 141 } 142 // directly decrease next to match + 1 143 pr.Next = pr.Match + 1 144 return true 145 } 146 147 // the rejection must be stale if "rejected" does not match next - 1 148 if pr.Next-1 != rejected { 149 return false 150 } 151 152 if pr.Next = min(rejected, last+1); pr.Next < 1 { 153 pr.Next = 1 154 } 155 pr.resume() 156 return true 157 } 158 159 func (pr *Progress) pause() { pr.Paused = true } 160 func (pr *Progress) resume() { pr.Paused = false } 161 162 // IsPaused returns whether sending log entries to this node has been 163 // paused. A node may be paused because it has rejected recent 164 // MsgApps, is currently waiting for a snapshot, or has reached the 165 // MaxInflightMsgs limit. 166 func (pr *Progress) IsPaused() bool { 167 switch pr.State { 168 case ProgressStateProbe: 169 return pr.Paused 170 case ProgressStateReplicate: 171 return pr.ins.full() 172 case ProgressStateSnapshot: 173 return true 174 default: 175 panic("unexpected state") 176 } 177 } 178 179 func (pr *Progress) snapshotFailure() { pr.PendingSnapshot = 0 } 180 181 // needSnapshotAbort returns true if snapshot progress's Match 182 // is equal or higher than the pendingSnapshot. 183 func (pr *Progress) needSnapshotAbort() bool { 184 return pr.State == ProgressStateSnapshot && pr.Match >= pr.PendingSnapshot 185 } 186 187 func (pr *Progress) String() string { 188 return fmt.Sprintf("next = %d, match = %d, state = %s, waiting = %v, pendingSnapshot = %d", pr.Next, pr.Match, pr.State, pr.IsPaused(), pr.PendingSnapshot) 189 } 190 191 type inflights struct { 192 // the starting index in the buffer 193 start int 194 // number of inflights in the buffer 195 count int 196 197 // the size of the buffer 198 size int 199 200 // buffer contains the index of the last entry 201 // inside one message. 202 buffer []uint64 203 } 204 205 func newInflights(size int) *inflights { 206 return &inflights{ 207 size: size, 208 } 209 } 210 211 // add adds an inflight into inflights 212 func (in *inflights) add(inflight uint64) { 213 if in.full() { 214 panic("cannot add into a full inflights") 215 } 216 next := in.start + in.count 217 size := in.size 218 if next >= size { 219 next -= size 220 } 221 if next >= len(in.buffer) { 222 in.growBuf() 223 } 224 in.buffer[next] = inflight 225 in.count++ 226 } 227 228 // grow the inflight buffer by doubling up to inflights.size. We grow on demand 229 // instead of preallocating to inflights.size to handle systems which have 230 // thousands of Raft groups per process. 231 func (in *inflights) growBuf() { 232 newSize := len(in.buffer) * 2 233 if newSize == 0 { 234 newSize = 1 235 } else if newSize > in.size { 236 newSize = in.size 237 } 238 newBuffer := make([]uint64, newSize) 239 copy(newBuffer, in.buffer) 240 in.buffer = newBuffer 241 } 242 243 // freeTo frees the inflights smaller or equal to the given `to` flight. 244 func (in *inflights) freeTo(to uint64) { 245 if in.count == 0 || to < in.buffer[in.start] { 246 // out of the left side of the window 247 return 248 } 249 250 idx := in.start 251 var i int 252 for i = 0; i < in.count; i++ { 253 if to < in.buffer[idx] { // found the first large inflight 254 break 255 } 256 257 // increase index and maybe rotate 258 size := in.size 259 if idx++; idx >= size { 260 idx -= size 261 } 262 } 263 // free i inflights and set new start index 264 in.count -= i 265 in.start = idx 266 if in.count == 0 { 267 // inflights is empty, reset the start index so that we don't grow the 268 // buffer unnecessarily. 269 in.start = 0 270 } 271 } 272 273 func (in *inflights) freeFirstOne() { in.freeTo(in.buffer[in.start]) } 274 275 // full returns true if the inflights is full. 276 func (in *inflights) full() bool { 277 return in.count == in.size 278 } 279 280 // resets frees all inflights. 281 func (in *inflights) reset() { 282 in.count = 0 283 in.start = 0 284 }