go.etcd.io/etcd@v3.3.27+incompatible/raft/progress.go (about)

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package raft
    16  
    17  import "fmt"
    18  
    19  const (
    20  	ProgressStateProbe ProgressStateType = iota
    21  	ProgressStateReplicate
    22  	ProgressStateSnapshot
    23  )
    24  
    25  type ProgressStateType uint64
    26  
    27  var prstmap = [...]string{
    28  	"ProgressStateProbe",
    29  	"ProgressStateReplicate",
    30  	"ProgressStateSnapshot",
    31  }
    32  
    33  func (st ProgressStateType) String() string { return prstmap[uint64(st)] }
    34  
    35  // Progress represents a follower’s progress in the view of the leader. Leader maintains
    36  // progresses of all followers, and sends entries to the follower based on its progress.
    37  type Progress struct {
    38  	Match, Next uint64
    39  	// State defines how the leader should interact with the follower.
    40  	//
    41  	// When in ProgressStateProbe, leader sends at most one replication message
    42  	// per heartbeat interval. It also probes actual progress of the follower.
    43  	//
    44  	// When in ProgressStateReplicate, leader optimistically increases next
    45  	// to the latest entry sent after sending replication message. This is
    46  	// an optimized state for fast replicating log entries to the follower.
    47  	//
    48  	// When in ProgressStateSnapshot, leader should have sent out snapshot
    49  	// before and stops sending any replication message.
    50  	State ProgressStateType
    51  
    52  	// Paused is used in ProgressStateProbe.
    53  	// When Paused is true, raft should pause sending replication message to this peer.
    54  	Paused bool
    55  	// PendingSnapshot is used in ProgressStateSnapshot.
    56  	// If there is a pending snapshot, the pendingSnapshot will be set to the
    57  	// index of the snapshot. If pendingSnapshot is set, the replication process of
    58  	// this Progress will be paused. raft will not resend snapshot until the pending one
    59  	// is reported to be failed.
    60  	PendingSnapshot uint64
    61  
    62  	// RecentActive is true if the progress is recently active. Receiving any messages
    63  	// from the corresponding follower indicates the progress is active.
    64  	// RecentActive can be reset to false after an election timeout.
    65  	RecentActive bool
    66  
    67  	// inflights is a sliding window for the inflight messages.
    68  	// Each inflight message contains one or more log entries.
    69  	// The max number of entries per message is defined in raft config as MaxSizePerMsg.
    70  	// Thus inflight effectively limits both the number of inflight messages
    71  	// and the bandwidth each Progress can use.
    72  	// When inflights is full, no more message should be sent.
    73  	// When a leader sends out a message, the index of the last
    74  	// entry should be added to inflights. The index MUST be added
    75  	// into inflights in order.
    76  	// When a leader receives a reply, the previous inflights should
    77  	// be freed by calling inflights.freeTo with the index of the last
    78  	// received entry.
    79  	ins *inflights
    80  
    81  	// IsLearner is true if this progress is tracked for a learner.
    82  	IsLearner bool
    83  }
    84  
    85  func (pr *Progress) resetState(state ProgressStateType) {
    86  	pr.Paused = false
    87  	pr.PendingSnapshot = 0
    88  	pr.State = state
    89  	pr.ins.reset()
    90  }
    91  
    92  func (pr *Progress) becomeProbe() {
    93  	// If the original state is ProgressStateSnapshot, progress knows that
    94  	// the pending snapshot has been sent to this peer successfully, then
    95  	// probes from pendingSnapshot + 1.
    96  	if pr.State == ProgressStateSnapshot {
    97  		pendingSnapshot := pr.PendingSnapshot
    98  		pr.resetState(ProgressStateProbe)
    99  		pr.Next = max(pr.Match+1, pendingSnapshot+1)
   100  	} else {
   101  		pr.resetState(ProgressStateProbe)
   102  		pr.Next = pr.Match + 1
   103  	}
   104  }
   105  
   106  func (pr *Progress) becomeReplicate() {
   107  	pr.resetState(ProgressStateReplicate)
   108  	pr.Next = pr.Match + 1
   109  }
   110  
   111  func (pr *Progress) becomeSnapshot(snapshoti uint64) {
   112  	pr.resetState(ProgressStateSnapshot)
   113  	pr.PendingSnapshot = snapshoti
   114  }
   115  
   116  // maybeUpdate returns false if the given n index comes from an outdated message.
   117  // Otherwise it updates the progress and returns true.
   118  func (pr *Progress) maybeUpdate(n uint64) bool {
   119  	var updated bool
   120  	if pr.Match < n {
   121  		pr.Match = n
   122  		updated = true
   123  		pr.resume()
   124  	}
   125  	if pr.Next < n+1 {
   126  		pr.Next = n + 1
   127  	}
   128  	return updated
   129  }
   130  
   131  func (pr *Progress) optimisticUpdate(n uint64) { pr.Next = n + 1 }
   132  
   133  // maybeDecrTo returns false if the given to index comes from an out of order message.
   134  // Otherwise it decreases the progress next index to min(rejected, last) and returns true.
   135  func (pr *Progress) maybeDecrTo(rejected, last uint64) bool {
   136  	if pr.State == ProgressStateReplicate {
   137  		// the rejection must be stale if the progress has matched and "rejected"
   138  		// is smaller than "match".
   139  		if rejected <= pr.Match {
   140  			return false
   141  		}
   142  		// directly decrease next to match + 1
   143  		pr.Next = pr.Match + 1
   144  		return true
   145  	}
   146  
   147  	// the rejection must be stale if "rejected" does not match next - 1
   148  	if pr.Next-1 != rejected {
   149  		return false
   150  	}
   151  
   152  	if pr.Next = min(rejected, last+1); pr.Next < 1 {
   153  		pr.Next = 1
   154  	}
   155  	pr.resume()
   156  	return true
   157  }
   158  
   159  func (pr *Progress) pause()  { pr.Paused = true }
   160  func (pr *Progress) resume() { pr.Paused = false }
   161  
   162  // IsPaused returns whether sending log entries to this node has been
   163  // paused. A node may be paused because it has rejected recent
   164  // MsgApps, is currently waiting for a snapshot, or has reached the
   165  // MaxInflightMsgs limit.
   166  func (pr *Progress) IsPaused() bool {
   167  	switch pr.State {
   168  	case ProgressStateProbe:
   169  		return pr.Paused
   170  	case ProgressStateReplicate:
   171  		return pr.ins.full()
   172  	case ProgressStateSnapshot:
   173  		return true
   174  	default:
   175  		panic("unexpected state")
   176  	}
   177  }
   178  
   179  func (pr *Progress) snapshotFailure() { pr.PendingSnapshot = 0 }
   180  
   181  // needSnapshotAbort returns true if snapshot progress's Match
   182  // is equal or higher than the pendingSnapshot.
   183  func (pr *Progress) needSnapshotAbort() bool {
   184  	return pr.State == ProgressStateSnapshot && pr.Match >= pr.PendingSnapshot
   185  }
   186  
   187  func (pr *Progress) String() string {
   188  	return fmt.Sprintf("next = %d, match = %d, state = %s, waiting = %v, pendingSnapshot = %d", pr.Next, pr.Match, pr.State, pr.IsPaused(), pr.PendingSnapshot)
   189  }
   190  
   191  type inflights struct {
   192  	// the starting index in the buffer
   193  	start int
   194  	// number of inflights in the buffer
   195  	count int
   196  
   197  	// the size of the buffer
   198  	size int
   199  
   200  	// buffer contains the index of the last entry
   201  	// inside one message.
   202  	buffer []uint64
   203  }
   204  
   205  func newInflights(size int) *inflights {
   206  	return &inflights{
   207  		size: size,
   208  	}
   209  }
   210  
   211  // add adds an inflight into inflights
   212  func (in *inflights) add(inflight uint64) {
   213  	if in.full() {
   214  		panic("cannot add into a full inflights")
   215  	}
   216  	next := in.start + in.count
   217  	size := in.size
   218  	if next >= size {
   219  		next -= size
   220  	}
   221  	if next >= len(in.buffer) {
   222  		in.growBuf()
   223  	}
   224  	in.buffer[next] = inflight
   225  	in.count++
   226  }
   227  
   228  // grow the inflight buffer by doubling up to inflights.size. We grow on demand
   229  // instead of preallocating to inflights.size to handle systems which have
   230  // thousands of Raft groups per process.
   231  func (in *inflights) growBuf() {
   232  	newSize := len(in.buffer) * 2
   233  	if newSize == 0 {
   234  		newSize = 1
   235  	} else if newSize > in.size {
   236  		newSize = in.size
   237  	}
   238  	newBuffer := make([]uint64, newSize)
   239  	copy(newBuffer, in.buffer)
   240  	in.buffer = newBuffer
   241  }
   242  
   243  // freeTo frees the inflights smaller or equal to the given `to` flight.
   244  func (in *inflights) freeTo(to uint64) {
   245  	if in.count == 0 || to < in.buffer[in.start] {
   246  		// out of the left side of the window
   247  		return
   248  	}
   249  
   250  	idx := in.start
   251  	var i int
   252  	for i = 0; i < in.count; i++ {
   253  		if to < in.buffer[idx] { // found the first large inflight
   254  			break
   255  		}
   256  
   257  		// increase index and maybe rotate
   258  		size := in.size
   259  		if idx++; idx >= size {
   260  			idx -= size
   261  		}
   262  	}
   263  	// free i inflights and set new start index
   264  	in.count -= i
   265  	in.start = idx
   266  	if in.count == 0 {
   267  		// inflights is empty, reset the start index so that we don't grow the
   268  		// buffer unnecessarily.
   269  		in.start = 0
   270  	}
   271  }
   272  
   273  func (in *inflights) freeFirstOne() { in.freeTo(in.buffer[in.start]) }
   274  
   275  // full returns true if the inflights is full.
   276  func (in *inflights) full() bool {
   277  	return in.count == in.size
   278  }
   279  
   280  // resets frees all inflights.
   281  func (in *inflights) reset() {
   282  	in.count = 0
   283  	in.start = 0
   284  }