github.com/unionj-cloud/go-doudou/v2@v2.3.5/toolkit/memberlist/suspicion.go (about)

     1  package memberlist
     2  
     3  import (
     4  	"math"
     5  	"sync/atomic"
     6  	"time"
     7  )
     8  
     9  // suspicion manages the suspect timer for a node and provides an interface
    10  // to accelerate the timeout as we get more independent confirmations that
    11  // a node is suspect.
    12  type suspicion struct {
    13  	// n is the number of independent confirmations we've seen. This must
    14  	// be updated using atomic instructions to prevent contention with the
    15  	// timer callback.
    16  	n int32
    17  
    18  	// k is the number of independent confirmations we'd like to see in
    19  	// order to drive the timer to its minimum value.
    20  	k int32
    21  
    22  	// min is the minimum timer value.
    23  	min time.Duration
    24  
    25  	// max is the maximum timer value.
    26  	max time.Duration
    27  
    28  	// start captures the timestamp when we began the timer. This is used
    29  	// so we can calculate durations to feed the timer during updates in
    30  	// a way the achieves the overall time we'd like.
    31  	start time.Time
    32  
    33  	// timer is the underlying timer that implements the timeout.
    34  	timer *time.Timer
    35  
    36  	// f is the function to call when the timer expires. We hold on to this
    37  	// because there are cases where we call it directly.
    38  	timeoutFn func()
    39  
    40  	// confirmations is a map of "from" nodes that have confirmed a given
    41  	// node is suspect. This prevents double counting.
    42  	confirmations map[string]struct{}
    43  }
    44  
    45  // newSuspicion returns a timer started with the max time, and that will drive
    46  // to the min time after seeing k or more confirmations. The from node will be
    47  // excluded from confirmations since we might get our own suspicion message
    48  // gossiped back to us. The minimum time will be used if no confirmations are
    49  // called for (k <= 0).
    50  func newSuspicion(from string, k int, min time.Duration, max time.Duration, fn func(int)) *suspicion {
    51  	s := &suspicion{
    52  		k:             int32(k),
    53  		min:           min,
    54  		max:           max,
    55  		confirmations: make(map[string]struct{}),
    56  	}
    57  
    58  	// Exclude the from node from any confirmations.
    59  	s.confirmations[from] = struct{}{}
    60  
    61  	// Pass the number of confirmations into the timeout function for
    62  	// easy telemetry.
    63  	s.timeoutFn = func() {
    64  		fn(int(atomic.LoadInt32(&s.n)))
    65  	}
    66  
    67  	// If there aren't any confirmations to be made then take the min
    68  	// time from the start.
    69  	timeout := max
    70  	if k < 1 {
    71  		timeout = min
    72  	}
    73  	s.timer = time.AfterFunc(timeout, s.timeoutFn)
    74  
    75  	// Capture the start time right after starting the timer above so
    76  	// we should always err on the side of a little longer timeout if
    77  	// there's any preemption that separates this and the step above.
    78  	s.start = time.Now()
    79  	return s
    80  }
    81  
    82  // remainingSuspicionTime takes the state variables of the suspicion timer and
    83  // calculates the remaining time to wait before considering a node dead. The
    84  // return value can be negative, so be prepared to fire the timer immediately in
    85  // that case.
    86  func remainingSuspicionTime(n, k int32, elapsed time.Duration, min, max time.Duration) time.Duration {
    87  	frac := math.Log(float64(n)+1.0) / math.Log(float64(k)+1.0)
    88  	raw := max.Seconds() - frac*(max.Seconds()-min.Seconds())
    89  	timeout := time.Duration(math.Floor(1000.0*raw)) * time.Millisecond
    90  	if timeout < min {
    91  		timeout = min
    92  	}
    93  
    94  	// We have to take into account the amount of time that has passed so
    95  	// far, so we get the right overall timeout.
    96  	return timeout - elapsed
    97  }
    98  
    99  // Confirm registers that a possibly new peer has also determined the given
   100  // node is suspect. This returns true if this was new information, and false
   101  // if it was a duplicate confirmation, or if we've got enough confirmations to
   102  // hit the minimum.
   103  func (s *suspicion) Confirm(from string) bool {
   104  	// If we've got enough confirmations then stop accepting them.
   105  	if atomic.LoadInt32(&s.n) >= s.k {
   106  		return false
   107  	}
   108  
   109  	// Only allow one confirmation from each possible peer.
   110  	if _, ok := s.confirmations[from]; ok {
   111  		return false
   112  	}
   113  	s.confirmations[from] = struct{}{}
   114  
   115  	// Compute the new timeout given the current number of confirmations and
   116  	// adjust the timer. If the timeout becomes negative *and* we can cleanly
   117  	// stop the timer then we will call the timeout function directly from
   118  	// here.
   119  	n := atomic.AddInt32(&s.n, 1)
   120  	elapsed := time.Since(s.start)
   121  	remaining := remainingSuspicionTime(n, s.k, elapsed, s.min, s.max)
   122  	if s.timer.Stop() {
   123  		if remaining > 0 {
   124  			s.timer.Reset(remaining)
   125  		} else {
   126  			go s.timeoutFn()
   127  		}
   128  	}
   129  	return true
   130  }