github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/consensus/hotstuff/pacemaker/timeout/controller.go (about)

     1  package timeout
     2  
     3  import (
     4  	"context"
     5  	"math"
     6  	"time"
     7  
     8  	"github.com/onflow/flow-go/consensus/hotstuff/model"
     9  )
    10  
// Controller implements the following truncated exponential backoff:
//
//	duration = min(t_min * b ^ ((r-k) * θ(r-k)), t_max)
//
// For practical purposes we transform this formula into the equivalent:
//
//	duration(r) = t_min * b ^ min((r-k) * θ(r-k), c), where c = log_b(t_max / t_min).
//
// In the formulas above:
//
//	k           - number of failed rounds we tolerate on the happy path; only after failing
//	              more than this many rounds do we start increasing timeouts
//	b           - timeout increase factor
//	r           - failed rounds counter
//	θ           - Heaviside step function
//	t_min/t_max - minimum/maximum round duration
//
// By manipulating `r` after observing progress or lack thereof, we achieve an exponential increase/decrease
// of round durations.
//   - on timeout: increase the number of failed rounds; after exceeding k, multiple subsequent timeouts
//     result in exponentially growing round durations.
//   - on progress: decrease the number of failed rounds, which results in an exponential decrease of the
//     round duration.
//
// NOTE(review): none of the fields below are guarded by a lock or atomics in this file; presumably all
// methods are invoked from a single goroutine — confirm against the caller.
type Controller struct {
	// cfg holds the timeout parameters (t_min, t_max, b, k and the maximum re-broadcast interval)
	cfg            Config
	// timeoutChannel is the channel handed out by Channel(); it is replaced on every StartTimeout call
	timeoutChannel chan time.Time
	// stopTicker cancels the ticking goroutine of the most recently started timeout (no-op initially)
	stopTicker     context.CancelFunc
	maxExponent    float64 // max exponent for exponential function, derived from maximum round duration
	r              uint64  // failed rounds counter, higher value results in longer round duration
}
    40  
    41  // NewController creates a new Controller. Note that the input Config is implemented such that
    42  // it can be passed by value, while still supporting updates of `BlockRateDelayMS` at runtime
    43  // (all configs share the same memory holding `BlockRateDelayMS`).
    44  func NewController(timeoutConfig Config) *Controller {
    45  	// the initial value for the timeout channel is a closed channel which returns immediately
    46  	// this prevents indefinite blocking when no timeout has been started
    47  	startChannel := make(chan time.Time)
    48  	close(startChannel)
    49  
    50  	// we need to calculate log_b(t_max/t_min), golang doesn't support logarithm with custom base
    51  	// we will apply change of base logarithm transformation to get around this:
    52  	// log_b(x) = log_e(x) / log_e(b)
    53  	maxExponent := math.Log(timeoutConfig.MaxReplicaTimeout/timeoutConfig.MinReplicaTimeout) /
    54  		math.Log(timeoutConfig.TimeoutAdjustmentFactor)
    55  
    56  	tc := Controller{
    57  		cfg:            timeoutConfig,
    58  		timeoutChannel: startChannel,
    59  		stopTicker:     func() {},
    60  		maxExponent:    maxExponent,
    61  	}
    62  	return &tc
    63  }
    64  
// Channel returns the receive-only channel on which timeout events are delivered.
// A new channel is created on each call of `StartTimeout`, so the value returned here
// changes whenever a new timeout is started.
// Returns a closed channel if no timer has been started yet, i.e. receiving from it
// returns immediately instead of blocking.
func (t *Controller) Channel() <-chan time.Time {
	return t.timeoutChannel
}
    71  
    72  // StartTimeout starts the timeout of the specified type and returns the timer info
    73  func (t *Controller) StartTimeout(ctx context.Context, view uint64) model.TimerInfo {
    74  	t.stopTicker() // stop old timeout
    75  
    76  	// setup new timer
    77  	durationMs := t.replicaTimeout()                                                         // duration of current view in units of Milliseconds
    78  	rebroadcastIntervalMs := math.Min(durationMs, t.cfg.MaxTimeoutObjectRebroadcastInterval) // time between attempted re-broadcast of timeouts if there is no progress
    79  	t.timeoutChannel = make(chan time.Time, 1)                                               // channel for delivering timeouts
    80  
    81  	// start timeout logic for (re-)broadcasting timeout objects on regular basis as long as we are in the same round.
    82  	var childContext context.Context
    83  	childContext, t.stopTicker = context.WithCancel(ctx)
    84  	duration := time.Duration(durationMs) * time.Millisecond
    85  	rebroadcastInterval := time.Duration(rebroadcastIntervalMs) * time.Millisecond
    86  	go tickAfterTimeout(childContext, duration, rebroadcastInterval, t.timeoutChannel)
    87  
    88  	return model.TimerInfo{View: view, StartTime: time.Now().UTC(), Duration: duration}
    89  }
    90  
    91  // tickAfterTimeout is a utility function which:
    92  //  1. waits for the initial timeout and then sends the current time to `timeoutChannel`
    93  //  2. and subsequently sends the current time every `tickInterval` to `timeoutChannel`
    94  //
    95  // If the receiver from the `timeoutChannel` falls behind and does not pick up the events,
    96  // we drop ticks until the receiver catches up. When cancelling `ctx`, all timing logic stops.
    97  // This approach allows to have a concurrent-safe implementation, where there is no unsafe state sharing between caller and
    98  // ticking logic.
    99  func tickAfterTimeout(ctx context.Context, duration time.Duration, tickInterval time.Duration, timeoutChannel chan<- time.Time) {
   100  	// wait for initial timeout
   101  	timer := time.NewTimer(duration)
   102  	select {
   103  	case t := <-timer.C:
   104  		timeoutChannel <- t // forward initial timeout to the sink
   105  	case <-ctx.Done():
   106  		timer.Stop() // allows timer to be garbage collected (before it expires)
   107  		return
   108  	}
   109  
   110  	// after we have reached the initial timeout, sent to `tickSink` every `tickInterval` until cancelled
   111  	ticker := time.NewTicker(tickInterval)
   112  	for {
   113  		select {
   114  		case t := <-ticker.C:
   115  			timeoutChannel <- t // forward ticks to the sink
   116  		case <-ctx.Done():
   117  			ticker.Stop() // critical for ticker to be garbage collected
   118  			return
   119  		}
   120  	}
   121  }
   122  
   123  // replicaTimeout returns the duration of the current view in milliseconds before we time out
   124  func (t *Controller) replicaTimeout() float64 {
   125  	if t.r <= t.cfg.HappyPathMaxRoundFailures {
   126  		return t.cfg.MinReplicaTimeout
   127  	}
   128  	r := float64(t.r - t.cfg.HappyPathMaxRoundFailures)
   129  	if r >= t.maxExponent {
   130  		return t.cfg.MaxReplicaTimeout
   131  	}
   132  	// compute timeout duration [in milliseconds]:
   133  	return t.cfg.MinReplicaTimeout * math.Pow(t.cfg.TimeoutAdjustmentFactor, r)
   134  }
   135  
   136  // OnTimeout indicates to the Controller that a view change was triggered by a TC (unhappy path).
   137  func (t *Controller) OnTimeout() {
   138  	if float64(t.r) >= t.maxExponent+float64(t.cfg.HappyPathMaxRoundFailures) {
   139  		return
   140  	}
   141  	t.r++
   142  }
   143  
   144  // OnProgressBeforeTimeout indicates to the Controller that progress was made _before_ the timeout was reached
   145  func (t *Controller) OnProgressBeforeTimeout() {
   146  	if t.r > 0 {
   147  		t.r--
   148  	}
   149  }