github.com/cilium/cilium@v1.16.2/pkg/backoff/backoff.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package backoff
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"math"
    10  	"math/rand/v2"
    11  
    12  	"github.com/google/uuid"
    13  	"github.com/sirupsen/logrus"
    14  
    15  	"github.com/cilium/cilium/pkg/logging"
    16  	"github.com/cilium/cilium/pkg/logging/logfields"
    17  	"github.com/cilium/cilium/pkg/time"
    18  )
    19  
    20  var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "backoff")
    21  
    22  // NodeManager is the interface required to implement cluster size dependent
    23  // intervals
    24  type NodeManager interface {
    25  	ClusterSizeDependantInterval(baseInterval time.Duration) time.Duration
    26  }
    27  
    28  // nodeManager is a wrapper to enable using a plain function as NodeManager to implement
    29  // cluster size dependent intervals
    30  type nodeManager struct {
    31  	clusterSizeDependantInterval func(baseInterval time.Duration) time.Duration
    32  }
    33  
    34  // NewNodeManager returns a new NodeManager implementing cluster size dependent intervals
    35  // based on the given function. If the function is nil, then no tuning is performed.
    36  func NewNodeManager(clusterSizeDependantInterval func(baseInterval time.Duration) time.Duration) NodeManager {
    37  	return &nodeManager{clusterSizeDependantInterval: clusterSizeDependantInterval}
    38  }
    39  
    40  func (n *nodeManager) ClusterSizeDependantInterval(baseInterval time.Duration) time.Duration {
    41  	if n.clusterSizeDependantInterval == nil {
    42  		return baseInterval
    43  	}
    44  
    45  	return n.clusterSizeDependantInterval(baseInterval)
    46  }
    47  
    48  // Exponential implements an exponential backoff
    49  type Exponential struct {
    50  	// Min is the minimal backoff time, if unspecified, 1 second will be
    51  	// used
    52  	Min time.Duration
    53  
    54  	// Max is the maximum backoff time, if unspecified, no maximum time is
    55  	// applied
    56  	Max time.Duration
    57  
    58  	// Factor is the factor the backoff time grows exponentially, if
    59  	// unspecified, a factor of 2.0 will be used
    60  	Factor float64
    61  
    62  	// Jitter, when enabled, adds random jitter to the interval
    63  	Jitter bool
    64  
    65  	// NodeManager enables the use of cluster size dependent backoff
    66  	// intervals, i.e. the larger the cluster, the longer the backoff
    67  	// interval
    68  	NodeManager NodeManager
    69  
    70  	// Name is a free form string describing the operation subject to the
    71  	// backoff, if unspecified, a UUID is generated. This string is used
    72  	// for logging purposes.
    73  	Name string
    74  
    75  	// ResetAfter will reset the exponential back-off if no attempt is made for the amount of time specified here.
    76  	// Needs to be larger than the Max duration, otherwise it will be ignored to avoid accidental resets.
    77  	// If unspecified, no reset is performed.
    78  	ResetAfter time.Duration
    79  
    80  	lastBackoffStart time.Time
    81  
    82  	attempt int
    83  }
    84  
    85  // CalculateDuration calculates the backoff duration based on minimum base
    86  // interval, exponential factor, jitter and number of failures.
    87  func CalculateDuration(min, max time.Duration, factor float64, jitter bool, failures int) time.Duration {
    88  	minFloat := float64(min)
    89  	maxFloat := float64(max)
    90  
    91  	t := minFloat * math.Pow(factor, float64(failures))
    92  	if max != time.Duration(0) && t > maxFloat {
    93  		t = maxFloat
    94  	}
    95  
    96  	if jitter {
    97  		t = rand.Float64()*(t-minFloat) + minFloat
    98  	}
    99  
   100  	return time.Duration(t)
   101  }
   102  
   103  // ClusterSizeDependantInterval returns a time.Duration that is dependent on
   104  // the cluster size, i.e. the number of nodes that have been discovered. This
   105  // can be used to control sync intervals of shared or centralized resources to
   106  // avoid overloading these resources as the cluster grows.
   107  //
   108  // Example sync interval with baseInterval = 1 * time.Minute
   109  //
   110  // nodes | sync interval
   111  // ------+-----------------
   112  // 1     |   41.588830833s
   113  // 2     | 1m05.916737320s
   114  // 4     | 1m36.566274746s
   115  // 8     | 2m11.833474640s
   116  // 16    | 2m49.992800643s
   117  // 32    | 3m29.790453687s
   118  // 64    | 4m10.463236193s
   119  // 128   | 4m51.588744261s
   120  // 256   | 5m32.944565093s
   121  // 512   | 6m14.416550710s
   122  // 1024  | 6m55.946873494s
   123  // 2048  | 7m37.506428894s
   124  // 4096  | 8m19.080616652s
   125  // 8192  | 9m00.662124608s
   126  // 16384 | 9m42.247293667s
   127  func ClusterSizeDependantInterval(baseInterval time.Duration, numNodes int) time.Duration {
   128  	// no nodes are being managed, no work will be performed, return
   129  	// baseInterval to check again in a reasonable timeframe
   130  	if numNodes == 0 {
   131  		return baseInterval
   132  	}
   133  
   134  	waitNanoseconds := float64(baseInterval.Nanoseconds()) * math.Log1p(float64(numNodes))
   135  	return time.Duration(int64(waitNanoseconds))
   136  }
   137  
   138  // Reset backoff attempt counter
   139  func (b *Exponential) Reset() {
   140  	b.attempt = 0
   141  }
   142  
   143  // Wait waits for the required time using an exponential backoff
   144  func (b *Exponential) Wait(ctx context.Context) error {
   145  	if resetDuration := b.ResetAfter; resetDuration != time.Duration(0) && resetDuration > b.Max {
   146  		if !b.lastBackoffStart.IsZero() {
   147  			if time.Since(b.lastBackoffStart) > resetDuration {
   148  				b.Reset()
   149  			}
   150  		}
   151  	}
   152  
   153  	b.lastBackoffStart = time.Now()
   154  	b.attempt++
   155  	t := b.Duration(b.attempt)
   156  
   157  	log.WithFields(logrus.Fields{
   158  		"time":    t,
   159  		"attempt": b.attempt,
   160  		"name":    b.Name,
   161  	}).Debug("Sleeping with exponential backoff")
   162  
   163  	select {
   164  	case <-ctx.Done():
   165  		return fmt.Errorf("exponential backoff cancelled via context: %w", ctx.Err())
   166  	case <-time.After(t):
   167  	}
   168  
   169  	return nil
   170  }
   171  
   172  // Duration returns the wait duration for the nth attempt
   173  func (b *Exponential) Duration(attempt int) time.Duration {
   174  	if b.Name == "" {
   175  		b.Name = uuid.New().String()
   176  	}
   177  
   178  	min := time.Duration(1) * time.Second
   179  	if b.Min != time.Duration(0) {
   180  		min = b.Min
   181  	}
   182  
   183  	factor := float64(2)
   184  	if b.Factor != float64(0) {
   185  		factor = b.Factor
   186  	}
   187  
   188  	t := CalculateDuration(min, b.Max, factor, b.Jitter, attempt)
   189  
   190  	if b.NodeManager != nil {
   191  		t = b.NodeManager.ClusterSizeDependantInterval(t)
   192  	}
   193  
   194  	if b.Max != time.Duration(0) && t > b.Max {
   195  		t = b.Max
   196  	}
   197  
   198  	return t
   199  }