// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium
//
// Source: github.com/cilium/cilium@v1.16.2/pkg/backoff/backoff.go

package backoff

import (
	"context"
	"fmt"
	"math"
	"math/rand/v2"

	"github.com/google/uuid"
	"github.com/sirupsen/logrus"

	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/time"
)

// log is the package-level logger; every entry carries the log subsystem
// field set to "backoff".
var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "backoff")

// NodeManager is the interface required to implement cluster size dependent
// intervals
type NodeManager interface {
	// ClusterSizeDependantInterval maps a base interval to the interval that
	// should actually be used.
	ClusterSizeDependantInterval(baseInterval time.Duration) time.Duration
}

// nodeManager is a wrapper to enable using a plain function as NodeManager to implement
// cluster size dependent intervals
type nodeManager struct {
	// clusterSizeDependantInterval is the wrapped tuning function; it may be
	// nil.
	clusterSizeDependantInterval func(baseInterval time.Duration) time.Duration
}

// NewNodeManager returns a new NodeManager implementing cluster size dependent intervals
// based on the given function. If the function is nil, then no tuning is performed.
36 func NewNodeManager(clusterSizeDependantInterval func(baseInterval time.Duration) time.Duration) NodeManager { 37 return &nodeManager{clusterSizeDependantInterval: clusterSizeDependantInterval} 38 } 39 40 func (n *nodeManager) ClusterSizeDependantInterval(baseInterval time.Duration) time.Duration { 41 if n.clusterSizeDependantInterval == nil { 42 return baseInterval 43 } 44 45 return n.clusterSizeDependantInterval(baseInterval) 46 } 47 48 // Exponential implements an exponential backoff 49 type Exponential struct { 50 // Min is the minimal backoff time, if unspecified, 1 second will be 51 // used 52 Min time.Duration 53 54 // Max is the maximum backoff time, if unspecified, no maximum time is 55 // applied 56 Max time.Duration 57 58 // Factor is the factor the backoff time grows exponentially, if 59 // unspecified, a factor of 2.0 will be used 60 Factor float64 61 62 // Jitter, when enabled, adds random jitter to the interval 63 Jitter bool 64 65 // NodeManager enables the use of cluster size dependent backoff 66 // intervals, i.e. the larger the cluster, the longer the backoff 67 // interval 68 NodeManager NodeManager 69 70 // Name is a free form string describing the operation subject to the 71 // backoff, if unspecified, a UUID is generated. This string is used 72 // for logging purposes. 73 Name string 74 75 // ResetAfter will reset the exponential back-off if no attempt is made for the amount of time specified here. 76 // Needs to be larger than the Max duration, otherwise it will be ignored to avoid accidental resets. 77 // If unspecified, no reset is performed. 78 ResetAfter time.Duration 79 80 lastBackoffStart time.Time 81 82 attempt int 83 } 84 85 // CalculateDuration calculates the backoff duration based on minimum base 86 // interval, exponential factor, jitter and number of failures. 
87 func CalculateDuration(min, max time.Duration, factor float64, jitter bool, failures int) time.Duration { 88 minFloat := float64(min) 89 maxFloat := float64(max) 90 91 t := minFloat * math.Pow(factor, float64(failures)) 92 if max != time.Duration(0) && t > maxFloat { 93 t = maxFloat 94 } 95 96 if jitter { 97 t = rand.Float64()*(t-minFloat) + minFloat 98 } 99 100 return time.Duration(t) 101 } 102 103 // ClusterSizeDependantInterval returns a time.Duration that is dependent on 104 // the cluster size, i.e. the number of nodes that have been discovered. This 105 // can be used to control sync intervals of shared or centralized resources to 106 // avoid overloading these resources as the cluster grows. 107 // 108 // Example sync interval with baseInterval = 1 * time.Minute 109 // 110 // nodes | sync interval 111 // ------+----------------- 112 // 1 | 41.588830833s 113 // 2 | 1m05.916737320s 114 // 4 | 1m36.566274746s 115 // 8 | 2m11.833474640s 116 // 16 | 2m49.992800643s 117 // 32 | 3m29.790453687s 118 // 64 | 4m10.463236193s 119 // 128 | 4m51.588744261s 120 // 256 | 5m32.944565093s 121 // 512 | 6m14.416550710s 122 // 1024 | 6m55.946873494s 123 // 2048 | 7m37.506428894s 124 // 4096 | 8m19.080616652s 125 // 8192 | 9m00.662124608s 126 // 16384 | 9m42.247293667s 127 func ClusterSizeDependantInterval(baseInterval time.Duration, numNodes int) time.Duration { 128 // no nodes are being managed, no work will be performed, return 129 // baseInterval to check again in a reasonable timeframe 130 if numNodes == 0 { 131 return baseInterval 132 } 133 134 waitNanoseconds := float64(baseInterval.Nanoseconds()) * math.Log1p(float64(numNodes)) 135 return time.Duration(int64(waitNanoseconds)) 136 } 137 138 // Reset backoff attempt counter 139 func (b *Exponential) Reset() { 140 b.attempt = 0 141 } 142 143 // Wait waits for the required time using an exponential backoff 144 func (b *Exponential) Wait(ctx context.Context) error { 145 if resetDuration := b.ResetAfter; resetDuration != 
time.Duration(0) && resetDuration > b.Max { 146 if !b.lastBackoffStart.IsZero() { 147 if time.Since(b.lastBackoffStart) > resetDuration { 148 b.Reset() 149 } 150 } 151 } 152 153 b.lastBackoffStart = time.Now() 154 b.attempt++ 155 t := b.Duration(b.attempt) 156 157 log.WithFields(logrus.Fields{ 158 "time": t, 159 "attempt": b.attempt, 160 "name": b.Name, 161 }).Debug("Sleeping with exponential backoff") 162 163 select { 164 case <-ctx.Done(): 165 return fmt.Errorf("exponential backoff cancelled via context: %w", ctx.Err()) 166 case <-time.After(t): 167 } 168 169 return nil 170 } 171 172 // Duration returns the wait duration for the nth attempt 173 func (b *Exponential) Duration(attempt int) time.Duration { 174 if b.Name == "" { 175 b.Name = uuid.New().String() 176 } 177 178 min := time.Duration(1) * time.Second 179 if b.Min != time.Duration(0) { 180 min = b.Min 181 } 182 183 factor := float64(2) 184 if b.Factor != float64(0) { 185 factor = b.Factor 186 } 187 188 t := CalculateDuration(min, b.Max, factor, b.Jitter, attempt) 189 190 if b.NodeManager != nil { 191 t = b.NodeManager.ClusterSizeDependantInterval(t) 192 } 193 194 if b.Max != time.Duration(0) && t > b.Max { 195 t = b.Max 196 } 197 198 return t 199 }