github.com/grailbio/base@v0.0.11/admit/admit.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 // Package admit contains utilities for admission control. 6 package admit 7 8 import ( 9 "context" 10 "expvar" 11 "sync" 12 13 "github.com/grailbio/base/log" 14 "github.com/grailbio/base/retry" 15 "github.com/grailbio/base/sync/ctxsync" 16 ) 17 18 var ( 19 admitLimit = expvar.NewMap("admit.limit") 20 admitUsed = expvar.NewMap("admit.used") 21 ) 22 23 // Policy implements the low level details of an admission control 24 // policy. Users typically use a utility function such as admit.Do 25 // or admit.Retry. 26 type Policy interface { 27 // Acquire acquires a number of tokens from the admission controller. 28 // Returns on success, or if the context was canceled. 29 // Acquire can also return with an error if the number of requested tokens 30 // exceeds the upper limit of available tokens. 31 Acquire(ctx context.Context, need int) error 32 33 // Release a number of tokens to the admission controller, 34 // reporting whether the request was within the capacity limits. 35 Release(tokens int, ok bool) 36 } 37 38 // Do calls f after being admitted by the controller. f's bool return value is 39 // passed on to the underlying policy upon Release, and the error is simply 40 // returned back to the caller as a convenience. 41 // If policy is nil, then this will simply call f. 42 func Do(ctx context.Context, policy Policy, tokens int, f func() (bool, error)) error { 43 if policy == nil { 44 _, err := f() 45 return err 46 } 47 if err := policy.Acquire(ctx, tokens); err != nil { 48 return err 49 } 50 var ( 51 ok bool 52 err error 53 ) 54 defer func() { policy.Release(tokens, ok) }() 55 ok, err = f() 56 return err 57 } 58 59 // CapacityStatus is the feedback provided by the user to Retry about the underlying resource being managed by Policy. 60 type CapacityStatus int 61 62 const ( 63 // Within means that the underlying resource is within capacity. 64 Within CapacityStatus = iota 65 // OverNoRetry means that the underlying resource is over capacity but no retry is needed. 66 // This is useful in situations where a request using the resource succeeded, but there are 67 // signs of congestion (for example, in the form of high latency). 68 OverNoRetry 69 // OverNeedRetry means that the underlying resource is over capacity and a retry is needed. 70 // This is useful in situations where requests failed due to the underlying resource hitting capacity limits. 71 OverNeedRetry 72 ) 73 74 // RetryPolicy combines an admission controller with a retry policy. 75 type RetryPolicy interface { 76 Policy 77 retry.Policy 78 } 79 80 // Retry calls f after being admitted by the Policy (implied by the given RetryPolicy). 81 // If f returns Within, true is passed to the underlying policy upon Release and false otherwise. 82 // If f returns OverNeedRetry, f will be retried as per the RetryPolicy (and the error returned by f is ignored), 83 // and if f can no longer be retried, the error returned by retry.Policy will be returned. 84 func Retry(ctx context.Context, policy RetryPolicy, tokens int, f func() (CapacityStatus, error)) error { 85 var err error 86 for retries := 0; ; retries++ { 87 var c CapacityStatus 88 err = Do(ctx, policy, tokens, func() (bool, error) { 89 var err error // nolint:govet 90 c, err = f() 91 return c == Within, err 92 }) 93 // Retry as per retry policy if attempt failed due to over capacity. 94 if c != OverNeedRetry { 95 break 96 } 97 if err = retry.Wait(ctx, policy, retries); err != nil { 98 break 99 } 100 log.Debug.Printf("admit.Retry: %v, retries=%d", err, retries) 101 } 102 return err 103 } 104 105 const defaultLimitChangeRate = 0.1 106 107 // Adjust changes the limit by factor. 108 func adjust(limit int, factor float32) int { 109 return int(float32(limit) * (1 + factor)) 110 } 111 112 func min(x, y int) int { 113 if x < y { 114 return x 115 } 116 return y 117 } 118 119 func max(x, y int) int { 120 if x > y { 121 return x 122 } 123 return y 124 } 125 126 type controller struct { 127 // limit, used are the current limit and current used tokens respectively. 128 limit, used int 129 // low, high define the range within which the limit can be adjusted. 130 low, high int 131 mu sync.Mutex 132 cond *ctxsync.Cond 133 limitVar, usedVar expvar.Int 134 } 135 136 type controllerWithRetry struct { 137 *controller 138 retry.Policy 139 } 140 141 func newController(start, limit int) *controller { 142 c := &controller{limit: start, used: 0, low: start, high: limit} 143 c.cond = ctxsync.NewCond(&c.mu) 144 return c 145 } 146 147 // Controller returns a Policy which starts with a concurrency limit of 'start' 148 // and can grow upto a maximum of 'limit' as long as errors aren't observed. 149 // A controller is not fair: tokens are not granted in FIFO order; 150 // rather, waiters are picked randomly to be granted new tokens. 151 func Controller(start, limit int) Policy { 152 return newController(start, limit) 153 } 154 155 // ControllerWithRetry returns a RetryPolicy which starts with a concurrency 156 // limit of 'start' and can grow upto a maximum of 'limit' if no errors are seen. 157 // A controller is not fair: tokens are not granted in FIFO order; 158 // rather, waiters are picked randomly to be granted new tokens. 159 func ControllerWithRetry(start, limit int, retryPolicy retry.Policy) RetryPolicy { 160 return controllerWithRetry{controller: newController(start, limit), Policy: retryPolicy} 161 } 162 163 // EnableVarExport enables the export of relevant vars useful for debugging/monitoring. 164 func EnableVarExport(policy Policy, name string) { 165 switch c := policy.(type) { 166 case *controller: 167 admitLimit.Set(name, &c.limitVar) 168 admitUsed.Set(name, &c.usedVar) 169 case *aimd: 170 admitLimit.Set(name, &c.limitVar) 171 admitUsed.Set(name, &c.usedVar) 172 } 173 } 174 175 // Acquire acquires a number of tokens from the admission controller. 176 // Returns on success, or if the context was canceled. 177 func (c *controller) Acquire(ctx context.Context, need int) error { 178 c.mu.Lock() 179 defer c.mu.Unlock() 180 for { 181 // TODO(swami): should allow an increase only when the last release was ok 182 lim := min(adjust(c.limit, defaultLimitChangeRate), c.high) 183 have := lim - c.used 184 if need <= have || (need > lim && c.used == 0) { 185 c.used += need 186 c.usedVar.Set(int64(c.used)) 187 return nil 188 } 189 if err := c.cond.Wait(ctx); err != nil { 190 return err 191 } 192 } 193 } 194 195 // Release releases a number of tokens to the admission controller, 196 // reporting whether the request was within the capacity limits. 197 func (c *controller) Release(tokens int, ok bool) { 198 c.mu.Lock() 199 defer c.mu.Unlock() 200 if ok { 201 if c.used > c.limit { 202 c.limit = min(c.used, c.high) 203 } 204 } else { 205 c.limit = max(c.low, adjust(c.limit, -defaultLimitChangeRate)) 206 } 207 c.used -= tokens 208 209 c.limitVar.Set(int64(c.limit)) 210 c.usedVar.Set(int64(c.used)) 211 c.cond.Broadcast() 212 } 213 214 type aimd struct { 215 // limit, used are the current limit and current used tokens respectively. 216 limit, used int 217 // min is the minimum limit. 218 min int 219 // decfactor is the factor by which tokens are reduced upon congestion. 220 decfactor float32 221 222 mu sync.Mutex 223 cond *ctxsync.Cond 224 limitVar, usedVar expvar.Int 225 } 226 227 type aimdWithRetry struct { 228 *aimd 229 retry.Policy 230 } 231 232 func newAimd(min int, decfactor float32) *aimd { 233 c := &aimd{min: min, limit: min, decfactor: decfactor} 234 c.cond = ctxsync.NewCond(&c.mu) 235 return c 236 } 237 238 // AIMD returns a Policy which uses the Additive increase/multiplicative decrease 239 // algorithm for computing the amount of the concurrency to allow. 240 // AIMD is not fair: tokens are not granted in FIFO order; 241 // rather, waiters are picked randomly to be granted new tokens. 242 func AIMD(min int, decfactor float32) Policy { 243 return newAimd(min, decfactor) 244 } 245 246 // AIMDWithRetry returns a RetryPolicy which uses the Additive increase/multiplicative decrease 247 // algorithm for computing the amount of the concurrency to allow. 248 // AIMDWithRetry is not fair: tokens are not granted in FIFO order; 249 // rather, waiters are picked randomly to be granted new tokens. 250 func AIMDWithRetry(min int, decfactor float32, retryPolicy retry.Policy) RetryPolicy { 251 return aimdWithRetry{aimd: newAimd(min, decfactor), Policy: retryPolicy} 252 } 253 254 // Acquire acquires a number of tokens from the admission controller. 255 // Returns on success, or if the context was canceled. 256 func (c *aimd) Acquire(ctx context.Context, need int) error { 257 c.mu.Lock() 258 defer c.mu.Unlock() 259 for { 260 have := c.limit - c.used 261 if need <= have || (need > c.limit && c.used == 0) { 262 c.used += need 263 c.usedVar.Set(int64(c.used)) 264 return nil 265 } 266 if err := c.cond.Wait(ctx); err != nil { 267 return err 268 } 269 } 270 } 271 272 // Release releases a number of tokens to the admission controller, 273 // reporting whether the request was within the capacity limits. 274 func (c *aimd) Release(tokens int, ok bool) { 275 c.mu.Lock() 276 defer c.mu.Unlock() 277 switch { 278 case !ok: 279 c.limit = max(c.min, adjust(c.limit, -c.decfactor)) 280 case ok && c.used == c.limit: 281 c.limit += 1 282 } 283 c.used -= tokens 284 285 c.limitVar.Set(int64(c.limit)) 286 c.usedVar.Set(int64(c.used)) 287 c.cond.Broadcast() 288 }