// Copyright 2018 Google LLC All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package workerqueue extends client-go's workqueue
// functionality into an opinionated queue + worker model that
// is reusable
package workerqueue

import (
	"context"
	"fmt"
	"sync"
	"time"

	"agones.dev/agones/pkg/util/logfields"
	"agones.dev/agones/pkg/util/runtime"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	k8serror "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"
)

const (
	// workFx is the restart interval handed to wait.Until in run():
	// if a worker loop ever exits while the context is still live, it
	// is restarted after this delay.
	workFx = time.Second
)

// traceError is a marker type for errors that should only be logged at a Trace level.
// Useful if you want a Handler to be retried, but not logged at an Error level.
type traceError struct {
	err error
}

// NewTraceError returns a traceError wrapper around an error.
func NewTraceError(err error) error {
	return &traceError{err: err}
}

// Error returns the error string
func (l *traceError) Error() string {
	if l.err == nil {
		return "<nil>"
	}
	return l.err.Error()
}

// isTraceError returns if the error is a trace error or not.
// It unwraps with errors.Cause first, so a traceError that has been
// wrapped with github.com/pkg/errors is still detected.
func isTraceError(err error) bool {
	cause := errors.Cause(err)
	_, ok := cause.(*traceError)
	return ok
}

// Handler is the handler for processing the work queue
// This is usually a synchronisation handler for a controller or related.
// The string argument is the cache.MetaNamespaceKeyFunc key of the object
// to be processed; returning a non-nil error causes a rate-limited retry.
type Handler func(context.Context, string) error

// WorkerQueue is an opinionated queue + worker for use
// with controllers and related and processing Kubernetes watched
// events and synchronising resources
//
//nolint:govet // ignore fieldalignment, singleton
type WorkerQueue struct {
	logger  *logrus.Entry
	keyName string
	queue   workqueue.TypedRateLimitingInterface[any]
	// SyncHandler is exported to make testing easier (hack)
	SyncHandler Handler

	// mu guards workers and running, which together back Healthy()/RunCount().
	mu      sync.Mutex
	workers int // number of workers requested via Run
	running int // number of worker goroutines currently executing
}

// FastRateLimiter returns a rate limiter without exponential back-off, with specified maximum per-item retry delay.
func FastRateLimiter(maxDelay time.Duration) workqueue.TypedRateLimiter[any] {
	const numFastRetries = 5
	const fastDelay = 200 * time.Millisecond // first few retries up to 'numFastRetries' are fast

	return workqueue.NewTypedItemFastSlowRateLimiter[any](fastDelay, maxDelay, numFastRetries)
}

// ConstantRateLimiter returns a rate limiter without exponential back-off, with constant retry delay.
96 func ConstantRateLimiter(maxDelay time.Duration) workqueue.TypedRateLimiter[any] { 97 const numFastRetries = 0 // only constant delay 98 const fastDelay = 200 * time.Millisecond // not needed 99 100 return workqueue.NewTypedItemFastSlowRateLimiter[any](fastDelay, maxDelay, numFastRetries) 101 } 102 103 // NewWorkerQueue returns a new worker queue for a given name 104 func NewWorkerQueue(handler Handler, logger *logrus.Entry, keyName logfields.ResourceType, queueName string) *WorkerQueue { 105 return NewWorkerQueueWithRateLimiter(handler, logger, keyName, queueName, workqueue.DefaultTypedControllerRateLimiter[any]()) 106 } 107 108 // NewWorkerQueueWithRateLimiter returns a new worker queue for a given name and a custom rate limiter. 109 func NewWorkerQueueWithRateLimiter(handler Handler, logger *logrus.Entry, keyName logfields.ResourceType, queueName string, rateLimiter workqueue.TypedRateLimiter[any]) *WorkerQueue { 110 return &WorkerQueue{ 111 keyName: string(keyName), 112 logger: logger.WithField("queue", queueName), 113 queue: workqueue.NewNamedRateLimitingQueue(rateLimiter, queueName), 114 SyncHandler: handler, 115 } 116 } 117 118 // Enqueue puts the name of the runtime.Object in the 119 // queue to be processed. If you need to send through an 120 // explicit key, use an cache.ExplicitKey 121 func (wq *WorkerQueue) Enqueue(obj interface{}) { 122 var key string 123 var err error 124 if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { 125 err = errors.Wrap(err, "Error creating key for object") 126 runtime.HandleError(wq.logger.WithField("obj", obj), err) 127 return 128 } 129 wq.logger.WithField(wq.keyName, key).Trace("Enqueuing") 130 wq.queue.AddRateLimited(key) 131 } 132 133 // EnqueueImmediately performs Enqueue but without rate-limiting. 134 // This should be used to continue partially completed work after giving other 135 // items in the queue a chance of running. 
136 func (wq *WorkerQueue) EnqueueImmediately(obj interface{}) { 137 var key string 138 var err error 139 if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { 140 err = errors.Wrap(err, "Error creating key for object") 141 runtime.HandleError(wq.logger.WithField("obj", obj), err) 142 return 143 } 144 wq.logger.WithField(wq.keyName, key).Trace("Enqueuing immediately") 145 wq.queue.Add(key) 146 } 147 148 // EnqueueAfter delays an enqueue operation by duration 149 func (wq *WorkerQueue) EnqueueAfter(obj interface{}, duration time.Duration) { 150 var key string 151 var err error 152 if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil { 153 err = errors.Wrap(err, "Error creating key for object") 154 runtime.HandleError(wq.logger.WithField("obj", obj), err) 155 return 156 } 157 158 wq.logger.WithField(wq.keyName, key).WithField("duration", duration).Trace("Enqueueing after duration") 159 wq.queue.AddAfter(key, duration) 160 } 161 162 // runWorker is a long-running function that will continually call the 163 // processNextWorkItem function in order to read and process a message on the 164 // workqueue. 165 func (wq *WorkerQueue) runWorker(ctx context.Context) { 166 for wq.processNextWorkItem(ctx) { 167 } 168 } 169 170 // processNextWorkItem processes the next work item. 171 // pretty self explanatory :) 172 func (wq *WorkerQueue) processNextWorkItem(ctx context.Context) bool { 173 obj, quit := wq.queue.Get() 174 if quit { 175 return false 176 } 177 defer wq.queue.Done(obj) 178 179 wq.logger.WithField(wq.keyName, obj).Debug("Processing") 180 181 var key string 182 var ok bool 183 if key, ok = obj.(string); !ok { 184 runtime.HandleError(wq.logger.WithField(wq.keyName, obj), errors.Errorf("expected string in queue, but got %T", obj)) 185 // this is a bad entry, we don't want to reprocess 186 wq.queue.Forget(obj) 187 return true 188 } 189 190 if err := wq.SyncHandler(ctx, key); err != nil { 191 // Conflicts are expected, so only show them in debug operations. 
192 // Also check is traceError for other expected errors. 193 if k8serror.IsConflict(errors.Cause(err)) || isTraceError(err) { 194 wq.logger.WithField(wq.keyName, obj).Trace(err) 195 } else { 196 runtime.HandleError(wq.logger.WithField(wq.keyName, obj), err) 197 } 198 199 // we don't forget here, because we want this to be retried via the queue 200 wq.queue.AddRateLimited(obj) 201 return true 202 } 203 204 wq.queue.Forget(obj) 205 return true 206 } 207 208 // Run the WorkerQueue processing via the Handler. Will block until stop is closed. 209 // Runs a certain number workers to process the rate limited queue 210 func (wq *WorkerQueue) Run(ctx context.Context, workers int) { 211 wq.setWorkerCount(workers) 212 wq.logger.WithField("workers", workers).Info("Starting workers...") 213 for i := 0; i < workers; i++ { 214 go wq.run(ctx) 215 } 216 217 <-ctx.Done() 218 wq.logger.Info("...shutting down workers") 219 wq.queue.ShutDown() 220 } 221 222 func (wq *WorkerQueue) run(ctx context.Context) { 223 wq.inc() 224 defer wq.dec() 225 wait.Until(func() { wq.runWorker(ctx) }, workFx, ctx.Done()) 226 } 227 228 // Healthy reports whether all the worker goroutines are running. 229 func (wq *WorkerQueue) Healthy() error { 230 wq.mu.Lock() 231 defer wq.mu.Unlock() 232 want := wq.workers 233 got := wq.running 234 235 if want != got { 236 return fmt.Errorf("want %d worker goroutine(s), got %d", want, got) 237 } 238 return nil 239 } 240 241 // RunCount reports the number of running worker goroutines started by Run. 242 func (wq *WorkerQueue) RunCount() int { 243 wq.mu.Lock() 244 defer wq.mu.Unlock() 245 return wq.running 246 } 247 248 func (wq *WorkerQueue) setWorkerCount(n int) { 249 wq.mu.Lock() 250 defer wq.mu.Unlock() 251 wq.workers = n 252 } 253 254 func (wq *WorkerQueue) inc() { 255 wq.mu.Lock() 256 defer wq.mu.Unlock() 257 wq.running++ 258 } 259 260 func (wq *WorkerQueue) dec() { 261 wq.mu.Lock() 262 defer wq.mu.Unlock() 263 wq.running-- 264 }