agones.dev/agones@v1.53.0/pkg/util/workerqueue/workerqueue.go (about)

     1  // Copyright 2018 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package workerqueue extends client-go's workqueue
    16  // functionality into an opinionated queue + worker model that
    17  // is reusable
    18  package workerqueue
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"sync"
    24  	"time"
    25  
    26  	"agones.dev/agones/pkg/util/logfields"
    27  	"agones.dev/agones/pkg/util/runtime"
    28  	"github.com/pkg/errors"
    29  	"github.com/sirupsen/logrus"
    30  	k8serror "k8s.io/apimachinery/pkg/api/errors"
    31  	"k8s.io/apimachinery/pkg/util/wait"
    32  	"k8s.io/client-go/tools/cache"
    33  	"k8s.io/client-go/util/workqueue"
    34  )
    35  
const (
	// workFx is the restart period handed to wait.Until in run():
	// if a worker's processing loop ever returns before the context is
	// cancelled, it is restarted after this interval.
	workFx = time.Second
)
    39  
    40  // traceError is a marker type for errors that that should only be logged at a Trace level.
    41  // Useful if you want a Handler to be retried, but not logged at an Error level.
    42  type traceError struct {
    43  	err error
    44  }
    45  
    46  // NewTraceError returns a traceError wrapper around an error.
    47  func NewTraceError(err error) error {
    48  	return &traceError{err: err}
    49  }
    50  
    51  // Error returns the error string
    52  func (l *traceError) Error() string {
    53  	if l.err == nil {
    54  		return "<nil>"
    55  	}
    56  	return l.err.Error()
    57  }
    58  
    59  // isTraceError returns if the error is a trace error or not
    60  func isTraceError(err error) bool {
    61  	cause := errors.Cause(err)
    62  	_, ok := cause.(*traceError)
    63  	return ok
    64  }
    65  
// Handler is the handler for processing the work queue
// This is usually a synchronisation handler for a controller or related.
// It receives the processing context and the item's string key, and a
// non-nil return causes the item to be re-queued with rate limiting.
type Handler func(context.Context, string) error
    69  
// WorkerQueue is an opinionated queue + worker for use
// with controllers and related and processing Kubernetes watched
// events and synchronising resources
//
//nolint:govet // ignore fieldalignment, singleton
type WorkerQueue struct {
	logger  *logrus.Entry
	keyName string // log field name under which queue keys are reported
	queue   workqueue.TypedRateLimitingInterface[any]
	// SyncHandler is exported to make testing easier (hack)
	SyncHandler Handler

	mu      sync.Mutex // guards workers and running
	workers int        // worker goroutines requested via Run
	running int        // worker goroutines currently running
}
    86  
    87  // FastRateLimiter returns a rate limiter without exponential back-off, with specified maximum per-item retry delay.
    88  func FastRateLimiter(maxDelay time.Duration) workqueue.TypedRateLimiter[any] {
    89  	const numFastRetries = 5
    90  	const fastDelay = 200 * time.Millisecond // first few retries up to 'numFastRetries' are fast
    91  
    92  	return workqueue.NewTypedItemFastSlowRateLimiter[any](fastDelay, maxDelay, numFastRetries)
    93  }
    94  
    95  // ConstantRateLimiter returns a rate limiter without exponential back-off, with constant retry delay.
    96  func ConstantRateLimiter(maxDelay time.Duration) workqueue.TypedRateLimiter[any] {
    97  	const numFastRetries = 0                 // only constant delay
    98  	const fastDelay = 200 * time.Millisecond // not needed
    99  
   100  	return workqueue.NewTypedItemFastSlowRateLimiter[any](fastDelay, maxDelay, numFastRetries)
   101  }
   102  
// NewWorkerQueue returns a new worker queue for a given name,
// using client-go's default controller rate limiter
// (exponential per-item back-off combined with an overall bucket limit).
func NewWorkerQueue(handler Handler, logger *logrus.Entry, keyName logfields.ResourceType, queueName string) *WorkerQueue {
	return NewWorkerQueueWithRateLimiter(handler, logger, keyName, queueName, workqueue.DefaultTypedControllerRateLimiter[any]())
}
   107  
   108  // NewWorkerQueueWithRateLimiter returns a new worker queue for a given name and a custom rate limiter.
   109  func NewWorkerQueueWithRateLimiter(handler Handler, logger *logrus.Entry, keyName logfields.ResourceType, queueName string, rateLimiter workqueue.TypedRateLimiter[any]) *WorkerQueue {
   110  	return &WorkerQueue{
   111  		keyName:     string(keyName),
   112  		logger:      logger.WithField("queue", queueName),
   113  		queue:       workqueue.NewNamedRateLimitingQueue(rateLimiter, queueName),
   114  		SyncHandler: handler,
   115  	}
   116  }
   117  
   118  // Enqueue puts the name of the runtime.Object in the
   119  // queue to be processed. If you need to send through an
   120  // explicit key, use an cache.ExplicitKey
   121  func (wq *WorkerQueue) Enqueue(obj interface{}) {
   122  	var key string
   123  	var err error
   124  	if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil {
   125  		err = errors.Wrap(err, "Error creating key for object")
   126  		runtime.HandleError(wq.logger.WithField("obj", obj), err)
   127  		return
   128  	}
   129  	wq.logger.WithField(wq.keyName, key).Trace("Enqueuing")
   130  	wq.queue.AddRateLimited(key)
   131  }
   132  
   133  // EnqueueImmediately performs Enqueue but without rate-limiting.
   134  // This should be used to continue partially completed work after giving other
   135  // items in the queue a chance of running.
   136  func (wq *WorkerQueue) EnqueueImmediately(obj interface{}) {
   137  	var key string
   138  	var err error
   139  	if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil {
   140  		err = errors.Wrap(err, "Error creating key for object")
   141  		runtime.HandleError(wq.logger.WithField("obj", obj), err)
   142  		return
   143  	}
   144  	wq.logger.WithField(wq.keyName, key).Trace("Enqueuing immediately")
   145  	wq.queue.Add(key)
   146  }
   147  
   148  // EnqueueAfter delays an enqueue operation by duration
   149  func (wq *WorkerQueue) EnqueueAfter(obj interface{}, duration time.Duration) {
   150  	var key string
   151  	var err error
   152  	if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil {
   153  		err = errors.Wrap(err, "Error creating key for object")
   154  		runtime.HandleError(wq.logger.WithField("obj", obj), err)
   155  		return
   156  	}
   157  
   158  	wq.logger.WithField(wq.keyName, key).WithField("duration", duration).Trace("Enqueueing after duration")
   159  	wq.queue.AddAfter(key, duration)
   160  }
   161  
   162  // runWorker is a long-running function that will continually call the
   163  // processNextWorkItem function in order to read and process a message on the
   164  // workqueue.
   165  func (wq *WorkerQueue) runWorker(ctx context.Context) {
   166  	for wq.processNextWorkItem(ctx) {
   167  	}
   168  }
   169  
   170  // processNextWorkItem processes the next work item.
   171  // pretty self explanatory :)
   172  func (wq *WorkerQueue) processNextWorkItem(ctx context.Context) bool {
   173  	obj, quit := wq.queue.Get()
   174  	if quit {
   175  		return false
   176  	}
   177  	defer wq.queue.Done(obj)
   178  
   179  	wq.logger.WithField(wq.keyName, obj).Debug("Processing")
   180  
   181  	var key string
   182  	var ok bool
   183  	if key, ok = obj.(string); !ok {
   184  		runtime.HandleError(wq.logger.WithField(wq.keyName, obj), errors.Errorf("expected string in queue, but got %T", obj))
   185  		// this is a bad entry, we don't want to reprocess
   186  		wq.queue.Forget(obj)
   187  		return true
   188  	}
   189  
   190  	if err := wq.SyncHandler(ctx, key); err != nil {
   191  		// Conflicts are expected, so only show them in debug operations.
   192  		// Also check is traceError for other expected errors.
   193  		if k8serror.IsConflict(errors.Cause(err)) || isTraceError(err) {
   194  			wq.logger.WithField(wq.keyName, obj).Trace(err)
   195  		} else {
   196  			runtime.HandleError(wq.logger.WithField(wq.keyName, obj), err)
   197  		}
   198  
   199  		// we don't forget here, because we want this to be retried via the queue
   200  		wq.queue.AddRateLimited(obj)
   201  		return true
   202  	}
   203  
   204  	wq.queue.Forget(obj)
   205  	return true
   206  }
   207  
   208  // Run the WorkerQueue processing via the Handler. Will block until stop is closed.
   209  // Runs a certain number workers to process the rate limited queue
   210  func (wq *WorkerQueue) Run(ctx context.Context, workers int) {
   211  	wq.setWorkerCount(workers)
   212  	wq.logger.WithField("workers", workers).Info("Starting workers...")
   213  	for i := 0; i < workers; i++ {
   214  		go wq.run(ctx)
   215  	}
   216  
   217  	<-ctx.Done()
   218  	wq.logger.Info("...shutting down workers")
   219  	wq.queue.ShutDown()
   220  }
   221  
// run marks this goroutine as running for the lifetime of the worker loop
// (so Healthy/RunCount can observe it), and keeps the loop alive: wait.Until
// re-invokes runWorker every workFx period if it returns, until the context
// is cancelled.
func (wq *WorkerQueue) run(ctx context.Context) {
	wq.inc()
	defer wq.dec()
	wait.Until(func() { wq.runWorker(ctx) }, workFx, ctx.Done())
}
   227  
   228  // Healthy reports whether all the worker goroutines are running.
   229  func (wq *WorkerQueue) Healthy() error {
   230  	wq.mu.Lock()
   231  	defer wq.mu.Unlock()
   232  	want := wq.workers
   233  	got := wq.running
   234  
   235  	if want != got {
   236  		return fmt.Errorf("want %d worker goroutine(s), got %d", want, got)
   237  	}
   238  	return nil
   239  }
   240  
   241  // RunCount reports the number of running worker goroutines started by Run.
   242  func (wq *WorkerQueue) RunCount() int {
   243  	wq.mu.Lock()
   244  	defer wq.mu.Unlock()
   245  	return wq.running
   246  }
   247  
   248  func (wq *WorkerQueue) setWorkerCount(n int) {
   249  	wq.mu.Lock()
   250  	defer wq.mu.Unlock()
   251  	wq.workers = n
   252  }
   253  
   254  func (wq *WorkerQueue) inc() {
   255  	wq.mu.Lock()
   256  	defer wq.mu.Unlock()
   257  	wq.running++
   258  }
   259  
   260  func (wq *WorkerQueue) dec() {
   261  	wq.mu.Lock()
   262  	defer wq.mu.Unlock()
   263  	wq.running--
   264  }