github.com/operator-framework/operator-lifecycle-manager@v0.30.0/pkg/lib/queueinformer/queueinformer_operator.go (about)

     1  package queueinformer
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/kubestate"
    10  	"github.com/pkg/errors"
    11  	"github.com/sirupsen/logrus"
    12  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    13  	"k8s.io/client-go/discovery"
    14  	"k8s.io/client-go/tools/cache"
    15  )
    16  
    17  const (
    18  	defaultServerVersionInterval = 1 * time.Minute
    19  )
    20  
// ExtensibleOperator describes a Reconciler that can be extended with additional informers and queue informers.
// Registration is only possible before the Operator has been started.
type ExtensibleOperator interface {
	// RegisterQueueInformer registers the given QueueInformer with the Operator.
	// This method returns an error if the Operator has already been started.
	// The operator implementation in this file also returns an error for a nil QueueInformer.
	RegisterQueueInformer(queueInformer *QueueInformer) error

	// RegisterInformer registers an informer with the Operator.
	// This method returns an error if the Operator has already been started.
	// The operator implementation in this file also returns an error for a nil informer.
	RegisterInformer(cache.SharedIndexInformer) error
}
    31  
// ObservableOperator describes a Reconciler whose state can be queried.
type ObservableOperator interface {
	// Ready returns a channel that is closed when the Operator is ready to run.
	// Note: the operator implementation in this file closes the channel when its
	// startup routine returns, including when startup failed or was cancelled.
	Ready() <-chan struct{}

	// Done returns a channel that is closed when the Operator is done running.
	Done() <-chan struct{}

	// AtLevel returns a channel that emits errors when the Operator is not at level.
	// Note: the operator implementation in this file publishes the result of each
	// sync that is not requeued (nil on success) and closes the channel on shutdown.
	AtLevel() <-chan error

	// Started returns true if RunInformers() has been called, false otherwise.
	Started() bool

	// HasSynced returns true if the Operator's Informers have synced, false otherwise.
	HasSynced() bool
}
    49  
// Operator describes a Reconciler that manages a set of QueueInformers.
// It combines the observable and extensible views with the methods that start it.
type Operator interface {
	ObservableOperator
	ExtensibleOperator
	// RunInformers starts the Operator's underlying Informers.
	// The operator implementation in this file performs this at most once.
	RunInformers(ctx context.Context)

	// Run starts the Operator and its underlying Informers.
	// The operator implementation in this file returns immediately and does its
	// work on a background goroutine; use Done() to observe completion.
	Run(ctx context.Context)
}
    60  
// operator is the Operator implementation provided by this package.
//
// Field roles, as used in this file:
//   - serverVersion: queried during startup to confirm the API server is reachable.
//   - queueInformers: registered QueueInformers; numWorkers workers are started for each.
//   - informers: de-duplicated informers started by RunInformers.
//   - hasSynced: conjunction of every registered informer's HasSynced; nil until the
//     first informer is registered.
//   - mu: held while registering (queue) informers and while RunInformers flips started;
//     read-locked by Started.
//   - runInformersOnce / reconcileOnce: make RunInformers and Run take effect only once.
//   - ready: closed when start returns.
//   - done: closed when the goroutine launched by Run exits.
//   - atLevel / syncCh: the same channel (syncCh is assigned from atLevel at construction);
//     workers send sync results on syncCh and AtLevel exposes the receive side.
//   - started: set to true by RunInformers; registration is rejected afterwards.
type operator struct {
	serverVersion    discovery.ServerVersionInterface
	queueInformers   []*QueueInformer
	informers        []cache.SharedIndexInformer
	hasSynced        cache.InformerSynced
	mu               sync.RWMutex
	numWorkers       int
	runInformersOnce sync.Once
	reconcileOnce    sync.Once
	logger           *logrus.Logger
	ready            chan struct{}
	done             chan struct{}
	atLevel          chan error
	syncCh           chan error
	started          bool
}
    77  
// Ready returns the operator's ready channel, which is closed when start returns
// (whether or not startup succeeded).
func (o *operator) Ready() <-chan struct{} {
	return o.ready
}
    81  
// Done returns the operator's done channel, which is closed by the cleanup of the
// goroutine launched in Run, after the queues are shut down and atLevel is closed.
func (o *operator) Done() <-chan struct{} {
	return o.done
}
    85  
// AtLevel returns the receive side of the channel on which workers publish the
// result of each sync that is not requeued (nil on success). Sends are non-blocking,
// so results are dropped when the buffer is full; the channel is closed on shutdown.
func (o *operator) AtLevel() <-chan error {
	return o.atLevel
}
    89  
    90  func (o *operator) HasSynced() bool {
    91  	return o.hasSynced()
    92  }
    93  
    94  func (o *operator) Started() bool {
    95  	o.mu.RLock()
    96  	defer o.mu.RUnlock()
    97  
    98  	return o.started
    99  }
   100  
   101  func (o *operator) RegisterQueueInformer(queueInformer *QueueInformer) error {
   102  	o.mu.Lock()
   103  	defer o.mu.Unlock()
   104  
   105  	err := errors.New("failed to register queue informer")
   106  	if queueInformer == nil {
   107  		return errors.Wrap(err, "nil queue informer")
   108  	}
   109  
   110  	if o.started {
   111  		return errors.Wrap(err, "operator already started")
   112  	}
   113  
   114  	o.queueInformers = append(o.queueInformers, queueInformer)
   115  
   116  	// Some QueueInformers do not have informers associated with them.
   117  	// Only add to the list of informers when one exists.
   118  	if informer := queueInformer.informer; informer != nil {
   119  		o.registerInformer(informer)
   120  	}
   121  
   122  	return nil
   123  }
   124  
   125  func (o *operator) RegisterInformer(informer cache.SharedIndexInformer) error {
   126  	o.mu.Lock()
   127  	defer o.mu.Unlock()
   128  
   129  	err := errors.New("failed to register informer")
   130  	if informer == nil {
   131  		return errors.Wrap(err, "nil informer")
   132  	}
   133  
   134  	if o.started {
   135  		return errors.Wrap(err, "operator already started")
   136  	}
   137  
   138  	o.registerInformer(informer)
   139  
   140  	return nil
   141  }
   142  
   143  func (o *operator) registerInformer(informer cache.SharedIndexInformer) {
   144  	// never double-register an informer
   145  	for i := range o.informers {
   146  		if o.informers[i] == informer {
   147  			return
   148  		}
   149  	}
   150  	o.informers = append(o.informers, informer)
   151  	o.addHasSynced(informer.HasSynced)
   152  }
   153  
   154  func (o *operator) addHasSynced(hasSynced cache.InformerSynced) {
   155  	if o.hasSynced == nil {
   156  		o.hasSynced = hasSynced
   157  		return
   158  	}
   159  
   160  	prev := o.hasSynced
   161  	o.hasSynced = func() bool {
   162  		return prev() && hasSynced()
   163  	}
   164  }
   165  
   166  // TODO: generalize over func(ctx) so this can start informers + source watcher
   167  func (o *operator) RunInformers(ctx context.Context) {
   168  	o.runInformersOnce.Do(func() {
   169  		o.mu.Lock()
   170  		defer o.mu.Unlock()
   171  		for _, informer := range o.informers {
   172  			go informer.Run(ctx.Done())
   173  		}
   174  
   175  		o.started = true
   176  		o.logger.Infof("informers started")
   177  	})
   178  }
   179  
   180  // Run starts the operator's control loops.
   181  func (o *operator) Run(ctx context.Context) {
   182  	o.reconcileOnce.Do(func() {
   183  		go func() {
   184  			defer func() {
   185  				for _, queueInformer := range o.queueInformers {
   186  					queueInformer.queue.ShutDown()
   187  				}
   188  				close(o.atLevel)
   189  				close(o.done)
   190  			}()
   191  			if err := o.start(ctx); err != nil {
   192  				o.logger.WithError(err).Error("error encountered during startup")
   193  				return
   194  			}
   195  			<-ctx.Done()
   196  		}()
   197  	})
   198  }
   199  
   200  func (o *operator) start(ctx context.Context) error {
   201  	defer close(o.ready)
   202  
   203  	// goroutine will be unnecessary after https://github.com/kubernetes/enhancements/pull/1503
   204  	errs := make(chan error)
   205  	go func() {
   206  		defer close(errs)
   207  		v, err := o.serverVersion.ServerVersion()
   208  		if err == nil {
   209  			o.logger.Infof("connection established. cluster-version: %v", v)
   210  			return
   211  		}
   212  		select {
   213  		case <-time.After(defaultServerVersionInterval):
   214  		case <-ctx.Done():
   215  			return
   216  		}
   217  		v, err = o.serverVersion.ServerVersion()
   218  		if err != nil {
   219  			select {
   220  			case errs <- errors.Wrap(err, "communicating with server failed"):
   221  			case <-ctx.Done():
   222  				// don't block send forever on cancellation
   223  			}
   224  			return
   225  		}
   226  		o.logger.Infof("connection established. cluster-version: %v", v)
   227  	}()
   228  
   229  	select {
   230  	case err := <-errs:
   231  		if err != nil {
   232  			return fmt.Errorf("operator not ready: %s", err.Error())
   233  		}
   234  		o.logger.Info("operator ready")
   235  	case <-ctx.Done():
   236  		return nil
   237  	}
   238  
   239  	o.logger.Info("starting informers...")
   240  	o.RunInformers(ctx)
   241  
   242  	o.logger.Info("waiting for caches to sync...")
   243  	if ok := cache.WaitForCacheSync(ctx.Done(), o.hasSynced); !ok {
   244  		return fmt.Errorf("failed to wait for caches to sync")
   245  	}
   246  
   247  	o.logger.Info("starting workers...")
   248  	for _, queueInformer := range o.queueInformers {
   249  		for w := 0; w < o.numWorkers; w++ {
   250  			go o.worker(ctx, queueInformer)
   251  		}
   252  	}
   253  
   254  	return nil
   255  }
   256  
   257  // worker runs a worker thread that just dequeues items, processes them, and marks them done.
   258  // It enforces that the syncHandler is never invoked concurrently with the same key.
   259  func (o *operator) worker(ctx context.Context, loop *QueueInformer) {
   260  	for o.processNextWorkItem(ctx, loop) {
   261  	}
   262  }
   263  
   264  func (o *operator) processNextWorkItem(ctx context.Context, loop *QueueInformer) bool {
   265  	queue := loop.queue
   266  	item, quit := queue.Get()
   267  
   268  	if quit {
   269  		return false
   270  	}
   271  	defer queue.Done(item)
   272  
   273  	logger := o.logger.WithField("item", item)
   274  	logger.WithField("queue-length", queue.Len()).Trace("popped queue")
   275  
   276  	event, ok := item.(kubestate.ResourceEvent)
   277  	if !ok || event.Type() != kubestate.ResourceDeleted {
   278  		// Get the key
   279  		key, keyable := loop.key(item)
   280  		if !keyable {
   281  			logger.WithField("item", item).Warn("could not form key")
   282  			queue.Forget(item)
   283  			return true
   284  		}
   285  
   286  		logger = logger.WithField("cache-key", key)
   287  
   288  		var resource interface{}
   289  		if loop.indexer == nil {
   290  			resource = event.Resource()
   291  		} else {
   292  			// Get the current cached version of the resource
   293  			var exists bool
   294  			var err error
   295  			resource, exists, err = loop.indexer.GetByKey(key)
   296  			if err != nil {
   297  				logger.WithError(err).Error("cache get failed")
   298  				queue.Forget(item)
   299  				return true
   300  			}
   301  			if !exists {
   302  				logger.WithField("existing-cache-keys", loop.indexer.ListKeys()).Debug("cache get failed, key not in cache")
   303  				queue.Forget(item)
   304  				return true
   305  			}
   306  		}
   307  
   308  		if !ok {
   309  			event = kubestate.NewResourceEvent(kubestate.ResourceUpdated, resource)
   310  		} else {
   311  			event = kubestate.NewResourceEvent(event.Type(), resource)
   312  		}
   313  	}
   314  
   315  	// Sync and requeue on error (throw out failed deletion syncs)
   316  	err := loop.Sync(ctx, event)
   317  	if requeues := queue.NumRequeues(item); err != nil && requeues < 8 && event.Type() != kubestate.ResourceDeleted {
   318  		logger.WithField("requeues", requeues).Trace("requeuing with rate limiting")
   319  		utilruntime.HandleError(errors.Wrap(err, fmt.Sprintf("sync %q failed", item)))
   320  		queue.AddRateLimited(item)
   321  		return true
   322  	}
   323  	queue.Forget(item)
   324  
   325  	select {
   326  	case o.syncCh <- err:
   327  	default:
   328  	}
   329  
   330  	return true
   331  }
   332  
   333  // NewOperator returns a new Operator configured to manage the cluster with the given server version client.
   334  func NewOperator(sv discovery.ServerVersionInterface, options ...OperatorOption) (Operator, error) {
   335  	config := defaultOperatorConfig()
   336  	config.serverVersion = sv
   337  	config.apply(options)
   338  	if err := config.validate(); err != nil {
   339  		return nil, err
   340  	}
   341  
   342  	return newOperatorFromConfig(config)
   343  
   344  }
   345  
   346  func newOperatorFromConfig(config *operatorConfig) (*operator, error) {
   347  	op := &operator{
   348  		serverVersion: config.serverVersion,
   349  		numWorkers:    config.numWorkers,
   350  		logger:        config.logger,
   351  		ready:         make(chan struct{}),
   352  		done:          make(chan struct{}),
   353  		atLevel:       make(chan error, 25),
   354  	}
   355  	op.syncCh = op.atLevel
   356  
   357  	// Register QueueInformers and Informers
   358  	for _, queueInformer := range op.queueInformers {
   359  		if err := op.RegisterQueueInformer(queueInformer); err != nil {
   360  			return nil, err
   361  		}
   362  	}
   363  	for _, informer := range op.informers {
   364  		if err := op.RegisterInformer(informer); err != nil {
   365  			return nil, err
   366  		}
   367  	}
   368  
   369  	return op, nil
   370  }