k8s.io/apiserver@v0.29.3/pkg/storage/cacher/cacher.go

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cacher
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net/http"
    23  	"reflect"
    24  	"sync"
    25  	"time"
    26  
    27  	"go.opentelemetry.io/otel/attribute"
    28  	"google.golang.org/grpc/metadata"
    29  
    30  	"k8s.io/apimachinery/pkg/api/errors"
    31  	"k8s.io/apimachinery/pkg/api/meta"
    32  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    33  	"k8s.io/apimachinery/pkg/conversion"
    34  	"k8s.io/apimachinery/pkg/fields"
    35  	"k8s.io/apimachinery/pkg/labels"
    36  	"k8s.io/apimachinery/pkg/runtime"
    37  	"k8s.io/apimachinery/pkg/runtime/schema"
    38  	"k8s.io/apimachinery/pkg/util/wait"
    39  	"k8s.io/apimachinery/pkg/watch"
    40  	"k8s.io/apiserver/pkg/audit"
    41  	"k8s.io/apiserver/pkg/endpoints/request"
    42  	"k8s.io/apiserver/pkg/features"
    43  	"k8s.io/apiserver/pkg/storage"
    44  	"k8s.io/apiserver/pkg/storage/cacher/metrics"
    45  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    46  	"k8s.io/client-go/tools/cache"
    47  	"k8s.io/component-base/tracing"
    48  
    49  	"k8s.io/klog/v2"
    50  	"k8s.io/utils/clock"
    51  )
    52  
    53  var (
    54  	emptyFunc = func(bool) {}
    55  )
    56  
    57  const (
    58  	// storageWatchListPageSize is the cacher's request chunk size of
    59  	// initial and resync watch lists to storage.
    60  	storageWatchListPageSize = int64(10000)
    61  	// defaultBookmarkFrequency defines how frequently watch bookmarks should be sent
    62  	// in addition to sending a bookmark right before watch deadline.
    63  	//
    64  	// NOTE: Update `eventFreshDuration` when changing this value.
    65  	defaultBookmarkFrequency = time.Minute
    66  )
    67  
    68  // Config contains the configuration for a given Cache.
    69  type Config struct {
    70  	// An underlying storage.Interface.
    71  	Storage storage.Interface
    72  
    73  	// An underlying storage.Versioner.
    74  	Versioner storage.Versioner
    75  
    76  	// The GroupResource the cacher is caching. Used for disambiguating *unstructured.Unstructured (CRDs) in logging
    77  	// and metrics.
    78  	GroupResource schema.GroupResource
    79  
    80  	// The Cache will be caching objects of a given Type and assumes that they
    81  	// are all stored under ResourcePrefix directory in the underlying database.
    82  	ResourcePrefix string
    83  
    84  	// KeyFunc is used to get a key in the underlying storage for a given object.
    85  	KeyFunc func(runtime.Object) (string, error)
    86  
    87  	// GetAttrsFunc is used to get object labels and fields.
    88  	GetAttrsFunc func(runtime.Object) (label labels.Set, field fields.Set, err error)
    89  
    90  	// IndexerFuncs is used for optimizing the number of watchers that
    91  	// need to process an incoming event.
    92  	IndexerFuncs storage.IndexerFuncs
    93  
    94  	// Indexers is used to accelerate the list operation; it falls back to the
    95  	// regular list operation if no indexer is found.
    96  	Indexers *cache.Indexers
    97  
    98  	// NewFunc is a function that creates a new empty object for storing an object of type Type.
    99  	NewFunc func() runtime.Object
   100  
   101  	// NewListFunc is a function that creates a new empty list object for
   102  	// storing objects of type Type.
   103  	NewListFunc func() runtime.Object
   104  
   105  	Codec runtime.Codec
   106  
   107  	Clock clock.WithTicker
   108  }
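// A minimal sketch of assembling a Config (illustrative only; the storage,
// codec, key function, and Pod types below are assumptions standing in for
// whatever resource this cacher serves, not names defined in this file):
//
//	cfg := Config{
//		Storage:        etcdStorage, // some storage.Interface implementation
//		Versioner:      storage.APIObjectVersioner{},
//		GroupResource:  schema.GroupResource{Resource: "pods"},
//		ResourcePrefix: "/pods",
//		KeyFunc:        keyFunc,
//		NewFunc:        func() runtime.Object { return &examplev1.Pod{} },
//		NewListFunc:    func() runtime.Object { return &examplev1.PodList{} },
//		Codec:          codec, // Clock may be left nil; it defaults to a real clock
//	}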
   109  
   110  type watchersMap map[int]*cacheWatcher
   111  
   112  func (wm watchersMap) addWatcher(w *cacheWatcher, number int) {
   113  	wm[number] = w
   114  }
   115  
   116  func (wm watchersMap) deleteWatcher(number int) {
   117  	delete(wm, number)
   118  }
   119  
   120  func (wm watchersMap) terminateAll(done func(*cacheWatcher)) {
   121  	for key, watcher := range wm {
   122  		delete(wm, key)
   123  		done(watcher)
   124  	}
   125  }
   126  
   127  type indexedWatchers struct {
   128  	allWatchers   map[namespacedName]watchersMap
   129  	valueWatchers map[string]watchersMap
   130  }
   131  
   132  func (i *indexedWatchers) addWatcher(w *cacheWatcher, number int, scope namespacedName, value string, supported bool) {
   133  	if supported {
   134  		if _, ok := i.valueWatchers[value]; !ok {
   135  			i.valueWatchers[value] = watchersMap{}
   136  		}
   137  		i.valueWatchers[value].addWatcher(w, number)
   138  	} else {
   139  		scopedWatchers, ok := i.allWatchers[scope]
   140  		if !ok {
   141  			scopedWatchers = watchersMap{}
   142  			i.allWatchers[scope] = scopedWatchers
   143  		}
   144  		scopedWatchers.addWatcher(w, number)
   145  	}
   146  }
   147  
   148  func (i *indexedWatchers) deleteWatcher(number int, scope namespacedName, value string, supported bool) {
   149  	if supported {
   150  		i.valueWatchers[value].deleteWatcher(number)
   151  		if len(i.valueWatchers[value]) == 0 {
   152  			delete(i.valueWatchers, value)
   153  		}
   154  	} else {
   155  		i.allWatchers[scope].deleteWatcher(number)
   156  		if len(i.allWatchers[scope]) == 0 {
   157  			delete(i.allWatchers, scope)
   158  		}
   159  	}
   160  }
   161  
   162  func (i *indexedWatchers) terminateAll(groupResource schema.GroupResource, done func(*cacheWatcher)) {
   163  	// Note that we don't have to call the setDrainInputBufferLocked method on the watchers
   164  	// because we take advantage of the default value - stop immediately.
   165  	// Also, watchers that have already had their draining strategy set
   166  	// are no longer available (they were removed from the allWatchers and the valueWatchers maps).
   167  	if len(i.allWatchers) > 0 || len(i.valueWatchers) > 0 {
   168  		klog.Warningf("Terminating all watchers from cacher %v", groupResource)
   169  	}
   170  	for _, watchers := range i.allWatchers {
   171  		watchers.terminateAll(done)
   172  	}
   173  	for _, watchers := range i.valueWatchers {
   174  		watchers.terminateAll(done)
   175  	}
   176  	i.allWatchers = map[namespacedName]watchersMap{}
   177  	i.valueWatchers = map[string]watchersMap{}
   178  }
   179  
   180  // As we don't need high precision here, we keep all watchers timing out within a
   181  // second in one bucket, and pop them all at the timeout. To be more specific, if
   182  // you set the fire time at X, you will get the bookmark within the (X-1, X+1) period.
   183  type watcherBookmarkTimeBuckets struct {
   184  	// the key of watcherBuckets is the number of seconds since createTime
   185  	watchersBuckets   map[int64][]*cacheWatcher
   186  	createTime        time.Time
   187  	startBucketID     int64
   188  	clock             clock.Clock
   189  	bookmarkFrequency time.Duration
   190  }
   191  
   192  func newTimeBucketWatchers(clock clock.Clock, bookmarkFrequency time.Duration) *watcherBookmarkTimeBuckets {
   193  	return &watcherBookmarkTimeBuckets{
   194  		watchersBuckets:   make(map[int64][]*cacheWatcher),
   195  		createTime:        clock.Now(),
   196  		startBucketID:     0,
   197  		clock:             clock,
   198  		bookmarkFrequency: bookmarkFrequency,
   199  	}
   200  }
   201  
   202  // addWatcherThreadUnsafe adds a watcher to a bucket; if the deadline is before
   203  // the start, the watcher is added to the first bucket.
   204  func (t *watcherBookmarkTimeBuckets) addWatcherThreadUnsafe(w *cacheWatcher) bool {
   205  	// note that the returned time can be before t.createTime,
   206  	// especially in cases when the nextBookmarkTime method
   207  	// gives us the zero value of type Time,
   208  	// so bucketID can hold a negative value.
   209  	nextTime, ok := w.nextBookmarkTime(t.clock.Now(), t.bookmarkFrequency)
   210  	if !ok {
   211  		return false
   212  	}
   213  	bucketID := int64(nextTime.Sub(t.createTime) / time.Second)
   214  	if bucketID < t.startBucketID {
   215  		bucketID = t.startBucketID
   216  	}
   217  	watchers := t.watchersBuckets[bucketID]
   218  	t.watchersBuckets[bucketID] = append(watchers, w)
   219  	return true
   220  }
   221  
   222  func (t *watcherBookmarkTimeBuckets) popExpiredWatchersThreadUnsafe() [][]*cacheWatcher {
   223  	currentBucketID := int64(t.clock.Since(t.createTime) / time.Second)
   224  	// There should be one or two elements in almost all cases
   225  	expiredWatchers := make([][]*cacheWatcher, 0, 2)
   226  	for ; t.startBucketID <= currentBucketID; t.startBucketID++ {
   227  		if watchers, ok := t.watchersBuckets[t.startBucketID]; ok {
   228  			delete(t.watchersBuckets, t.startBucketID)
   229  			expiredWatchers = append(expiredWatchers, watchers)
   230  		}
   231  	}
   232  	return expiredWatchers
   233  }
   234  
   235  type filterWithAttrsFunc func(key string, l labels.Set, f fields.Set) bool
   236  
   237  type indexedTriggerFunc struct {
   238  	indexName   string
   239  	indexerFunc storage.IndexerFunc
   240  }
   241  
   242  // Cacher is responsible for serving WATCH and LIST requests for a given
   243  // resource from its internal cache and updating its cache in the background
   244  // based on the underlying storage contents.
   245  // Cacher implements storage.Interface (although most of the calls are just
   246  // delegated to the underlying storage).
   247  type Cacher struct {
   248  	// HighWaterMarks for performance debugging.
   249  	// Important: since HighWaterMark uses sync/atomic, it has to be at the top of the struct to satisfy the 64-bit alignment requirement on 32-bit platforms.
   250  	// See: https://golang.org/pkg/sync/atomic/ for more information.
   251  	incomingHWM storage.HighWaterMark
   252  	// Incoming events that should be dispatched to watchers.
   253  	incoming chan watchCacheEvent
   254  
   255  	resourcePrefix string
   256  
   257  	sync.RWMutex
   258  
   259  	// Before accessing the cacher's cache, wait for the ready to be ok.
   260  	// This is necessary to prevent users from accessing structures that are
   261  	// uninitialized or are being repopulated right now.
   262  	// ready needs to be set to false when the cacher is paused or stopped.
   263  	// ready needs to be set to true when the cacher is ready to use after
   264  	// initialization.
   265  	ready *ready
   266  
   267  	// Underlying storage.Interface.
   268  	storage storage.Interface
   269  
   270  	// Expected type of objects in the underlying cache.
   271  	objectType reflect.Type
   272  	// Used for logging, to disambiguate *unstructured.Unstructured (CRDs)
   273  	groupResource schema.GroupResource
   274  
   275  	// "sliding window" of recent changes of objects and the current state.
   276  	watchCache *watchCache
   277  	reflector  *cache.Reflector
   278  
   279  	// Versioner is used to handle resource versions.
   280  	versioner storage.Versioner
   281  
   282  	// newFunc is a function that creates a new empty object for storing an object of type Type.
   283  	newFunc func() runtime.Object
   284  
   285  	// newListFunc is a function that creates a new empty list for storing objects of type Type.
   286  	newListFunc func() runtime.Object
   287  
   288  	// indexedTrigger is used for optimizing amount of watchers that needs to process
   289  	// an incoming event.
   290  	indexedTrigger *indexedTriggerFunc
   291  	// watchers is a mapping from the value of the trigger function that a
   292  	// watcher is interested in to the watchers themselves.
   293  	watcherIdx int
   294  	watchers   indexedWatchers
   295  
   296  	// Defines a time budget that can be spent on waiting for not-ready watchers
   297  	// while dispatching an event before shutting them down.
   298  	dispatchTimeoutBudget timeBudget
   299  
   300  	// Handling graceful termination.
   301  	stopLock sync.RWMutex
   302  	stopped  bool
   303  	stopCh   chan struct{}
   304  	stopWg   sync.WaitGroup
   305  
   306  	clock clock.Clock
   307  	// timer is used to avoid unnecessary allocations in underlying watchers.
   308  	timer *time.Timer
   309  
   310  	// dispatching determines whether there is currently dispatching of
   311  	// any event in flight.
   312  	dispatching bool
   313  	// watchersBuffer is a list of watchers potentially interested in currently
   314  	// dispatched event.
   315  	watchersBuffer []*cacheWatcher
   316  	// blockedWatchers is a list of watchers whose buffer is currently full.
   317  	blockedWatchers []*cacheWatcher
   318  	// watchersToStop is a list of watchers that were supposed to be stopped
   319  	// during current dispatching, but stopping was deferred to the end of
   320  	// dispatching that event to avoid race with closing channels in watchers.
   321  	watchersToStop []*cacheWatcher
   322  	// Maintain a timeout queue to send the bookmark event before the watcher times out.
   323  	// Note that this field when accessed MUST be protected by the Cacher.lock.
   324  	bookmarkWatchers *watcherBookmarkTimeBuckets
   325  	// expiredBookmarkWatchers is a list of watchers that were expired and need to be schedule for a next bookmark event
   326  	expiredBookmarkWatchers []*cacheWatcher
   327  }
   328  
   329  func (c *Cacher) RequestWatchProgress(ctx context.Context) error {
   330  	return c.storage.RequestWatchProgress(ctx)
   331  }
   332  
   333  // NewCacherFromConfig creates a new Cacher responsible for servicing WATCH and LIST requests from
   334  // its internal cache and updating its cache in the background based on the
   335  // given configuration.
   336  func NewCacherFromConfig(config Config) (*Cacher, error) {
   337  	stopCh := make(chan struct{})
   338  	obj := config.NewFunc()
   339  	// Give this error when it is constructed rather than when you get the
   340  	// first watch item, because it's much easier to track down that way.
   341  	if err := runtime.CheckCodec(config.Codec, obj); err != nil {
   342  		return nil, fmt.Errorf("storage codec doesn't seem to match given type: %v", err)
   343  	}
   344  
   345  	var indexedTrigger *indexedTriggerFunc
   346  	if config.IndexerFuncs != nil {
   347  		// For now, we don't support multiple trigger functions defined
   348  		// for a given resource.
   349  		if len(config.IndexerFuncs) > 1 {
   350  			return nil, fmt.Errorf("cacher %s doesn't support more than one IndexerFunc", reflect.TypeOf(obj).String())
   351  		}
   352  		for key, value := range config.IndexerFuncs {
   353  			if value != nil {
   354  				indexedTrigger = &indexedTriggerFunc{
   355  					indexName:   key,
   356  					indexerFunc: value,
   357  				}
   358  			}
   359  		}
   360  	}
   361  
   362  	if config.Clock == nil {
   363  		config.Clock = clock.RealClock{}
   364  	}
   365  	objType := reflect.TypeOf(obj)
   366  	cacher := &Cacher{
   367  		resourcePrefix: config.ResourcePrefix,
   368  		ready:          newReady(),
   369  		storage:        config.Storage,
   370  		objectType:     objType,
   371  		groupResource:  config.GroupResource,
   372  		versioner:      config.Versioner,
   373  		newFunc:        config.NewFunc,
   374  		newListFunc:    config.NewListFunc,
   375  		indexedTrigger: indexedTrigger,
   376  		watcherIdx:     0,
   377  		watchers: indexedWatchers{
   378  			allWatchers:   make(map[namespacedName]watchersMap),
   379  			valueWatchers: make(map[string]watchersMap),
   380  		},
   381  		// TODO: Figure out the correct value for the buffer size.
   382  		incoming:              make(chan watchCacheEvent, 100),
   383  		dispatchTimeoutBudget: newTimeBudget(),
   384  		// We need to (potentially) stop both:
   385  		// - wait.Until go-routine
   386  		// - reflector.ListAndWatch
   387  		// and there are no guarantees on the order that they will stop.
   388  		// So we will be simply closing the channel, and synchronizing on the WaitGroup.
   389  		stopCh:           stopCh,
   390  		clock:            config.Clock,
   391  		timer:            time.NewTimer(time.Duration(0)),
   392  		bookmarkWatchers: newTimeBucketWatchers(config.Clock, defaultBookmarkFrequency),
   393  	}
   394  
   395  	// Ensure that timer is stopped.
   396  	if !cacher.timer.Stop() {
   397  		// Consume triggered (but not yet received) timer event
   398  		// so that future reuse does not get a spurious timeout.
   399  		<-cacher.timer.C
   400  	}
   401  	var contextMetadata metadata.MD
   402  	if utilfeature.DefaultFeatureGate.Enabled(features.SeparateCacheWatchRPC) {
   403  		// Add grpc context metadata to watch and progress notify requests done by cacher to:
   404  		// * Prevent starvation of watch opened by cacher, by moving it to separate Watch RPC than watch request that bypass cacher.
   405  		// * Ensure that progress notification requests are executed on the same Watch RPC as their watch, which is required for it to work.
   406  		contextMetadata = metadata.New(map[string]string{"source": "cache"})
   407  	}
   408  
   409  	progressRequester := newConditionalProgressRequester(config.Storage.RequestWatchProgress, config.Clock, contextMetadata)
   410  	watchCache := newWatchCache(
   411  		config.KeyFunc, cacher.processEvent, config.GetAttrsFunc, config.Versioner, config.Indexers, config.Clock, config.GroupResource, progressRequester)
   412  	listerWatcher := NewListerWatcher(config.Storage, config.ResourcePrefix, config.NewListFunc, contextMetadata)
   413  	reflectorName := "storage/cacher.go:" + config.ResourcePrefix
   414  
   415  	reflector := cache.NewNamedReflector(reflectorName, listerWatcher, obj, watchCache, 0)
   416  	// Configure the reflector's pager to use an appropriate pagination chunk size for fetching data from
   417  	// storage. The pager falls back to a full list if paginated list calls fail due to an "Expired" error.
   418  	reflector.WatchListPageSize = storageWatchListPageSize
   419  	// When etcd loses its leader for 3 cycles, it returns the error "no leader".
   420  	// We don't want to terminate all watchers, as recreating all watchers puts high load on the api-server.
   421  	// In most cases, the leader is reelected within a few cycles.
   422  	reflector.MaxInternalErrorRetryDuration = time.Second * 30
   423  	// Since the watch-list is provided by the watch cache, instruct
   424  	// the reflector to issue a regular LIST against the store.
   425  	reflector.UseWatchList = false
   426  
   427  	cacher.watchCache = watchCache
   428  	cacher.reflector = reflector
   429  
   430  	go cacher.dispatchEvents()
   431  	go progressRequester.Run(stopCh)
   432  
   433  	cacher.stopWg.Add(1)
   434  	go func() {
   435  		defer cacher.stopWg.Done()
   436  		defer cacher.terminateAllWatchers()
   437  		wait.Until(
   438  			func() {
   439  				if !cacher.isStopped() {
   440  					cacher.startCaching(stopCh)
   441  				}
   442  			}, time.Second, stopCh,
   443  		)
   444  	}()
   445  
   446  	return cacher, nil
   447  }
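// A hedged usage sketch (cfg is assumed to be a Config assembled as shown
// near the top of this file):
//
//	cacher, err := NewCacherFromConfig(cfg)
//	if err != nil {
//		return err // e.g. the codec doesn't match the object type
//	}
//	defer cacher.Stop()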
   448  
   449  func (c *Cacher) startCaching(stopChannel <-chan struct{}) {
   450  	// The 'usable' lock is always 'RLock'able when it is safe to use the cache.
   451  	// It is safe to use the cache after a successful list until a disconnection.
   452  	// We start with usable (write) locked. The below OnReplace function will
   453  	// unlock it after a successful list. The below defer will then re-lock
   454  	// it when this function exits (always due to disconnection), only if
   455  	// we actually got a successful list. This cycle will repeat as needed.
   456  	successfulList := false
   457  	c.watchCache.SetOnReplace(func() {
   458  		successfulList = true
   459  		c.ready.set(true)
   460  		klog.V(1).Infof("cacher (%v): initialized", c.groupResource.String())
   461  		metrics.WatchCacheInitializations.WithLabelValues(c.groupResource.String()).Inc()
   462  	})
   463  	defer func() {
   464  		if successfulList {
   465  			c.ready.set(false)
   466  		}
   467  	}()
   468  
   469  	c.terminateAllWatchers()
   470  	// Note that since onReplace may be not called due to errors, we explicitly
   471  	// need to retry it on errors under lock.
   472  	// Also note that startCaching is called in a loop, so there's no need
   473  	// to have another loop here.
   474  	if err := c.reflector.ListAndWatch(stopChannel); err != nil {
   475  		klog.Errorf("cacher (%v): unexpected ListAndWatch error: %v; reinitializing...", c.groupResource.String(), err)
   476  	}
   477  }
   478  
   479  // Versioner implements storage.Interface.
   480  func (c *Cacher) Versioner() storage.Versioner {
   481  	return c.storage.Versioner()
   482  }
   483  
   484  // Create implements storage.Interface.
   485  func (c *Cacher) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
   486  	return c.storage.Create(ctx, key, obj, out, ttl)
   487  }
   488  
   489  // Delete implements storage.Interface.
   490  func (c *Cacher) Delete(
   491  	ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions,
   492  	validateDeletion storage.ValidateObjectFunc, _ runtime.Object) error {
   493  	// Ignore the suggestion and try to pass down the current version of the object
   494  	// read from cache.
   495  	if elem, exists, err := c.watchCache.GetByKey(key); err != nil {
   496  		klog.Errorf("GetByKey returned error: %v", err)
   497  	} else if exists {
   498  		// DeepCopy the object since we modify resource version when serializing the
   499  		// current object.
   500  		currObj := elem.(*storeElement).Object.DeepCopyObject()
   501  		return c.storage.Delete(ctx, key, out, preconditions, validateDeletion, currObj)
   502  	}
   503  	// If we couldn't get the object, fallback to no-suggestion.
   504  	return c.storage.Delete(ctx, key, out, preconditions, validateDeletion, nil)
   505  }
   506  
   507  type namespacedName struct {
   508  	namespace string
   509  	name      string
   510  }
   511  
   512  // Watch implements storage.Interface.
   513  func (c *Cacher) Watch(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) {
   514  	pred := opts.Predicate
   515  	// If the watch-list feature wasn't set and the resourceVersion is unset,
   516  	// ensure that the RV from which the watch is being served is the latest
   517  	// one. "Latest" is ensured by serving the watch from
   518  	// the underlying storage.
   519  	//
   520  	// This should never happen due to our validation, but let's just be super-safe here
   521  	// and disable SendInitialEvents when the feature wasn't enabled.
   522  	if !utilfeature.DefaultFeatureGate.Enabled(features.WatchList) && opts.SendInitialEvents != nil {
   523  		opts.SendInitialEvents = nil
   524  	}
   525  	if opts.SendInitialEvents == nil && opts.ResourceVersion == "" {
   526  		return c.storage.Watch(ctx, key, opts)
   527  	}
   528  	requestedWatchRV, err := c.versioner.ParseResourceVersion(opts.ResourceVersion)
   529  	if err != nil {
   530  		return nil, err
   531  	}
   532  
   533  	readyGeneration, err := c.ready.waitAndReadGeneration(ctx)
   534  	if err != nil {
   535  		return nil, errors.NewServiceUnavailable(err.Error())
   536  	}
   537  
   538  	// determine the namespace and name scope of the watch, first from the request, secondarily from the field selector
   539  	scope := namespacedName{}
   540  	if requestNamespace, ok := request.NamespaceFrom(ctx); ok && len(requestNamespace) > 0 {
   541  		scope.namespace = requestNamespace
   542  	} else if selectorNamespace, ok := pred.Field.RequiresExactMatch("metadata.namespace"); ok {
   543  		scope.namespace = selectorNamespace
   544  	}
   545  	if requestInfo, ok := request.RequestInfoFrom(ctx); ok && requestInfo != nil && len(requestInfo.Name) > 0 {
   546  		scope.name = requestInfo.Name
   547  	} else if selectorName, ok := pred.Field.RequiresExactMatch("metadata.name"); ok {
   548  		scope.name = selectorName
   549  	}
   550  
   551  	triggerValue, triggerSupported := "", false
   552  	if c.indexedTrigger != nil {
   553  		for _, field := range pred.IndexFields {
   554  			if field == c.indexedTrigger.indexName {
   555  				if value, ok := pred.Field.RequiresExactMatch(field); ok {
   556  					triggerValue, triggerSupported = value, true
   557  				}
   558  			}
   559  		}
   560  	}
   561  
   562  	// It boils down to a tradeoff between:
   563  	// - having it as small as possible to reduce memory usage
   564  	// - having it large enough to ensure that watchers that need to process
   565  	//   a bunch of changes have enough buffer to avoid blocking other
   566  	//   watchers when our watcher has a processing hiccup
   567  	chanSize := c.watchCache.suggestedWatchChannelSize(c.indexedTrigger != nil, triggerSupported)
   568  
   569  	// Determine a function that computes the bookmarkAfterResourceVersion
   570  	bookmarkAfterResourceVersionFn, err := c.getBookmarkAfterResourceVersionLockedFunc(ctx, requestedWatchRV, opts)
   571  	if err != nil {
   572  		return newErrWatcher(err), nil
   573  	}
   574  
   575  	// Determine a function that computes the watchRV we should start from
   576  	startWatchResourceVersionFn, err := c.getStartResourceVersionForWatchLockedFunc(ctx, requestedWatchRV, opts)
   577  	if err != nil {
   578  		return newErrWatcher(err), nil
   579  	}
   580  
   581  	// Determine the watch timeout ('0' means no deadline is set; skip the check).
   582  	deadline, _ := ctx.Deadline()
   583  
   584  	identifier := fmt.Sprintf("key: %q, labels: %q, fields: %q", key, pred.Label, pred.Field)
   585  
   586  	// Create a watcher here to reduce memory allocations under lock,
   587  	// given that memory allocation may trigger GC and block the thread.
   588  	// Also note that emptyFunc is a placeholder until we are able
   589  	// to compute the watcher.forget function (which has to happen under lock).
   590  	watcher := newCacheWatcher(
   591  		chanSize,
   592  		filterWithAttrsFunction(key, pred),
   593  		emptyFunc,
   594  		c.versioner,
   595  		deadline,
   596  		pred.AllowWatchBookmarks,
   597  		c.groupResource,
   598  		identifier,
   599  	)
   600  
   601  	// note that c.waitUntilWatchCacheFreshAndForceAllEvents must be called without
   602  	// the c.watchCache.RLock held otherwise we are at risk of a deadlock
   603  	// mainly because c.watchCache.processEvent method won't be able to make progress
   604  	//
   605  	// moreover even though the c.waitUntilWatchCacheFreshAndForceAllEvents acquires a lock
   606  	// it is safe to release the lock after the method finishes because we don't require
   607  	// any atomicity between the call to the method and further calls that actually get the events.
   608  	forceAllEvents, err := c.waitUntilWatchCacheFreshAndForceAllEvents(ctx, requestedWatchRV, opts)
   609  	if err != nil {
   610  		return newErrWatcher(err), nil
   611  	}
   612  
   613  	// We explicitly use the thread-unsafe version and do the locking ourselves to ensure that
   614  	// no new events will be processed in the meantime. The watchCache will be unlocked
   615  	// on return from this function.
   616  	// Note that we cannot do it under Cacher lock, to avoid a deadlock, since the
   617  	// underlying watchCache is calling processEvent under its lock.
   618  	c.watchCache.RLock()
   619  	defer c.watchCache.RUnlock()
   620  
   621  	startWatchRV := startWatchResourceVersionFn()
   622  	var cacheInterval *watchCacheInterval
   623  	if forceAllEvents {
   624  		cacheInterval, err = c.watchCache.getIntervalFromStoreLocked()
   625  	} else {
   626  		cacheInterval, err = c.watchCache.getAllEventsSinceLocked(startWatchRV)
   627  	}
   628  	if err != nil {
   629  		// To match the uncached watch implementation, once we have passed authn/authz/admission,
   630  		// and successfully parsed a resource version, other errors must fail with a watch event of type ERROR,
   631  		// rather than a directly returned error.
   632  		return newErrWatcher(err), nil
   633  	}
   634  
   635  	addedWatcher := false
   636  	func() {
   637  		c.Lock()
   638  		defer c.Unlock()
   639  
   640  		if generation, ok := c.ready.checkAndReadGeneration(); generation != readyGeneration || !ok {
   641  			// We went unready or are already on a different generation.
   642  			// Avoid registering and starting the watch as it will have to be
   643  			// terminated immediately anyway.
   644  			return
   645  		}
   646  
   647  		// Update watcher.forget function once we can compute it.
   648  		watcher.forget = forgetWatcher(c, watcher, c.watcherIdx, scope, triggerValue, triggerSupported)
   649  		// Update the bookMarkAfterResourceVersion
   650  		watcher.setBookmarkAfterResourceVersion(bookmarkAfterResourceVersionFn())
   651  		c.watchers.addWatcher(watcher, c.watcherIdx, scope, triggerValue, triggerSupported)
   652  		addedWatcher = true
   653  
   654  		// Add it to the queue only when the client supports watch bookmarks.
   655  		if watcher.allowWatchBookmarks {
   656  			c.bookmarkWatchers.addWatcherThreadUnsafe(watcher)
   657  		}
   658  		c.watcherIdx++
   659  	}()
   660  
   661  	if !addedWatcher {
   662  		// Watcher isn't really started at this point, so it's safe to just drop it.
   663  		//
   664  		// We're simulating the immediate watch termination, which boils down to simply
   665  		// closing the watcher.
   666  		return newImmediateCloseWatcher(), nil
   667  	}
   668  
   669  	go watcher.processInterval(ctx, cacheInterval, startWatchRV)
   670  	return watcher, nil
   671  }
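// A minimal sketch of opening a watch through the cacher (illustrative; the
// context, key, and resource version are assumptions):
//
//	w, err := cacher.Watch(ctx, "/pods/default", storage.ListOptions{
//		ResourceVersion: "1234", // "" without SendInitialEvents delegates to storage
//		Predicate:       storage.Everything,
//	})
//	if err != nil {
//		return err
//	}
//	defer w.Stop()
//	for ev := range w.ResultChan() {
//		// ev.Type is Added, Modified, Deleted, Bookmark, or Error.
//	}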
   672  
   673  // Get implements storage.Interface.
   674  func (c *Cacher) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error {
   675  	if opts.ResourceVersion == "" {
   676  		// If resourceVersion is not specified, serve it from underlying
   677  		// storage (for backward compatibility).
   678  		return c.storage.Get(ctx, key, opts, objPtr)
   679  	}
   680  
   681  	// If resourceVersion is specified, serve it from cache.
   682  	// It's guaranteed that the returned value is at least as
   683  	// fresh as the given resourceVersion.
   684  	getRV, err := c.versioner.ParseResourceVersion(opts.ResourceVersion)
   685  	if err != nil {
   686  		return err
   687  	}
   688  
   689  	if getRV == 0 && !c.ready.check() {
   690  		// If Cacher is not yet initialized and we don't require any specific
   691  		// minimal resource version, simply forward the request to storage.
   692  		return c.storage.Get(ctx, key, opts, objPtr)
   693  	}
   694  
   695  	// Do not create a trace - it's not for free and there are tons
   696  	// of Get requests. We can add it if it will be really needed.
   697  	if err := c.ready.wait(ctx); err != nil {
   698  		return errors.NewServiceUnavailable(err.Error())
   699  	}
   700  
   701  	objVal, err := conversion.EnforcePtr(objPtr)
   702  	if err != nil {
   703  		return err
   704  	}
   705  
   706  	obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(ctx, getRV, key)
   707  	if err != nil {
   708  		return err
   709  	}
   710  
   711  	if exists {
   712  		elem, ok := obj.(*storeElement)
   713  		if !ok {
   714  			return fmt.Errorf("non *storeElement returned from storage: %v", obj)
   715  		}
   716  		objVal.Set(reflect.ValueOf(elem.Object).Elem())
   717  	} else {
   718  		objVal.Set(reflect.Zero(objVal.Type()))
   719  		if !opts.IgnoreNotFound {
   720  			return storage.NewKeyNotFoundError(key, int64(readResourceVersion))
   721  		}
   722  	}
   723  	return nil
   724  }
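// Illustrative only (key and version are assumptions): a Get with a non-empty
// ResourceVersion is served from the cache once the cache is at least that
// fresh, while an empty ResourceVersion delegates to the underlying storage.
//
//	pod := cfg.NewFunc() // assumed to produce the right concrete type
//	err := cacher.Get(ctx, "/pods/default/example", storage.GetOptions{
//		ResourceVersion: "1234",
//		IgnoreNotFound:  true,
//	}, pod)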
   725  
   726  // NOTICE: Keep in sync with shouldListFromStorage function in
   727  //
   728  //	staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/list_work_estimator.go
   729  func shouldDelegateList(opts storage.ListOptions) bool {
   730  	resourceVersion := opts.ResourceVersion
   731  	pred := opts.Predicate
   732  	match := opts.ResourceVersionMatch
   733  	consistentListFromCacheEnabled := utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache)
   734  
   735  	// Serve consistent reads from storage if ConsistentListFromCache is disabled
   736  	consistentReadFromStorage := resourceVersion == "" && !consistentListFromCacheEnabled
   737  	// Watch cache doesn't support continuations, so serve them from etcd.
   738  	hasContinuation := len(pred.Continue) > 0
   739  	// Serve paginated requests at revision "0" from the watch cache to avoid overwhelming etcd.
   740  	hasLimit := pred.Limit > 0 && resourceVersion != "0"
   741  	// Watch cache only supports ResourceVersionMatchNotOlderThan (default).
   742  	unsupportedMatch := match != "" && match != metav1.ResourceVersionMatchNotOlderThan
   743  
   744  	return consistentReadFromStorage || hasContinuation || hasLimit || unsupportedMatch
   745  }
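// Worked examples of the delegation rules above (illustrative):
//
//	// Consistent read: delegated unless ConsistentListFromCache is enabled.
//	shouldDelegateList(storage.ListOptions{ResourceVersion: ""})
//	// Continuation tokens are never served from the cache.
//	shouldDelegateList(storage.ListOptions{Predicate: storage.SelectionPredicate{Continue: "token"}}) // true
//	// Limits are honored by the cache only at resource version "0".
//	shouldDelegateList(storage.ListOptions{ResourceVersion: "0", Predicate: storage.SelectionPredicate{Limit: 500}}) // false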
   746  
   747  func (c *Cacher) listItems(ctx context.Context, listRV uint64, key string, pred storage.SelectionPredicate, recursive bool) ([]interface{}, uint64, string, error) {
   748  	if !recursive {
   749  		obj, exists, readResourceVersion, err := c.watchCache.WaitUntilFreshAndGet(ctx, listRV, key)
   750  		if err != nil {
   751  			return nil, 0, "", err
   752  		}
   753  		if exists {
   754  			return []interface{}{obj}, readResourceVersion, "", nil
   755  		}
   756  		return nil, readResourceVersion, "", nil
   757  	}
   758  	return c.watchCache.WaitUntilFreshAndList(ctx, listRV, pred.MatcherIndex())
   759  }
   760  
   761  // GetList implements storage.Interface
   762  func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error {
   763  	recursive := opts.Recursive
   764  	resourceVersion := opts.ResourceVersion
   765  	pred := opts.Predicate
   766  	if shouldDelegateList(opts) {
   767  		return c.storage.GetList(ctx, key, opts, listObj)
   768  	}
   769  
   770  	listRV, err := c.versioner.ParseResourceVersion(resourceVersion)
   771  	if err != nil {
   772  		return err
   773  	}
   774  	if listRV == 0 && !c.ready.check() {
   775  		// If Cacher is not yet initialized and we don't require any specific
   776  		// minimal resource version, simply forward the request to storage.
   777  		return c.storage.GetList(ctx, key, opts, listObj)
   778  	}
   779  	if listRV == 0 && utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) {
   780  		listRV, err = storage.GetCurrentResourceVersionFromStorage(ctx, c.storage, c.newListFunc, c.resourcePrefix, c.objectType.String())
   781  		if err != nil {
   782  			return err
   783  		}
   784  	}
   785  
   786  	ctx, span := tracing.Start(ctx, "cacher list",
   787  		attribute.String("audit-id", audit.GetAuditIDTruncated(ctx)),
   788  		attribute.Stringer("type", c.groupResource))
   789  	defer span.End(500 * time.Millisecond)
   790  
   791  	if err := c.ready.wait(ctx); err != nil {
   792  		return errors.NewServiceUnavailable(err.Error())
   793  	}
   794  	span.AddEvent("Ready")
   795  
   796  	// List elements with at least 'listRV' from cache.
   797  	listPtr, err := meta.GetItemsPtr(listObj)
   798  	if err != nil {
   799  		return err
   800  	}
   801  	listVal, err := conversion.EnforcePtr(listPtr)
   802  	if err != nil {
   803  		return err
   804  	}
   805  	if listVal.Kind() != reflect.Slice {
   806  		return fmt.Errorf("need a pointer to slice, got %v", listVal.Kind())
   807  	}
   808  	filter := filterWithAttrsFunction(key, pred)
   809  
   810  	objs, readResourceVersion, indexUsed, err := c.listItems(ctx, listRV, key, pred, recursive)
   811  	if err != nil {
   812  		return err
   813  	}
   814  	span.AddEvent("Listed items from cache", attribute.Int("count", len(objs)))
   815  	// Store pointers to the eligible objects.
   816  	// Why not put the objects directly into the Items of listObj?
   817  	//   The elements in listObj are of struct type; building that slice eagerly would bring excessive memory consumption,
   818  	//   so we try to delay this action as much as possible.
   819  	var selectedObjects []runtime.Object
   820  	for _, obj := range objs {
   821  		elem, ok := obj.(*storeElement)
   822  		if !ok {
   823  			return fmt.Errorf("non *storeElement returned from storage: %v", obj)
   824  		}
   825  		if filter(elem.Key, elem.Labels, elem.Fields) {
   826  			selectedObjects = append(selectedObjects, elem.Object)
   827  		}
   828  	}
   829  	if len(selectedObjects) == 0 {
   830  		// Ensure that we never return a nil Items pointer in the result for consistency.
   831  		listVal.Set(reflect.MakeSlice(listVal.Type(), 0, 0))
   832  	} else {
   833  		// Resize the slice appropriately, since we already know the size of the result set.
   834  		listVal.Set(reflect.MakeSlice(listVal.Type(), len(selectedObjects), len(selectedObjects)))
   835  		span.AddEvent("Resized result")
   836  		for i, o := range selectedObjects {
   837  			listVal.Index(i).Set(reflect.ValueOf(o).Elem())
   838  		}
   839  	}
   840  	span.AddEvent("Filtered items", attribute.Int("count", listVal.Len()))
   841  	if c.versioner != nil {
   842  		if err := c.versioner.UpdateList(listObj, readResourceVersion, "", nil); err != nil {
   843  			return err
   844  		}
   845  	}
   846  	metrics.RecordListCacheMetrics(c.resourcePrefix, indexUsed, len(objs), listVal.Len())
   847  	return nil
   848  }
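// A hedged sketch of a list served from the cache (key and list value are
// assumptions):
//
//	list := cfg.NewListFunc()
//	err := cacher.GetList(ctx, "/pods/default", storage.ListOptions{
//		ResourceVersion: "0", // RV "0" is served from cache and may be slightly stale
//		Recursive:       true,
//		Predicate:       storage.Everything,
//	}, list)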
   849  
   850  // GuaranteedUpdate implements storage.Interface.
   851  func (c *Cacher) GuaranteedUpdate(
   852  	ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool,
   853  	preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, _ runtime.Object) error {
   854  	// Ignore the suggestion and try to pass down the current version of the object
   855  	// read from cache.
   856  	if elem, exists, err := c.watchCache.GetByKey(key); err != nil {
   857  		klog.Errorf("GetByKey returned error: %v", err)
   858  	} else if exists {
   859  		// DeepCopy the object since we modify resource version when serializing the
   860  		// current object.
   861  		currObj := elem.(*storeElement).Object.DeepCopyObject()
   862  		return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, currObj)
   863  	}
   864  	// If we couldn't get the object, fallback to no-suggestion.
   865  	return c.storage.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, nil)
   866  }
   867  
   868  // Count implements storage.Interface.
   869  func (c *Cacher) Count(pathPrefix string) (int64, error) {
   870  	return c.storage.Count(pathPrefix)
   871  }
   872  
   873  // baseObjectThreadUnsafe omits locking for cachingObject.
   874  func baseObjectThreadUnsafe(object runtime.Object) runtime.Object {
   875  	if co, ok := object.(*cachingObject); ok {
   876  		return co.object
   877  	}
   878  	return object
   879  }
   880  
   881  func (c *Cacher) triggerValuesThreadUnsafe(event *watchCacheEvent) ([]string, bool) {
   882  	if c.indexedTrigger == nil {
   883  		return nil, false
   884  	}
   885  
   886  	result := make([]string, 0, 2)
   887  	result = append(result, c.indexedTrigger.indexerFunc(baseObjectThreadUnsafe(event.Object)))
   888  	if event.PrevObject == nil {
   889  		return result, true
   890  	}
   891  	prevTriggerValue := c.indexedTrigger.indexerFunc(baseObjectThreadUnsafe(event.PrevObject))
   892  	if result[0] != prevTriggerValue {
   893  		result = append(result, prevTriggerValue)
   894  	}
   895  	return result, true
   896  }
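// For example (illustrative): with a trigger function that indexes pods by
// "spec.nodeName", an update moving a pod from node "a" to node "b" yields
// []string{"b", "a"}, so watchers filtering on either node value receive
// the event.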
   897  
   898  func (c *Cacher) processEvent(event *watchCacheEvent) {
   899  	if curLen := int64(len(c.incoming)); c.incomingHWM.Update(curLen) {
   900  		// Monitor if this gets backed up, and how much.
   901  		klog.V(1).Infof("cacher (%v): %v objects queued in incoming channel.", c.groupResource.String(), curLen)
   902  	}
   903  	c.incoming <- *event
   904  }
   905  
   906  func (c *Cacher) dispatchEvents() {
   907  	// Jitter to help level out any aggregate load.
   908  	bookmarkTimer := c.clock.NewTimer(wait.Jitter(time.Second, 0.25))
   909  	defer bookmarkTimer.Stop()
   910  
   911  	lastProcessedResourceVersion := uint64(0)
   912  	for {
   913  		select {
   914  		case event, ok := <-c.incoming:
   915  			if !ok {
   916  				return
   917  			}
   918  			// Don't dispatch bookmarks coming from the storage layer.
   919  			// They can be very frequent (even to the level of subseconds)
   920  			// to allow efficient watch resumption on kube-apiserver restarts,
   921  			// and propagating them down may overload the whole system.
   922  			//
   923  			// TODO: If at some point we decide the performance and scalability
   924  			// footprint is acceptable, this is the place to hook them in.
   925  			// However, we then need to check if this was called as a result
   926  			// of a bookmark event or regular Add/Update/Delete operation by
   927  			// checking if resourceVersion here has changed.
   928  			if event.Type != watch.Bookmark {
   929  				c.dispatchEvent(&event)
   930  			}
   931  			lastProcessedResourceVersion = event.ResourceVersion
   932  			metrics.EventsCounter.WithLabelValues(c.groupResource.String()).Inc()
   933  		case <-bookmarkTimer.C():
   934  			bookmarkTimer.Reset(wait.Jitter(time.Second, 0.25))
   935  			// Never send a bookmark event if we did not see an event here; this is fine
   936  			// because we don't provide any guarantees on sending bookmarks.
   937  			//
   938  			// Just pop closed watchers and requeue others if needed.
   939  			//
   940  			// TODO(#115478): rework the following logic
   941  			//  in a way that would allow more
   942  			//  efficient cleanup of closed watchers
   943  			if lastProcessedResourceVersion == 0 {
   944  				func() {
   945  					c.Lock()
   946  					defer c.Unlock()
   947  					for _, watchers := range c.bookmarkWatchers.popExpiredWatchersThreadUnsafe() {
   948  						for _, watcher := range watchers {
   949  							if watcher.stopped {
   950  								continue
   951  							}
   952  							c.bookmarkWatchers.addWatcherThreadUnsafe(watcher)
   953  						}
   954  					}
   955  				}()
   956  				continue
   957  			}
   958  			bookmarkEvent := &watchCacheEvent{
   959  				Type:            watch.Bookmark,
   960  				Object:          c.newFunc(),
   961  				ResourceVersion: lastProcessedResourceVersion,
   962  			}
   963  			if err := c.versioner.UpdateObject(bookmarkEvent.Object, bookmarkEvent.ResourceVersion); err != nil {
   964  				klog.Errorf("failure to set resourceVersion to %d on bookmark event %+v", bookmarkEvent.ResourceVersion, bookmarkEvent.Object)
   965  				continue
   966  			}
   967  			c.dispatchEvent(bookmarkEvent)
   968  		case <-c.stopCh:
   969  			return
   970  		}
   971  	}
   972  }
   973  
   974  func setCachingObjects(event *watchCacheEvent, versioner storage.Versioner) {
   975  	switch event.Type {
   976  	case watch.Added, watch.Modified:
   977  		if object, err := newCachingObject(event.Object); err == nil {
   978  			event.Object = object
   979  		} else {
   980  			klog.Errorf("couldn't create cachingObject from: %#v", event.Object)
   981  		}
   982  		// Don't wrap PrevObject for update event (for create events it is nil).
   983  		// We only encode those to deliver DELETE watch events, so if
   984  		// event.Object is not nil it can be used only for watchers for which
   985  		// selector was satisfied for its previous version and is no longer
   986  		// satisfied for the current version.
   987  		// This is rare enough that it doesn't justify making deep-copy of the
   988  		// object (done by newCachingObject) every time.
   989  	case watch.Deleted:
   990  		// Don't wrap Object for delete events - these are not to deliver any
   991  		// events. Only wrap PrevObject.
   992  		if object, err := newCachingObject(event.PrevObject); err == nil {
   993  			// Update resource version of the object.
   994  			// event.PrevObject is used to deliver DELETE watch events and
   995  			// for them, we set resourceVersion to <current> instead of
   996  			// the resourceVersion of the last modification of the object.
   997  			updateResourceVersion(object, versioner, event.ResourceVersion)
   998  			event.PrevObject = object
   999  		} else {
  1000  			klog.Errorf("couldn't create cachingObject from: %#v", event.PrevObject)
  1001  		}
  1002  	}
  1003  }
  1004  
  1005  func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
  1006  	c.startDispatching(event)
  1007  	defer c.finishDispatching()
  1008  	// Watchers stopped after startDispatching will be delayed to finishDispatching.
  1009  
  1010  	// Since add() can block, we explicitly add when cacher is unlocked.
  1011  	// Dispatch the event in a non-blocking way first, so that faster watchers
  1012  	// are not blocked by slower ones.
  1013  	if event.Type == watch.Bookmark {
  1014  		for _, watcher := range c.watchersBuffer {
  1015  			watcher.nonblockingAdd(event)
  1016  		}
  1017  	} else {
  1018  		// Set up caching of object serializations only for dispatching this event.
  1019  		//
  1020  		// Storing serializations in memory would result in increased memory usage,
  1021  		// but it would help for caching encodings for watches started from old
  1022  		// versions. However, we still don't have a convincing data that the gain
  1023  		// from it justifies increased memory usage, so for now we drop the cached
  1024  		// serializations after dispatching this event.
  1025  		//
  1026  		// Given that CachingObject is just wrapping the object and not performing
  1027  		// deep-copying (until some field is explicitly being modified), we create
  1028  		// it unconditionally to ensure safety and reduce deep-copying.
  1029  		//
  1030  		// Make a shallow copy to allow overwriting Object and PrevObject.
  1031  		wcEvent := *event
  1032  		setCachingObjects(&wcEvent, c.versioner)
  1033  		event = &wcEvent
  1034  
  1035  		c.blockedWatchers = c.blockedWatchers[:0]
  1036  		for _, watcher := range c.watchersBuffer {
  1037  			if !watcher.nonblockingAdd(event) {
  1038  				c.blockedWatchers = append(c.blockedWatchers, watcher)
  1039  			}
  1040  		}
  1041  
  1042  		if len(c.blockedWatchers) > 0 {
  1043  			// dispatchEvent is called very often, so arrange
  1044  			// to reuse timers instead of constantly allocating.
  1045  			startTime := time.Now()
  1046  			timeout := c.dispatchTimeoutBudget.takeAvailable()
  1047  			c.timer.Reset(timeout)
  1048  
  1049  			// Send event to all blocked watchers. As long as timer is running,
  1050  			// `add` will wait for the watcher to unblock. After timeout,
  1051  			// `add` will not wait, but immediately close a still blocked watcher.
  1052  			// Hence, every watcher gets the chance to unblock itself while timer
  1053  			// is running, not only the first ones in the list.
  1054  			timer := c.timer
  1055  			for _, watcher := range c.blockedWatchers {
  1056  				if !watcher.add(event, timer) {
  1057  					// The timer has fired; clear it by setting it to nil.
  1058  					timer = nil
  1059  				}
  1060  			}
  1061  
  1062  			// Stop the timer if it has not fired.
  1063  			if timer != nil && !timer.Stop() {
  1064  				// Consume triggered (but not yet received) timer event
  1065  				// so that future reuse does not get a spurious timeout.
  1066  				<-timer.C
  1067  			}
  1068  
  1069  			c.dispatchTimeoutBudget.returnUnused(timeout - time.Since(startTime))
  1070  		}
  1071  	}
  1072  }
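// Illustrative note on the budget above: takeAvailable() bounds the total time
// spent on blocked watchers per dispatch. If it returns, say, 20ms and all
// blocked watchers unblock after 5ms, roughly 15ms is handed back via
// returnUnused for future dispatches; watchers still blocked when the timer
// fires are closed rather than waited on.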
  1073  
  1074  func (c *Cacher) startDispatchingBookmarkEventsLocked() {
  1075  	// Pop already expired watchers. However, explicitly ignore stopped ones,
  1076  	// as we don't delete watchers from bookmarkWatchers when they are stopped.
  1077  	for _, watchers := range c.bookmarkWatchers.popExpiredWatchersThreadUnsafe() {
  1078  		for _, watcher := range watchers {
  1079  			// c.Lock() is held here.
  1080  			// watcher.stopThreadUnsafe() is protected by c.Lock()
  1081  			if watcher.stopped {
  1082  				continue
  1083  			}
  1084  			c.watchersBuffer = append(c.watchersBuffer, watcher)
  1085  			c.expiredBookmarkWatchers = append(c.expiredBookmarkWatchers, watcher)
  1086  		}
  1087  	}
  1088  }
  1089  
  1090  // startDispatching chooses watchers potentially interested in a given event
  1091  // and marks dispatching as true.
  1092  func (c *Cacher) startDispatching(event *watchCacheEvent) {
  1093  	// It is safe to call triggerValuesThreadUnsafe here, because at this
  1094  	// point only this thread can access this event (we create a separate
  1095  	// watchCacheEvent for every dispatch).
  1096  	triggerValues, supported := c.triggerValuesThreadUnsafe(event)
  1097  
  1098  	c.Lock()
  1099  	defer c.Unlock()
  1100  
  1101  	c.dispatching = true
  1102  	// We are reusing the slice to avoid memory reallocations in every
  1103  	// dispatchEvent() call. That may prevent Go GC from freeing items
  1104  	// from previous phases that are sitting behind the current length
  1105  	// of the slice, but there is only a limited number of those and the
  1106  	// gain from avoiding memory allocations is much bigger.
  1107  	c.watchersBuffer = c.watchersBuffer[:0]
  1108  
  1109  	if event.Type == watch.Bookmark {
  1110  		c.startDispatchingBookmarkEventsLocked()
  1111  		// return here to reduce following code indentation and diff
  1112  		return
  1113  	}
  1114  
  1115  	// iterate over watchers for each applicable namespace/name tuple
  1116  	namespace := event.ObjFields["metadata.namespace"]
  1117  	name := event.ObjFields["metadata.name"]
  1118  	if len(namespace) > 0 {
  1119  		if len(name) > 0 {
  1120  			// namespaced watchers scoped by name
  1121  			for _, watcher := range c.watchers.allWatchers[namespacedName{namespace: namespace, name: name}] {
  1122  				c.watchersBuffer = append(c.watchersBuffer, watcher)
  1123  			}
  1124  		}
  1125  		// namespaced watchers not scoped by name
  1126  		for _, watcher := range c.watchers.allWatchers[namespacedName{namespace: namespace}] {
  1127  			c.watchersBuffer = append(c.watchersBuffer, watcher)
  1128  		}
  1129  	}
  1130  	if len(name) > 0 {
  1131  		// cluster-wide watchers scoped by name
  1132  		for _, watcher := range c.watchers.allWatchers[namespacedName{name: name}] {
  1133  			c.watchersBuffer = append(c.watchersBuffer, watcher)
  1134  		}
  1135  	}
  1136  	// cluster-wide watchers unscoped by name
  1137  	for _, watcher := range c.watchers.allWatchers[namespacedName{}] {
  1138  		c.watchersBuffer = append(c.watchersBuffer, watcher)
  1139  	}
  1140  
  1141  	if supported {
  1142  		// Iterate over watchers interested in the given values of the trigger.
  1143  		for _, triggerValue := range triggerValues {
  1144  			for _, watcher := range c.watchers.valueWatchers[triggerValue] {
  1145  				c.watchersBuffer = append(c.watchersBuffer, watcher)
  1146  			}
  1147  		}
  1148  	} else {
  1149  		// supported being false generally means that the trigger function
  1150  		// is not defined (or is not aware of any indexes). In this case,
  1151  		// watcher filters should generally also not generate any
  1152  		// trigger values, but misconfiguration can cause problems.
  1153  		// Thus we paranoidly keep this branch.
  1154  
  1155  		// Iterate over watchers interested in exact values for all values.
  1156  		for _, watchers := range c.watchers.valueWatchers {
  1157  			for _, watcher := range watchers {
  1158  				c.watchersBuffer = append(c.watchersBuffer, watcher)
  1159  			}
  1160  		}
  1161  	}
  1162  }
  1163  
  1164  // finishDispatching stops all the watchers that were supposed to be
  1165  // stopped in the meantime, but it was deferred to avoid closing input
  1166  // channels of watchers, as add() may still be writing to them.
  1167  // It also marks dispatching as false.
  1168  func (c *Cacher) finishDispatching() {
  1169  	c.Lock()
  1170  	defer c.Unlock()
  1171  	c.dispatching = false
  1172  	for _, watcher := range c.watchersToStop {
  1173  		watcher.stopLocked()
  1174  	}
  1175  	c.watchersToStop = c.watchersToStop[:0]
  1176  
  1177  	for _, watcher := range c.expiredBookmarkWatchers {
  1178  		if watcher.stopped {
  1179  			continue
  1180  		}
  1181  		// requeue the watcher for the next bookmark if needed.
  1182  		c.bookmarkWatchers.addWatcherThreadUnsafe(watcher)
  1183  	}
  1184  	c.expiredBookmarkWatchers = c.expiredBookmarkWatchers[:0]
  1185  }
  1186  
  1187  func (c *Cacher) terminateAllWatchers() {
  1188  	c.Lock()
  1189  	defer c.Unlock()
  1190  	c.watchers.terminateAll(c.groupResource, c.stopWatcherLocked)
  1191  }
  1192  
  1193  func (c *Cacher) stopWatcherLocked(watcher *cacheWatcher) {
  1194  	if c.dispatching {
  1195  		c.watchersToStop = append(c.watchersToStop, watcher)
  1196  	} else {
  1197  		watcher.stopLocked()
  1198  	}
  1199  }
  1200  
  1201  func (c *Cacher) isStopped() bool {
  1202  	c.stopLock.RLock()
  1203  	defer c.stopLock.RUnlock()
  1204  	return c.stopped
  1205  }
  1206  
  1207  // Stop implements the graceful termination.
  1208  func (c *Cacher) Stop() {
  1209  	c.stopLock.Lock()
  1210  	if c.stopped {
  1211  		// avoid stopping twice (note: cachers are shared with subresources)
  1212  		c.stopLock.Unlock()
  1213  		return
  1214  	}
  1215  	c.stopped = true
  1216  	c.ready.stop()
  1217  	c.stopLock.Unlock()
  1218  	close(c.stopCh)
  1219  	c.stopWg.Wait()
  1220  }
  1221  
  1222  func forgetWatcher(c *Cacher, w *cacheWatcher, index int, scope namespacedName, triggerValue string, triggerSupported bool) func(bool) {
  1223  	return func(drainWatcher bool) {
  1224  		c.Lock()
  1225  		defer c.Unlock()
  1226  
  1227  		w.setDrainInputBufferLocked(drainWatcher)
  1228  
  1229  		// It's possible that the watcher is already not in the structure (e.g. in case of
  1230  		// simultaneous Stop() and terminateAllWatchers()), but it is safe to call stopLocked()
  1231  		// on a watcher multiple times.
  1232  		c.watchers.deleteWatcher(index, scope, triggerValue, triggerSupported)
  1233  		c.stopWatcherLocked(w)
  1234  	}
  1235  }
  1236  
  1237  func filterWithAttrsFunction(key string, p storage.SelectionPredicate) filterWithAttrsFunc {
  1238  	filterFunc := func(objKey string, label labels.Set, field fields.Set) bool {
  1239  		if !hasPathPrefix(objKey, key) {
  1240  			return false
  1241  		}
  1242  		return p.MatchesObjectAttributes(label, field)
  1243  	}
  1244  	return filterFunc
  1245  }
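// For example (illustrative): with key "/pods/default", an object stored at
// "/pods/default/example" passes the prefix check and is then matched against
// the predicate's label and field selectors, while "/pods/other/example" is
// rejected outright.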
  1246  
  1247  // LastSyncResourceVersion returns resource version to which the underlying cache is synced.
  1248  func (c *Cacher) LastSyncResourceVersion() (uint64, error) {
  1249  	if err := c.ready.wait(context.Background()); err != nil {
  1250  		return 0, errors.NewServiceUnavailable(err.Error())
  1251  	}
  1252  
  1253  	resourceVersion := c.reflector.LastSyncResourceVersion()
  1254  	return c.versioner.ParseResourceVersion(resourceVersion)
  1255  }
  1256  
  1257  // getBookmarkAfterResourceVersionLockedFunc returns a function that
  1258  // yields the ResourceVersion after which the bookmark event will be delivered.
  1259  //
  1260  // The returned function must be called under the watchCache lock.
  1261  func (c *Cacher) getBookmarkAfterResourceVersionLockedFunc(ctx context.Context, parsedResourceVersion uint64, opts storage.ListOptions) (func() uint64, error) {
  1262  	if opts.SendInitialEvents == nil || !*opts.SendInitialEvents || !opts.Predicate.AllowWatchBookmarks {
  1263  		return func() uint64 { return 0 }, nil
  1264  	}
  1265  	return c.getCommonResourceVersionLockedFunc(ctx, parsedResourceVersion, opts)
  1266  }
  1267  
  1268  // getStartResourceVersionForWatchLockedFunc returns a function that
  1269  // yields the ResourceVersion the watch will be started from.
  1270  // Depending on the input parameters the semantics of the returned ResourceVersion are:
  1271  //   - start at Exact (return parsedWatchResourceVersion)
  1272  //   - start at Most Recent (return an RV from etcd)
  1273  //   - start at Any (return the current watchCache's RV)
  1274  //
  1275  // The returned function must be called under the watchCache lock.
  1276  func (c *Cacher) getStartResourceVersionForWatchLockedFunc(ctx context.Context, parsedWatchResourceVersion uint64, opts storage.ListOptions) (func() uint64, error) {
  1277  	if opts.SendInitialEvents == nil || *opts.SendInitialEvents {
  1278  		return func() uint64 { return parsedWatchResourceVersion }, nil
  1279  	}
  1280  	return c.getCommonResourceVersionLockedFunc(ctx, parsedWatchResourceVersion, opts)
  1281  }
  1282  
  1283  // getCommonResourceVersionLockedFunc is a helper that simply computes a ResourceVersion
  1284  // based on the input parameters. Please examine callers of this method to get more context.
  1285  //
  1286  // The returned function must be called under the watchCache lock.
  1287  func (c *Cacher) getCommonResourceVersionLockedFunc(ctx context.Context, parsedWatchResourceVersion uint64, opts storage.ListOptions) (func() uint64, error) {
  1288  	switch {
  1289  	case len(opts.ResourceVersion) == 0:
  1290  		rv, err := storage.GetCurrentResourceVersionFromStorage(ctx, c.storage, c.newListFunc, c.resourcePrefix, c.objectType.String())
  1291  		if err != nil {
  1292  			return nil, err
  1293  		}
  1294  		return func() uint64 { return rv }, nil
  1295  	case parsedWatchResourceVersion == 0:
  1296  		// here we assume that the watchCache lock is already held
  1297  		return func() uint64 { return c.watchCache.resourceVersion }, nil
  1298  	default:
  1299  		return func() uint64 { return parsedWatchResourceVersion }, nil
  1300  	}
  1301  }
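// Summary of the cases above (illustrative):
//
//	opts.ResourceVersion == ""   -> a fresh RV read from the underlying storage
//	parsed RV == 0 (i.e. RV "0") -> the watchCache's current RV
//	any other RV                 -> the exact RV that was requested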
  1302  
  1303  // waitUntilWatchCacheFreshAndForceAllEvents waits until cache is at least
  1304  // as fresh as given requestedWatchRV if sendInitialEvents was requested.
  1305  // Additionally, it instructs the caller whether it should ask for
  1306  // all events from the cache (full state) or not.
  1307  func (c *Cacher) waitUntilWatchCacheFreshAndForceAllEvents(ctx context.Context, requestedWatchRV uint64, opts storage.ListOptions) (bool, error) {
  1308  	if opts.SendInitialEvents != nil && *opts.SendInitialEvents {
  1309  		err := c.watchCache.waitUntilFreshAndBlock(ctx, requestedWatchRV)
  1310  		defer c.watchCache.RUnlock()
  1311  		return err == nil, err
  1312  	}
  1313  	return false, nil
  1314  }
  1315  
  1316  // errWatcher implements watch.Interface to return a single error
  1317  type errWatcher struct {
  1318  	result chan watch.Event
  1319  }
  1320  
  1321  func newErrWatcher(err error) *errWatcher {
  1322  	// Create an error event
  1323  	errEvent := watch.Event{Type: watch.Error}
  1324  	switch err := err.(type) {
  1325  	case runtime.Object:
  1326  		errEvent.Object = err
  1327  	case *errors.StatusError:
  1328  		errEvent.Object = &err.ErrStatus
  1329  	default:
  1330  		errEvent.Object = &metav1.Status{
  1331  			Status:  metav1.StatusFailure,
  1332  			Message: err.Error(),
  1333  			Reason:  metav1.StatusReasonInternalError,
  1334  			Code:    http.StatusInternalServerError,
  1335  		}
  1336  	}
  1337  
  1338  	// Create a watcher with room for a single event, populate it, and close the channel
  1339  	watcher := &errWatcher{result: make(chan watch.Event, 1)}
  1340  	watcher.result <- errEvent
  1341  	close(watcher.result)
  1342  
  1343  	return watcher
  1344  }
  1345  
  1346  // Implements watch.Interface.
  1347  func (c *errWatcher) ResultChan() <-chan watch.Event {
  1348  	return c.result
  1349  }
  1350  
  1351  // Implements watch.Interface.
  1352  func (c *errWatcher) Stop() {
  1353  	// no-op
  1354  }
  1355  
  1356  // immediateCloseWatcher implements a watch.Interface that is immediately closed.
  1357  type immediateCloseWatcher struct {
  1358  	result chan watch.Event
  1359  }
  1360  
  1361  func newImmediateCloseWatcher() *immediateCloseWatcher {
  1362  	watcher := &immediateCloseWatcher{result: make(chan watch.Event)}
  1363  	close(watcher.result)
  1364  	return watcher
  1365  }
  1366  
  1367  // Implements watch.Interface.
  1368  func (c *immediateCloseWatcher) ResultChan() <-chan watch.Event {
  1369  	return c.result
  1370  }
  1371  
  1372  // Implements watch.Interface.
  1373  func (c *immediateCloseWatcher) Stop() {
  1374  	// no-op
  1375  }