k8s.io/apiserver@v0.29.3/pkg/storage/cacher/watch_cache.go

k8s.io/apiserver@v0.29.3/pkg/storage/cacher/watch_cache.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cacher
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"sort"
    24  	"sync"
    25  	"time"
    26  
    27  	"k8s.io/apimachinery/pkg/api/errors"
    28  	"k8s.io/apimachinery/pkg/fields"
    29  	"k8s.io/apimachinery/pkg/labels"
    30  	"k8s.io/apimachinery/pkg/runtime"
    31  	"k8s.io/apimachinery/pkg/runtime/schema"
    32  	"k8s.io/apimachinery/pkg/watch"
    33  	"k8s.io/apiserver/pkg/features"
    34  	"k8s.io/apiserver/pkg/storage"
    35  	"k8s.io/apiserver/pkg/storage/cacher/metrics"
    36  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    37  	"k8s.io/client-go/tools/cache"
    38  	"k8s.io/component-base/tracing"
    39  	"k8s.io/klog/v2"
    40  	"k8s.io/utils/clock"
    41  )
    42  
const (
	// blockTimeout determines how long we're willing to block the request
	// to wait for a given resource version to be propagated to cache,
	// before terminating request and returning Timeout error with retry
	// after suggestion.
	blockTimeout = 3 * time.Second

	// resourceVersionTooHighRetrySeconds is the number of seconds after which
	// an operation should be retried by the client after receiving a
	// 'too high resource version' error.
	resourceVersionTooHighRetrySeconds = 1

	// eventFreshDuration is time duration of events we want to keep.
	// We set it to `defaultBookmarkFrequency` plus epsilon to maximize
	// chances that last bookmark was sent within kept history, at the
	// same time, minimizing the needed memory usage.
	eventFreshDuration = 75 * time.Second

	// defaultLowerBoundCapacity is a default value for event cache capacity's lower bound.
	// TODO: Figure out to what value we can decrease it.
	defaultLowerBoundCapacity = 100

	// defaultUpperBoundCapacity should be able to keep eventFreshDuration of history.
	defaultUpperBoundCapacity = 100 * 1024
)
    67  
// watchCacheEvent is a single "watch event" that is sent to users of
// watchCache. In addition to a typical "watch.Event" it contains
// the previous value of the object to enable proper filtering in the
// upper layers.
//
// For events produced by Add/Update/Delete, Object/ObjLabels/ObjFields
// describe the object carried by the event, the Prev* fields describe the
// value found in the store for the same key just before the event was applied
// (left unset when there was none), Key is the storage key of the object and
// RecordTime is the time at which the watchCache processed the event.
// Bookmark events created by UpdateResourceVersion only set Type and
// ResourceVersion.
type watchCacheEvent struct {
	Type            watch.EventType
	Object          runtime.Object
	ObjLabels       labels.Set
	ObjFields       fields.Set
	PrevObject      runtime.Object
	PrevObjLabels   labels.Set
	PrevObjFields   fields.Set
	Key             string
	ResourceVersion uint64
	RecordTime      time.Time
}
    84  
// storeElement is the unit kept in the underlying store.
//
// Computing a key of an object is generally non-trivial (it performs
// e.g. validation underneath). The same holds for computing object fields
// and labels. To avoid computing them multiple times (to serve the event
// in different List/Watch requests), the underlying store keeps
// structs of (key, object, labels, fields).
type storeElement struct {
	Key    string
	Object runtime.Object
	Labels labels.Set
	Fields fields.Set
}
    96  
    97  func storeElementKey(obj interface{}) (string, error) {
    98  	elem, ok := obj.(*storeElement)
    99  	if !ok {
   100  		return "", fmt.Errorf("not a storeElement: %v", obj)
   101  	}
   102  	return elem.Key, nil
   103  }
   104  
   105  func storeElementObject(obj interface{}) (runtime.Object, error) {
   106  	elem, ok := obj.(*storeElement)
   107  	if !ok {
   108  		return nil, fmt.Errorf("not a storeElement: %v", obj)
   109  	}
   110  	return elem.Object, nil
   111  }
   112  
   113  func storeElementIndexFunc(objIndexFunc cache.IndexFunc) cache.IndexFunc {
   114  	return func(obj interface{}) (strings []string, e error) {
   115  		seo, err := storeElementObject(obj)
   116  		if err != nil {
   117  			return nil, err
   118  		}
   119  		return objIndexFunc(seo)
   120  	}
   121  }
   122  
   123  func storeElementIndexers(indexers *cache.Indexers) cache.Indexers {
   124  	if indexers == nil {
   125  		return cache.Indexers{}
   126  	}
   127  	ret := cache.Indexers{}
   128  	for indexName, indexFunc := range *indexers {
   129  		ret[indexName] = storeElementIndexFunc(indexFunc)
   130  	}
   131  	return ret
   132  }
   133  
// watchCache implements a Store interface.
// However, it depends on the elements implementing runtime.Object interface.
//
// watchCache is a "sliding window" (with a limited capacity) of objects
// observed from a watch.
type watchCache struct {
	// The embedded lock is taken for write by the methods that mutate the
	// cyclic buffer and resourceVersion, and for read by the waiting/reading
	// methods.
	sync.RWMutex

	// Condition on which lists are waiting for the fresh enough
	// resource version. It is built on top of the read side of the
	// embedded lock (see newWatchCache).
	cond *sync.Cond

	// Maximum size of history window.
	capacity int

	// upper bound of capacity since event cache has a dynamic size.
	upperBoundCapacity int

	// lower bound of capacity since event cache has a dynamic size.
	lowerBoundCapacity int

	// keyFunc is used to get a key in the underlying storage for a given object.
	keyFunc func(runtime.Object) (string, error)

	// getAttrsFunc is used to get labels and fields of an object.
	getAttrsFunc func(runtime.Object) (labels.Set, fields.Set, error)

	// cache is used as a cyclic buffer - the "current" contents of it are
	// stored in [start_index%capacity, end_index%capacity) - so the
	// "current" contents have exactly end_index-start_index items.
	cache      []*watchCacheEvent
	startIndex int
	endIndex   int
	// removedEventSinceRelist holds the information whether any of the events
	// were already removed from the `cache` cyclic buffer since the last relist
	removedEventSinceRelist bool

	// store will effectively support LIST operation from the "end of cache
	// history" i.e. from the moment just after the newest cached watched event.
	// It is necessary to effectively allow clients to start watching at now.
	// NOTE: We assume that <store> is thread-safe.
	store cache.Indexer

	// ResourceVersion up to which the watchCache is propagated.
	resourceVersion uint64

	// ResourceVersion of the last list result (populated via Replace() method).
	listResourceVersion uint64

	// This handler is run at the end of every successful Replace() method.
	onReplace func()

	// This handler is run at the end of every Add/Update/Delete method
	// and additionally gets the previous value of the object.
	// It is also invoked with a Bookmark event by UpdateResourceVersion.
	eventHandler func(*watchCacheEvent)

	// for testing timeouts.
	clock clock.Clock

	// An underlying storage.Versioner.
	versioner storage.Versioner

	// cacher's group resource
	groupResource schema.GroupResource

	// For testing cache interval invalidation.
	indexValidator indexValidator

	// Requests progress notification if there are requests waiting for watch
	// to be fresh
	waitingUntilFresh *conditionalProgressRequester
}
   206  
   207  func newWatchCache(
   208  	keyFunc func(runtime.Object) (string, error),
   209  	eventHandler func(*watchCacheEvent),
   210  	getAttrsFunc func(runtime.Object) (labels.Set, fields.Set, error),
   211  	versioner storage.Versioner,
   212  	indexers *cache.Indexers,
   213  	clock clock.WithTicker,
   214  	groupResource schema.GroupResource,
   215  	progressRequester *conditionalProgressRequester) *watchCache {
   216  	wc := &watchCache{
   217  		capacity:            defaultLowerBoundCapacity,
   218  		keyFunc:             keyFunc,
   219  		getAttrsFunc:        getAttrsFunc,
   220  		cache:               make([]*watchCacheEvent, defaultLowerBoundCapacity),
   221  		lowerBoundCapacity:  defaultLowerBoundCapacity,
   222  		upperBoundCapacity:  defaultUpperBoundCapacity,
   223  		startIndex:          0,
   224  		endIndex:            0,
   225  		store:               cache.NewIndexer(storeElementKey, storeElementIndexers(indexers)),
   226  		resourceVersion:     0,
   227  		listResourceVersion: 0,
   228  		eventHandler:        eventHandler,
   229  		clock:               clock,
   230  		versioner:           versioner,
   231  		groupResource:       groupResource,
   232  		waitingUntilFresh:   progressRequester,
   233  	}
   234  	metrics.WatchCacheCapacity.WithLabelValues(groupResource.String()).Set(float64(wc.capacity))
   235  	wc.cond = sync.NewCond(wc.RLocker())
   236  	wc.indexValidator = wc.isIndexValidLocked
   237  
   238  	return wc
   239  }
   240  
   241  // Add takes runtime.Object as an argument.
   242  func (w *watchCache) Add(obj interface{}) error {
   243  	object, resourceVersion, err := w.objectToVersionedRuntimeObject(obj)
   244  	if err != nil {
   245  		return err
   246  	}
   247  	event := watch.Event{Type: watch.Added, Object: object}
   248  
   249  	f := func(elem *storeElement) error { return w.store.Add(elem) }
   250  	return w.processEvent(event, resourceVersion, f)
   251  }
   252  
   253  // Update takes runtime.Object as an argument.
   254  func (w *watchCache) Update(obj interface{}) error {
   255  	object, resourceVersion, err := w.objectToVersionedRuntimeObject(obj)
   256  	if err != nil {
   257  		return err
   258  	}
   259  	event := watch.Event{Type: watch.Modified, Object: object}
   260  
   261  	f := func(elem *storeElement) error { return w.store.Update(elem) }
   262  	return w.processEvent(event, resourceVersion, f)
   263  }
   264  
   265  // Delete takes runtime.Object as an argument.
   266  func (w *watchCache) Delete(obj interface{}) error {
   267  	object, resourceVersion, err := w.objectToVersionedRuntimeObject(obj)
   268  	if err != nil {
   269  		return err
   270  	}
   271  	event := watch.Event{Type: watch.Deleted, Object: object}
   272  
   273  	f := func(elem *storeElement) error { return w.store.Delete(elem) }
   274  	return w.processEvent(event, resourceVersion, f)
   275  }
   276  
   277  func (w *watchCache) objectToVersionedRuntimeObject(obj interface{}) (runtime.Object, uint64, error) {
   278  	object, ok := obj.(runtime.Object)
   279  	if !ok {
   280  		return nil, 0, fmt.Errorf("obj does not implement runtime.Object interface: %v", obj)
   281  	}
   282  	resourceVersion, err := w.versioner.ObjectResourceVersion(object)
   283  	if err != nil {
   284  		return nil, 0, err
   285  	}
   286  	return object, resourceVersion, nil
   287  }
   288  
   289  // processEvent is safe as long as there is at most one call to it in flight
   290  // at any point in time.
   291  func (w *watchCache) processEvent(event watch.Event, resourceVersion uint64, updateFunc func(*storeElement) error) error {
   292  	metrics.EventsReceivedCounter.WithLabelValues(w.groupResource.String()).Inc()
   293  
   294  	key, err := w.keyFunc(event.Object)
   295  	if err != nil {
   296  		return fmt.Errorf("couldn't compute key: %v", err)
   297  	}
   298  	elem := &storeElement{Key: key, Object: event.Object}
   299  	elem.Labels, elem.Fields, err = w.getAttrsFunc(event.Object)
   300  	if err != nil {
   301  		return err
   302  	}
   303  
   304  	wcEvent := &watchCacheEvent{
   305  		Type:            event.Type,
   306  		Object:          elem.Object,
   307  		ObjLabels:       elem.Labels,
   308  		ObjFields:       elem.Fields,
   309  		Key:             key,
   310  		ResourceVersion: resourceVersion,
   311  		RecordTime:      w.clock.Now(),
   312  	}
   313  
   314  	if err := func() error {
   315  		// TODO: We should consider moving this lock below after the watchCacheEvent
   316  		// is created. In such situation, the only problematic scenario is Replace()
   317  		// happening after getting object from store and before acquiring a lock.
   318  		// Maybe introduce another lock for this purpose.
   319  		w.Lock()
   320  		defer w.Unlock()
   321  
   322  		previous, exists, err := w.store.Get(elem)
   323  		if err != nil {
   324  			return err
   325  		}
   326  		if exists {
   327  			previousElem := previous.(*storeElement)
   328  			wcEvent.PrevObject = previousElem.Object
   329  			wcEvent.PrevObjLabels = previousElem.Labels
   330  			wcEvent.PrevObjFields = previousElem.Fields
   331  		}
   332  
   333  		w.updateCache(wcEvent)
   334  		w.resourceVersion = resourceVersion
   335  		defer w.cond.Broadcast()
   336  
   337  		return updateFunc(elem)
   338  	}(); err != nil {
   339  		return err
   340  	}
   341  
   342  	// Avoid calling event handler under lock.
   343  	// This is safe as long as there is at most one call to Add/Update/Delete and
   344  	// UpdateResourceVersion in flight at any point in time, which is true now,
   345  	// because reflector calls them synchronously from its main thread.
   346  	if w.eventHandler != nil {
   347  		w.eventHandler(wcEvent)
   348  	}
   349  	return nil
   350  }
   351  
   352  // Assumes that lock is already held for write.
   353  func (w *watchCache) updateCache(event *watchCacheEvent) {
   354  	w.resizeCacheLocked(event.RecordTime)
   355  	if w.isCacheFullLocked() {
   356  		// Cache is full - remove the oldest element.
   357  		w.startIndex++
   358  		w.removedEventSinceRelist = true
   359  	}
   360  	w.cache[w.endIndex%w.capacity] = event
   361  	w.endIndex++
   362  }
   363  
   364  // resizeCacheLocked resizes the cache if necessary:
   365  // - increases capacity by 2x if cache is full and all cached events occurred within last eventFreshDuration.
   366  // - decreases capacity by 2x when recent quarter of events occurred outside of eventFreshDuration(protect watchCache from flapping).
   367  func (w *watchCache) resizeCacheLocked(eventTime time.Time) {
   368  	if w.isCacheFullLocked() && eventTime.Sub(w.cache[w.startIndex%w.capacity].RecordTime) < eventFreshDuration {
   369  		capacity := min(w.capacity*2, w.upperBoundCapacity)
   370  		if capacity > w.capacity {
   371  			w.doCacheResizeLocked(capacity)
   372  		}
   373  		return
   374  	}
   375  	if w.isCacheFullLocked() && eventTime.Sub(w.cache[(w.endIndex-w.capacity/4)%w.capacity].RecordTime) > eventFreshDuration {
   376  		capacity := max(w.capacity/2, w.lowerBoundCapacity)
   377  		if capacity < w.capacity {
   378  			w.doCacheResizeLocked(capacity)
   379  		}
   380  		return
   381  	}
   382  }
   383  
   384  // isCacheFullLocked used to judge whether watchCacheEvent is full.
   385  // Assumes that lock is already held for write.
   386  func (w *watchCache) isCacheFullLocked() bool {
   387  	return w.endIndex == w.startIndex+w.capacity
   388  }
   389  
   390  // doCacheResizeLocked resize watchCache's event array with different capacity.
   391  // Assumes that lock is already held for write.
   392  func (w *watchCache) doCacheResizeLocked(capacity int) {
   393  	newCache := make([]*watchCacheEvent, capacity)
   394  	if capacity < w.capacity {
   395  		// adjust startIndex if cache capacity shrink.
   396  		w.startIndex = w.endIndex - capacity
   397  	}
   398  	for i := w.startIndex; i < w.endIndex; i++ {
   399  		newCache[i%capacity] = w.cache[i%w.capacity]
   400  	}
   401  	w.cache = newCache
   402  	metrics.RecordsWatchCacheCapacityChange(w.groupResource.String(), w.capacity, capacity)
   403  	w.capacity = capacity
   404  }
   405  
   406  func (w *watchCache) UpdateResourceVersion(resourceVersion string) {
   407  	rv, err := w.versioner.ParseResourceVersion(resourceVersion)
   408  	if err != nil {
   409  		klog.Errorf("Couldn't parse resourceVersion: %v", err)
   410  		return
   411  	}
   412  
   413  	func() {
   414  		w.Lock()
   415  		defer w.Unlock()
   416  		w.resourceVersion = rv
   417  		w.cond.Broadcast()
   418  	}()
   419  
   420  	// Avoid calling event handler under lock.
   421  	// This is safe as long as there is at most one call to Add/Update/Delete and
   422  	// UpdateResourceVersion in flight at any point in time, which is true now,
   423  	// because reflector calls them synchronously from its main thread.
   424  	if w.eventHandler != nil {
   425  		wcEvent := &watchCacheEvent{
   426  			Type:            watch.Bookmark,
   427  			ResourceVersion: rv,
   428  		}
   429  		w.eventHandler(wcEvent)
   430  	}
   431  }
   432  
// List returns list of pointers to <storeElement> objects.
// It delegates directly to the underlying store and does not take the
// watchCache lock itself.
func (w *watchCache) List() []interface{} {
	return w.store.List()
}
   437  
// waitUntilFreshAndBlock waits until cache is at least as fresh as given <resourceVersion>.
// If that does not happen within blockTimeout, it returns a "too large
// resource version" error carrying a retry-after hint of
// resourceVersionTooHighRetrySeconds seconds.
//
// ctx is only used to obtain the tracing span; the wait itself is bounded
// by blockTimeout.
//
// NOTE: This function acquires the read lock and doesn't release it - neither
// on success nor when it returns an error.
// You HAVE TO explicitly call w.RUnlock() after this function.
func (w *watchCache) waitUntilFreshAndBlock(ctx context.Context, resourceVersion uint64) error {
	startTime := w.clock.Now()

	// In case resourceVersion is 0, we accept arbitrarily stale result.
	// As a result, the condition in the below for loop will never be
	// satisfied (w.resourceVersion is never negative), this call will
	// never hit the w.cond.Wait().
	// As a result - we can optimize the code by not firing the wakeup
	// function (and avoid starting a goroutine), especially given that
	// resourceVersion=0 is the most common case.
	if resourceVersion > 0 {
		go func() {
			// Wake us up when the time limit has expired.  The docs
			// promise that time.After (well, NewTimer, which it calls)
			// will wait *at least* the duration given. Since this go
			// routine starts sometime after we record the start time, and
			// it will wake up the loop below sometime after the broadcast,
			// we don't need to worry about waking it up before the time
			// has expired accidentally.
			<-w.clock.After(blockTimeout)
			w.cond.Broadcast()
		}()
	}

	w.RLock()
	span := tracing.SpanFromContext(ctx)
	span.AddEvent("watchCache locked acquired")
	// Every wakeup (new event, bookmark, replace or the timeout goroutine
	// above) re-checks both the freshness and the elapsed time.
	for w.resourceVersion < resourceVersion {
		if w.clock.Since(startTime) >= blockTimeout {
			// Request that the client retry after 'resourceVersionTooHighRetrySeconds' seconds.
			return storage.NewTooLargeResourceVersionError(resourceVersion, w.resourceVersion, resourceVersionTooHighRetrySeconds)
		}
		w.cond.Wait()
	}
	span.AddEvent("watchCache fresh enough")
	return nil
}
   478  
   479  type sortableStoreElements []interface{}
   480  
   481  func (s sortableStoreElements) Len() int {
   482  	return len(s)
   483  }
   484  
   485  func (s sortableStoreElements) Less(i, j int) bool {
   486  	return s[i].(*storeElement).Key < s[j].(*storeElement).Key
   487  }
   488  
   489  func (s sortableStoreElements) Swap(i, j int) {
   490  	s[i], s[j] = s[j], s[i]
   491  }
   492  
   493  // WaitUntilFreshAndList returns list of pointers to `storeElement` objects along
   494  // with their ResourceVersion and the name of the index, if any, that was used.
   495  func (w *watchCache) WaitUntilFreshAndList(ctx context.Context, resourceVersion uint64, matchValues []storage.MatchValue) ([]interface{}, uint64, string, error) {
   496  	var err error
   497  	if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentListFromCache) && w.notFresh(resourceVersion) {
   498  		w.waitingUntilFresh.Add()
   499  		err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
   500  		w.waitingUntilFresh.Remove()
   501  	} else {
   502  		err = w.waitUntilFreshAndBlock(ctx, resourceVersion)
   503  	}
   504  	defer w.RUnlock()
   505  	if err != nil {
   506  		return nil, 0, "", err
   507  	}
   508  
   509  	result, rv, index, err := func() ([]interface{}, uint64, string, error) {
   510  		// This isn't the place where we do "final filtering" - only some "prefiltering" is happening here. So the only
   511  		// requirement here is to NOT miss anything that should be returned. We can return as many non-matching items as we
   512  		// want - they will be filtered out later. The fact that we return less things is only further performance improvement.
   513  		// TODO: if multiple indexes match, return the one with the fewest items, so as to do as much filtering as possible.
   514  		for _, matchValue := range matchValues {
   515  			if result, err := w.store.ByIndex(matchValue.IndexName, matchValue.Value); err == nil {
   516  				return result, w.resourceVersion, matchValue.IndexName, nil
   517  			}
   518  		}
   519  		return w.store.List(), w.resourceVersion, "", nil
   520  	}()
   521  
   522  	sort.Sort(sortableStoreElements(result))
   523  	return result, rv, index, err
   524  }
   525  
   526  func (w *watchCache) notFresh(resourceVersion uint64) bool {
   527  	w.RLock()
   528  	defer w.RUnlock()
   529  	return resourceVersion > w.resourceVersion
   530  }
   531  
   532  // WaitUntilFreshAndGet returns a pointers to <storeElement> object.
   533  func (w *watchCache) WaitUntilFreshAndGet(ctx context.Context, resourceVersion uint64, key string) (interface{}, bool, uint64, error) {
   534  	err := w.waitUntilFreshAndBlock(ctx, resourceVersion)
   535  	defer w.RUnlock()
   536  	if err != nil {
   537  		return nil, false, 0, err
   538  	}
   539  	value, exists, err := w.store.GetByKey(key)
   540  	return value, exists, w.resourceVersion, err
   541  }
   542  
// ListKeys returns the keys reported by the underlying store.
// It does not take the watchCache lock itself.
func (w *watchCache) ListKeys() []string {
	return w.store.ListKeys()
}
   546  
   547  // Get takes runtime.Object as a parameter. However, it returns
   548  // pointer to <storeElement>.
   549  func (w *watchCache) Get(obj interface{}) (interface{}, bool, error) {
   550  	object, ok := obj.(runtime.Object)
   551  	if !ok {
   552  		return nil, false, fmt.Errorf("obj does not implement runtime.Object interface: %v", obj)
   553  	}
   554  	key, err := w.keyFunc(object)
   555  	if err != nil {
   556  		return nil, false, fmt.Errorf("couldn't compute key: %v", err)
   557  	}
   558  
   559  	return w.store.Get(&storeElement{Key: key, Object: object})
   560  }
   561  
// GetByKey returns pointer to <storeElement>.
// The lookup is delegated directly to the underlying store.
func (w *watchCache) GetByKey(key string) (interface{}, bool, error) {
	return w.store.GetByKey(key)
}
   566  
   567  // Replace takes slice of runtime.Object as a parameter.
   568  func (w *watchCache) Replace(objs []interface{}, resourceVersion string) error {
   569  	version, err := w.versioner.ParseResourceVersion(resourceVersion)
   570  	if err != nil {
   571  		return err
   572  	}
   573  
   574  	toReplace := make([]interface{}, 0, len(objs))
   575  	for _, obj := range objs {
   576  		object, ok := obj.(runtime.Object)
   577  		if !ok {
   578  			return fmt.Errorf("didn't get runtime.Object for replace: %#v", obj)
   579  		}
   580  		key, err := w.keyFunc(object)
   581  		if err != nil {
   582  			return fmt.Errorf("couldn't compute key: %v", err)
   583  		}
   584  		objLabels, objFields, err := w.getAttrsFunc(object)
   585  		if err != nil {
   586  			return err
   587  		}
   588  		toReplace = append(toReplace, &storeElement{
   589  			Key:    key,
   590  			Object: object,
   591  			Labels: objLabels,
   592  			Fields: objFields,
   593  		})
   594  	}
   595  
   596  	w.Lock()
   597  	defer w.Unlock()
   598  
   599  	// Ensure startIndex never decreases, so that existing watchCacheInterval
   600  	// instances get "invalid" errors if the try to download from the buffer
   601  	// using their own start/end indexes calculated from previous buffer
   602  	// content.
   603  
   604  	// Empty the cyclic buffer, ensuring startIndex doesn't decrease.
   605  	w.startIndex = w.endIndex
   606  	w.removedEventSinceRelist = false
   607  
   608  	if err := w.store.Replace(toReplace, resourceVersion); err != nil {
   609  		return err
   610  	}
   611  	w.listResourceVersion = version
   612  	w.resourceVersion = version
   613  	if w.onReplace != nil {
   614  		w.onReplace()
   615  	}
   616  	w.cond.Broadcast()
   617  	klog.V(3).Infof("Replace watchCache (rev: %v) ", resourceVersion)
   618  	return nil
   619  }
   620  
// SetOnReplace installs the handler that Replace runs after it has
// successfully replaced the content of the cache. The handler is stored
// under the write lock.
func (w *watchCache) SetOnReplace(onReplace func()) {
	w.Lock()
	defer w.Unlock()
	w.onReplace = onReplace
}
   626  
// Resync is a no-op for watchCache; it always returns nil.
func (w *watchCache) Resync() error {
	// Nothing to do
	return nil
}
   631  
// currentCapacity returns the current capacity of the cyclic event buffer,
// read under the read lock.
func (w *watchCache) currentCapacity() int {
	w.RLock()
	defer w.RUnlock()
	return w.capacity
}
   637  
const (
	// minWatchChanSize is the min size of channels used by the watch.
	// We keep that set to 10 for "backward compatibility" until we
	// convince ourselves based on some metrics that decreasing is safe.
	minWatchChanSize = 10
	// maxWatchChanSizeWithIndexAndTrigger is the max size of the channel
	// used by the watch using the index and trigger selector.
	maxWatchChanSizeWithIndexAndTrigger = 10
	// maxWatchChanSizeWithIndexWithoutTrigger is the max size of the channel
	// used by the watch using the index but without triggering selector.
	// We keep that set to 1000 for "backward compatibility", until we
	// convince ourselves based on some metrics that decreasing is safe.
	maxWatchChanSizeWithIndexWithoutTrigger = 1000
	// maxWatchChanSizeWithoutIndex is the max size of the channel
	// used by the watch not using the index.
	// TODO(wojtek-t): Figure out if the value shouldn't be higher.
	maxWatchChanSizeWithoutIndex = 100
)
   656  
   657  func (w *watchCache) suggestedWatchChannelSize(indexExists, triggerUsed bool) int {
   658  	// To estimate the channel size we use a heuristic that a channel
   659  	// should roughly be able to keep one second of history.
   660  	// We don't have an exact data, but given we store updates from
   661  	// the last <eventFreshDuration>, we approach it by dividing the
   662  	// capacity by the length of the history window.
   663  	chanSize := int(math.Ceil(float64(w.currentCapacity()) / eventFreshDuration.Seconds()))
   664  
   665  	// Finally we adjust the size to avoid ending with too low or
   666  	// to large values.
   667  	if chanSize < minWatchChanSize {
   668  		chanSize = minWatchChanSize
   669  	}
   670  	var maxChanSize int
   671  	switch {
   672  	case indexExists && triggerUsed:
   673  		maxChanSize = maxWatchChanSizeWithIndexAndTrigger
   674  	case indexExists && !triggerUsed:
   675  		maxChanSize = maxWatchChanSizeWithIndexWithoutTrigger
   676  	case !indexExists:
   677  		maxChanSize = maxWatchChanSizeWithoutIndex
   678  	}
   679  	if chanSize > maxChanSize {
   680  		chanSize = maxChanSize
   681  	}
   682  	return chanSize
   683  }
   684  
   685  // isIndexValidLocked checks if a given index is still valid.
   686  // This assumes that the lock is held.
   687  func (w *watchCache) isIndexValidLocked(index int) bool {
   688  	return index >= w.startIndex
   689  }
   690  
   691  // getAllEventsSinceLocked returns a watchCacheInterval that can be used to
   692  // retrieve events since a certain resourceVersion. This function assumes to
   693  // be called under the watchCache lock.
   694  func (w *watchCache) getAllEventsSinceLocked(resourceVersion uint64) (*watchCacheInterval, error) {
   695  	size := w.endIndex - w.startIndex
   696  	var oldest uint64
   697  	switch {
   698  	case w.listResourceVersion > 0 && !w.removedEventSinceRelist:
   699  		// If no event was removed from the buffer since last relist, the oldest watch
   700  		// event we can deliver is one greater than the resource version of the list.
   701  		oldest = w.listResourceVersion + 1
   702  	case size > 0:
   703  		// If the previous condition is not satisfied: either some event was already
   704  		// removed from the buffer or we've never completed a list (the latter can
   705  		// only happen in unit tests that populate the buffer without performing
   706  		// list/replace operations), the oldest watch event we can deliver is the first
   707  		// one in the buffer.
   708  		oldest = w.cache[w.startIndex%w.capacity].ResourceVersion
   709  	default:
   710  		return nil, fmt.Errorf("watch cache isn't correctly initialized")
   711  	}
   712  
   713  	if resourceVersion == 0 {
   714  		// resourceVersion = 0 means that we don't require any specific starting point
   715  		// and we would like to start watching from ~now.
   716  		// However, to keep backward compatibility, we additionally need to return the
   717  		// current state and only then start watching from that point.
   718  		//
   719  		// TODO: In v2 api, we should stop returning the current state - #13969.
   720  		return w.getIntervalFromStoreLocked()
   721  	}
   722  	if resourceVersion < oldest-1 {
   723  		return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d (%d)", resourceVersion, oldest-1))
   724  	}
   725  
   726  	// Binary search the smallest index at which resourceVersion is greater than the given one.
   727  	f := func(i int) bool {
   728  		return w.cache[(w.startIndex+i)%w.capacity].ResourceVersion > resourceVersion
   729  	}
   730  	first := sort.Search(size, f)
   731  	indexerFunc := func(i int) *watchCacheEvent {
   732  		return w.cache[i%w.capacity]
   733  	}
   734  	ci := newCacheInterval(w.startIndex+first, w.endIndex, indexerFunc, w.indexValidator, &w.RWMutex)
   735  	return ci, nil
   736  }
   737  
   738  // getIntervalFromStoreLocked returns a watchCacheInterval
   739  // that covers the entire storage state.
   740  // This function assumes to be called under the watchCache lock.
   741  func (w *watchCache) getIntervalFromStoreLocked() (*watchCacheInterval, error) {
   742  	ci, err := newCacheIntervalFromStore(w.resourceVersion, w.store, w.getAttrsFunc)
   743  	if err != nil {
   744  		return nil, err
   745  	}
   746  	return ci, nil
   747  }