k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/kubelet/util/manager/watch_based_manager.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package manager
    18  
    19  import (
    20  	"fmt"
    21  	"sync"
    22  	"time"
    23  
    24  	v1 "k8s.io/api/core/v1"
    25  	"k8s.io/client-go/tools/cache"
    26  
    27  	"k8s.io/klog/v2"
    28  
    29  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	"k8s.io/apimachinery/pkg/fields"
    32  	"k8s.io/apimachinery/pkg/runtime"
    33  	"k8s.io/apimachinery/pkg/runtime/schema"
    34  	"k8s.io/apimachinery/pkg/types"
    35  	"k8s.io/apimachinery/pkg/util/sets"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  	"k8s.io/apimachinery/pkg/watch"
    38  	"k8s.io/utils/clock"
    39  )
    40  
    41  type listObjectFunc func(string, metav1.ListOptions) (runtime.Object, error)
    42  type watchObjectFunc func(string, metav1.ListOptions) (watch.Interface, error)
    43  type newObjectFunc func() runtime.Object
    44  type isImmutableFunc func(runtime.Object) bool
    45  
    46  // objectCacheItem is a single item stored in objectCache.
    47  type objectCacheItem struct {
    48  	refMap    map[types.UID]int
    49  	store     *cacheStore
    50  	reflector *cache.Reflector
    51  
    52  	hasSynced func() (bool, error)
    53  
    54  	// waitGroup is used to ensure that there won't be two concurrent calls to reflector.Run
    55  	waitGroup sync.WaitGroup
    56  
    57  	// lock is to ensure the access and modify of lastAccessTime, stopped, and immutable are thread safety,
    58  	// and protecting from closing stopCh multiple times.
    59  	lock           sync.Mutex
    60  	lastAccessTime time.Time
    61  	stopped        bool
    62  	immutable      bool
    63  	stopCh         chan struct{}
    64  }
    65  
    66  func (i *objectCacheItem) stop() bool {
    67  	i.lock.Lock()
    68  	defer i.lock.Unlock()
    69  	return i.stopThreadUnsafe()
    70  }
    71  
    72  func (i *objectCacheItem) stopThreadUnsafe() bool {
    73  	if i.stopped {
    74  		return false
    75  	}
    76  	i.stopped = true
    77  	close(i.stopCh)
    78  	if !i.immutable {
    79  		i.store.unsetInitialized()
    80  	}
    81  	return true
    82  }
    83  
    84  func (i *objectCacheItem) setLastAccessTime(time time.Time) {
    85  	i.lock.Lock()
    86  	defer i.lock.Unlock()
    87  	i.lastAccessTime = time
    88  }
    89  
    90  func (i *objectCacheItem) setImmutable() {
    91  	i.lock.Lock()
    92  	defer i.lock.Unlock()
    93  	i.immutable = true
    94  }
    95  
    96  func (i *objectCacheItem) stopIfIdle(now time.Time, maxIdleTime time.Duration) bool {
    97  	i.lock.Lock()
    98  	defer i.lock.Unlock()
    99  	// Ensure that we don't try to stop not yet initialized reflector.
   100  	// In case of overloaded kube-apiserver, if the list request is
   101  	// already being processed, all the work would lost and would have
   102  	// to be retried.
   103  	if !i.stopped && i.store.hasSynced() && now.After(i.lastAccessTime.Add(maxIdleTime)) {
   104  		return i.stopThreadUnsafe()
   105  	}
   106  	return false
   107  }
   108  
   109  func (i *objectCacheItem) restartReflectorIfNeeded() {
   110  	i.lock.Lock()
   111  	defer i.lock.Unlock()
   112  	if i.immutable || !i.stopped {
   113  		return
   114  	}
   115  	i.stopCh = make(chan struct{})
   116  	i.stopped = false
   117  	go i.startReflector()
   118  }
   119  
   120  func (i *objectCacheItem) startReflector() {
   121  	i.waitGroup.Wait()
   122  	i.waitGroup.Add(1)
   123  	defer i.waitGroup.Done()
   124  	i.reflector.Run(i.stopCh)
   125  }
   126  
   127  // cacheStore is in order to rewrite Replace function to mark initialized flag
   128  type cacheStore struct {
   129  	cache.Store
   130  	lock        sync.Mutex
   131  	initialized bool
   132  }
   133  
   134  func (c *cacheStore) Replace(list []interface{}, resourceVersion string) error {
   135  	c.lock.Lock()
   136  	defer c.lock.Unlock()
   137  	err := c.Store.Replace(list, resourceVersion)
   138  	if err != nil {
   139  		return err
   140  	}
   141  	c.initialized = true
   142  	return nil
   143  }
   144  
   145  func (c *cacheStore) hasSynced() bool {
   146  	c.lock.Lock()
   147  	defer c.lock.Unlock()
   148  	return c.initialized
   149  }
   150  
   151  func (c *cacheStore) unsetInitialized() {
   152  	c.lock.Lock()
   153  	defer c.lock.Unlock()
   154  	c.initialized = false
   155  }
   156  
   157  // objectCache is a local cache of objects propagated via
   158  // individual watches.
   159  type objectCache struct {
   160  	listObject    listObjectFunc
   161  	watchObject   watchObjectFunc
   162  	newObject     newObjectFunc
   163  	isImmutable   isImmutableFunc
   164  	groupResource schema.GroupResource
   165  	clock         clock.Clock
   166  	maxIdleTime   time.Duration
   167  
   168  	lock    sync.RWMutex
   169  	items   map[objectKey]*objectCacheItem
   170  	stopped bool
   171  }
   172  
   173  const minIdleTime = 1 * time.Minute
   174  
   175  // NewObjectCache returns a new watch-based instance of Store interface.
   176  func NewObjectCache(
   177  	listObject listObjectFunc,
   178  	watchObject watchObjectFunc,
   179  	newObject newObjectFunc,
   180  	isImmutable isImmutableFunc,
   181  	groupResource schema.GroupResource,
   182  	clock clock.Clock,
   183  	maxIdleTime time.Duration,
   184  	stopCh <-chan struct{}) Store {
   185  
   186  	if maxIdleTime < minIdleTime {
   187  		maxIdleTime = minIdleTime
   188  	}
   189  
   190  	store := &objectCache{
   191  		listObject:    listObject,
   192  		watchObject:   watchObject,
   193  		newObject:     newObject,
   194  		isImmutable:   isImmutable,
   195  		groupResource: groupResource,
   196  		clock:         clock,
   197  		maxIdleTime:   maxIdleTime,
   198  		items:         make(map[objectKey]*objectCacheItem),
   199  	}
   200  
   201  	go wait.Until(store.startRecycleIdleWatch, time.Minute, stopCh)
   202  	go store.shutdownWhenStopped(stopCh)
   203  	return store
   204  }
   205  
   206  func (c *objectCache) newStore() *cacheStore {
   207  	// TODO: We may consider created a dedicated store keeping just a single
   208  	// item, instead of using a generic store implementation for this purpose.
   209  	// However, simple benchmarks show that memory overhead in that case is
   210  	// decrease from ~600B to ~300B per object. So we are not optimizing it
   211  	// until we will see a good reason for that.
   212  	store := cache.NewStore(cache.MetaNamespaceKeyFunc)
   213  	return &cacheStore{store, sync.Mutex{}, false}
   214  }
   215  
   216  func (c *objectCache) newReflectorLocked(namespace, name string) *objectCacheItem {
   217  	fieldSelector := fields.Set{"metadata.name": name}.AsSelector().String()
   218  	listFunc := func(options metav1.ListOptions) (runtime.Object, error) {
   219  		options.FieldSelector = fieldSelector
   220  		return c.listObject(namespace, options)
   221  	}
   222  	watchFunc := func(options metav1.ListOptions) (watch.Interface, error) {
   223  		options.FieldSelector = fieldSelector
   224  		return c.watchObject(namespace, options)
   225  	}
   226  	store := c.newStore()
   227  	reflector := cache.NewReflectorWithOptions(
   228  		&cache.ListWatch{ListFunc: listFunc, WatchFunc: watchFunc},
   229  		c.newObject(),
   230  		store,
   231  		cache.ReflectorOptions{
   232  			Name: fmt.Sprintf("object-%q/%q", namespace, name),
   233  			// Bump default 5m MinWatchTimeout to avoid recreating
   234  			// watches too often.
   235  			MinWatchTimeout: 30 * time.Minute,
   236  		},
   237  	)
   238  	item := &objectCacheItem{
   239  		refMap:    make(map[types.UID]int),
   240  		store:     store,
   241  		reflector: reflector,
   242  		hasSynced: func() (bool, error) { return store.hasSynced(), nil },
   243  		stopCh:    make(chan struct{}),
   244  	}
   245  
   246  	// Don't start reflector if Kubelet is already shutting down.
   247  	if !c.stopped {
   248  		go item.startReflector()
   249  	}
   250  	return item
   251  }
   252  
   253  func (c *objectCache) AddReference(namespace, name string, referencedFrom types.UID) {
   254  	key := objectKey{namespace: namespace, name: name}
   255  
   256  	// AddReference is called from RegisterPod thus it needs to be efficient.
   257  	// Thus, it is only increasing refCount and in case of first registration
   258  	// of a given object it starts corresponding reflector.
   259  	// It's responsibility of the first Get operation to wait until the
   260  	// reflector propagated the store.
   261  	c.lock.Lock()
   262  	defer c.lock.Unlock()
   263  	item, exists := c.items[key]
   264  	if !exists {
   265  		item = c.newReflectorLocked(namespace, name)
   266  		c.items[key] = item
   267  	}
   268  	item.refMap[referencedFrom]++
   269  }
   270  
   271  func (c *objectCache) DeleteReference(namespace, name string, referencedFrom types.UID) {
   272  	key := objectKey{namespace: namespace, name: name}
   273  
   274  	c.lock.Lock()
   275  	defer c.lock.Unlock()
   276  	if item, ok := c.items[key]; ok {
   277  		item.refMap[referencedFrom]--
   278  		if item.refMap[referencedFrom] == 0 {
   279  			delete(item.refMap, referencedFrom)
   280  		}
   281  		if len(item.refMap) == 0 {
   282  			// Stop the underlying reflector.
   283  			item.stop()
   284  			delete(c.items, key)
   285  		}
   286  	}
   287  }
   288  
   289  // key returns key of an object with a given name and namespace.
   290  // This has to be in-sync with cache.MetaNamespaceKeyFunc.
   291  func (c *objectCache) key(namespace, name string) string {
   292  	if len(namespace) > 0 {
   293  		return namespace + "/" + name
   294  	}
   295  	return name
   296  }
   297  
   298  func (c *objectCache) isStopped() bool {
   299  	c.lock.RLock()
   300  	defer c.lock.RUnlock()
   301  	return c.stopped
   302  }
   303  
   304  func (c *objectCache) Get(namespace, name string) (runtime.Object, error) {
   305  	key := objectKey{namespace: namespace, name: name}
   306  
   307  	c.lock.RLock()
   308  	item, exists := c.items[key]
   309  	c.lock.RUnlock()
   310  
   311  	if !exists {
   312  		return nil, fmt.Errorf("object %q/%q not registered", namespace, name)
   313  	}
   314  	// Record last access time independently if it succeeded or not.
   315  	// This protects from premature (racy) reflector closure.
   316  	item.setLastAccessTime(c.clock.Now())
   317  
   318  	// Don't restart reflector if Kubelet is already shutting down.
   319  	if !c.isStopped() {
   320  		item.restartReflectorIfNeeded()
   321  	}
   322  	if err := wait.PollImmediate(10*time.Millisecond, time.Second, item.hasSynced); err != nil {
   323  		return nil, fmt.Errorf("failed to sync %s cache: %v", c.groupResource.String(), err)
   324  	}
   325  	obj, exists, err := item.store.GetByKey(c.key(namespace, name))
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	if !exists {
   330  		return nil, apierrors.NewNotFound(c.groupResource, name)
   331  	}
   332  	if object, ok := obj.(runtime.Object); ok {
   333  		// If the returned object is immutable, stop the reflector.
   334  		//
   335  		// NOTE: we may potentially not even start the reflector if the object is
   336  		// already immutable. However, given that:
   337  		// - we want to also handle the case when object is marked as immutable later
   338  		// - Secrets and ConfigMaps are periodically fetched by volumemanager anyway
   339  		// - doing that wouldn't provide visible scalability/performance gain - we
   340  		//   already have it from here
   341  		// - doing that would require significant refactoring to reflector
   342  		// we limit ourselves to just quickly stop the reflector here.
   343  		if c.isImmutable(object) {
   344  			item.setImmutable()
   345  			if item.stop() {
   346  				klog.V(4).InfoS("Stopped watching for changes - object is immutable", "obj", klog.KRef(namespace, name))
   347  			}
   348  		}
   349  		return object, nil
   350  	}
   351  	return nil, fmt.Errorf("unexpected object type: %v", obj)
   352  }
   353  
   354  func (c *objectCache) startRecycleIdleWatch() {
   355  	c.lock.Lock()
   356  	defer c.lock.Unlock()
   357  
   358  	for key, item := range c.items {
   359  		if item.stopIfIdle(c.clock.Now(), c.maxIdleTime) {
   360  			klog.V(4).InfoS("Not acquired for long time, Stopped watching for changes", "objectKey", key, "maxIdleTime", c.maxIdleTime)
   361  		}
   362  	}
   363  }
   364  
   365  func (c *objectCache) shutdownWhenStopped(stopCh <-chan struct{}) {
   366  	<-stopCh
   367  
   368  	c.lock.Lock()
   369  	defer c.lock.Unlock()
   370  
   371  	c.stopped = true
   372  	for _, item := range c.items {
   373  		item.stop()
   374  	}
   375  }
   376  
   377  // NewWatchBasedManager creates a manager that keeps a cache of all objects
   378  // necessary for registered pods.
   379  // It implements the following logic:
   380  //   - whenever a pod is created or updated, we start individual watches for all
   381  //     referenced objects that aren't referenced from other registered pods
   382  //   - every GetObject() returns a value from local cache propagated via watches
   383  func NewWatchBasedManager(
   384  	listObject listObjectFunc,
   385  	watchObject watchObjectFunc,
   386  	newObject newObjectFunc,
   387  	isImmutable isImmutableFunc,
   388  	groupResource schema.GroupResource,
   389  	resyncInterval time.Duration,
   390  	getReferencedObjects func(*v1.Pod) sets.String) Manager {
   391  
   392  	// If a configmap/secret is used as a volume, the volumeManager will visit the objectCacheItem every resyncInterval cycle,
   393  	// We just want to stop the objectCacheItem referenced by environment variables,
   394  	// So, maxIdleTime is set to an integer multiple of resyncInterval,
   395  	// We currently set it to 5 times.
   396  	maxIdleTime := resyncInterval * 5
   397  
   398  	// TODO propagate stopCh from the higher level.
   399  	objectStore := NewObjectCache(listObject, watchObject, newObject, isImmutable, groupResource, clock.RealClock{}, maxIdleTime, wait.NeverStop)
   400  	return NewCacheBasedManager(objectStore, getReferencedObjects)
   401  }