k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/kubelet/util/manager/watch_based_manager.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package manager 18 19 import ( 20 "fmt" 21 "sync" 22 "time" 23 24 v1 "k8s.io/api/core/v1" 25 "k8s.io/client-go/tools/cache" 26 27 "k8s.io/klog/v2" 28 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/fields" 32 "k8s.io/apimachinery/pkg/runtime" 33 "k8s.io/apimachinery/pkg/runtime/schema" 34 "k8s.io/apimachinery/pkg/types" 35 "k8s.io/apimachinery/pkg/util/sets" 36 "k8s.io/apimachinery/pkg/util/wait" 37 "k8s.io/apimachinery/pkg/watch" 38 "k8s.io/utils/clock" 39 ) 40 41 type listObjectFunc func(string, metav1.ListOptions) (runtime.Object, error) 42 type watchObjectFunc func(string, metav1.ListOptions) (watch.Interface, error) 43 type newObjectFunc func() runtime.Object 44 type isImmutableFunc func(runtime.Object) bool 45 46 // objectCacheItem is a single item stored in objectCache. 47 type objectCacheItem struct { 48 refMap map[types.UID]int 49 store *cacheStore 50 reflector *cache.Reflector 51 52 hasSynced func() (bool, error) 53 54 // waitGroup is used to ensure that there won't be two concurrent calls to reflector.Run 55 waitGroup sync.WaitGroup 56 57 // lock is to ensure the access and modify of lastAccessTime, stopped, and immutable are thread safety, 58 // and protecting from closing stopCh multiple times. 59 lock sync.Mutex 60 lastAccessTime time.Time 61 stopped bool 62 immutable bool 63 stopCh chan struct{} 64 } 65 66 func (i *objectCacheItem) stop() bool { 67 i.lock.Lock() 68 defer i.lock.Unlock() 69 return i.stopThreadUnsafe() 70 } 71 72 func (i *objectCacheItem) stopThreadUnsafe() bool { 73 if i.stopped { 74 return false 75 } 76 i.stopped = true 77 close(i.stopCh) 78 if !i.immutable { 79 i.store.unsetInitialized() 80 } 81 return true 82 } 83 84 func (i *objectCacheItem) setLastAccessTime(time time.Time) { 85 i.lock.Lock() 86 defer i.lock.Unlock() 87 i.lastAccessTime = time 88 } 89 90 func (i *objectCacheItem) setImmutable() { 91 i.lock.Lock() 92 defer i.lock.Unlock() 93 i.immutable = true 94 } 95 96 func (i *objectCacheItem) stopIfIdle(now time.Time, maxIdleTime time.Duration) bool { 97 i.lock.Lock() 98 defer i.lock.Unlock() 99 // Ensure that we don't try to stop not yet initialized reflector. 100 // In case of overloaded kube-apiserver, if the list request is 101 // already being processed, all the work would lost and would have 102 // to be retried. 103 if !i.stopped && i.store.hasSynced() && now.After(i.lastAccessTime.Add(maxIdleTime)) { 104 return i.stopThreadUnsafe() 105 } 106 return false 107 } 108 109 func (i *objectCacheItem) restartReflectorIfNeeded() { 110 i.lock.Lock() 111 defer i.lock.Unlock() 112 if i.immutable || !i.stopped { 113 return 114 } 115 i.stopCh = make(chan struct{}) 116 i.stopped = false 117 go i.startReflector() 118 } 119 120 func (i *objectCacheItem) startReflector() { 121 i.waitGroup.Wait() 122 i.waitGroup.Add(1) 123 defer i.waitGroup.Done() 124 i.reflector.Run(i.stopCh) 125 } 126 127 // cacheStore is in order to rewrite Replace function to mark initialized flag 128 type cacheStore struct { 129 cache.Store 130 lock sync.Mutex 131 initialized bool 132 } 133 134 func (c *cacheStore) Replace(list []interface{}, resourceVersion string) error { 135 c.lock.Lock() 136 defer c.lock.Unlock() 137 err := c.Store.Replace(list, resourceVersion) 138 if err != nil { 139 return err 140 } 141 c.initialized = true 142 return nil 143 } 144 145 func (c *cacheStore) hasSynced() bool { 146 c.lock.Lock() 147 defer c.lock.Unlock() 148 return c.initialized 149 } 150 151 func (c *cacheStore) unsetInitialized() { 152 c.lock.Lock() 153 defer c.lock.Unlock() 154 c.initialized = false 155 } 156 157 // objectCache is a local cache of objects propagated via 158 // individual watches. 159 type objectCache struct { 160 listObject listObjectFunc 161 watchObject watchObjectFunc 162 newObject newObjectFunc 163 isImmutable isImmutableFunc 164 groupResource schema.GroupResource 165 clock clock.Clock 166 maxIdleTime time.Duration 167 168 lock sync.RWMutex 169 items map[objectKey]*objectCacheItem 170 stopped bool 171 } 172 173 const minIdleTime = 1 * time.Minute 174 175 // NewObjectCache returns a new watch-based instance of Store interface. 176 func NewObjectCache( 177 listObject listObjectFunc, 178 watchObject watchObjectFunc, 179 newObject newObjectFunc, 180 isImmutable isImmutableFunc, 181 groupResource schema.GroupResource, 182 clock clock.Clock, 183 maxIdleTime time.Duration, 184 stopCh <-chan struct{}) Store { 185 186 if maxIdleTime < minIdleTime { 187 maxIdleTime = minIdleTime 188 } 189 190 store := &objectCache{ 191 listObject: listObject, 192 watchObject: watchObject, 193 newObject: newObject, 194 isImmutable: isImmutable, 195 groupResource: groupResource, 196 clock: clock, 197 maxIdleTime: maxIdleTime, 198 items: make(map[objectKey]*objectCacheItem), 199 } 200 201 go wait.Until(store.startRecycleIdleWatch, time.Minute, stopCh) 202 go store.shutdownWhenStopped(stopCh) 203 return store 204 } 205 206 func (c *objectCache) newStore() *cacheStore { 207 // TODO: We may consider created a dedicated store keeping just a single 208 // item, instead of using a generic store implementation for this purpose. 209 // However, simple benchmarks show that memory overhead in that case is 210 // decrease from ~600B to ~300B per object. So we are not optimizing it 211 // until we will see a good reason for that. 212 store := cache.NewStore(cache.MetaNamespaceKeyFunc) 213 return &cacheStore{store, sync.Mutex{}, false} 214 } 215 216 func (c *objectCache) newReflectorLocked(namespace, name string) *objectCacheItem { 217 fieldSelector := fields.Set{"metadata.name": name}.AsSelector().String() 218 listFunc := func(options metav1.ListOptions) (runtime.Object, error) { 219 options.FieldSelector = fieldSelector 220 return c.listObject(namespace, options) 221 } 222 watchFunc := func(options metav1.ListOptions) (watch.Interface, error) { 223 options.FieldSelector = fieldSelector 224 return c.watchObject(namespace, options) 225 } 226 store := c.newStore() 227 reflector := cache.NewReflectorWithOptions( 228 &cache.ListWatch{ListFunc: listFunc, WatchFunc: watchFunc}, 229 c.newObject(), 230 store, 231 cache.ReflectorOptions{ 232 Name: fmt.Sprintf("object-%q/%q", namespace, name), 233 // Bump default 5m MinWatchTimeout to avoid recreating 234 // watches too often. 235 MinWatchTimeout: 30 * time.Minute, 236 }, 237 ) 238 item := &objectCacheItem{ 239 refMap: make(map[types.UID]int), 240 store: store, 241 reflector: reflector, 242 hasSynced: func() (bool, error) { return store.hasSynced(), nil }, 243 stopCh: make(chan struct{}), 244 } 245 246 // Don't start reflector if Kubelet is already shutting down. 247 if !c.stopped { 248 go item.startReflector() 249 } 250 return item 251 } 252 253 func (c *objectCache) AddReference(namespace, name string, referencedFrom types.UID) { 254 key := objectKey{namespace: namespace, name: name} 255 256 // AddReference is called from RegisterPod thus it needs to be efficient. 257 // Thus, it is only increasing refCount and in case of first registration 258 // of a given object it starts corresponding reflector. 259 // It's responsibility of the first Get operation to wait until the 260 // reflector propagated the store. 261 c.lock.Lock() 262 defer c.lock.Unlock() 263 item, exists := c.items[key] 264 if !exists { 265 item = c.newReflectorLocked(namespace, name) 266 c.items[key] = item 267 } 268 item.refMap[referencedFrom]++ 269 } 270 271 func (c *objectCache) DeleteReference(namespace, name string, referencedFrom types.UID) { 272 key := objectKey{namespace: namespace, name: name} 273 274 c.lock.Lock() 275 defer c.lock.Unlock() 276 if item, ok := c.items[key]; ok { 277 item.refMap[referencedFrom]-- 278 if item.refMap[referencedFrom] == 0 { 279 delete(item.refMap, referencedFrom) 280 } 281 if len(item.refMap) == 0 { 282 // Stop the underlying reflector. 283 item.stop() 284 delete(c.items, key) 285 } 286 } 287 } 288 289 // key returns key of an object with a given name and namespace. 290 // This has to be in-sync with cache.MetaNamespaceKeyFunc. 291 func (c *objectCache) key(namespace, name string) string { 292 if len(namespace) > 0 { 293 return namespace + "/" + name 294 } 295 return name 296 } 297 298 func (c *objectCache) isStopped() bool { 299 c.lock.RLock() 300 defer c.lock.RUnlock() 301 return c.stopped 302 } 303 304 func (c *objectCache) Get(namespace, name string) (runtime.Object, error) { 305 key := objectKey{namespace: namespace, name: name} 306 307 c.lock.RLock() 308 item, exists := c.items[key] 309 c.lock.RUnlock() 310 311 if !exists { 312 return nil, fmt.Errorf("object %q/%q not registered", namespace, name) 313 } 314 // Record last access time independently if it succeeded or not. 315 // This protects from premature (racy) reflector closure. 316 item.setLastAccessTime(c.clock.Now()) 317 318 // Don't restart reflector if Kubelet is already shutting down. 319 if !c.isStopped() { 320 item.restartReflectorIfNeeded() 321 } 322 if err := wait.PollImmediate(10*time.Millisecond, time.Second, item.hasSynced); err != nil { 323 return nil, fmt.Errorf("failed to sync %s cache: %v", c.groupResource.String(), err) 324 } 325 obj, exists, err := item.store.GetByKey(c.key(namespace, name)) 326 if err != nil { 327 return nil, err 328 } 329 if !exists { 330 return nil, apierrors.NewNotFound(c.groupResource, name) 331 } 332 if object, ok := obj.(runtime.Object); ok { 333 // If the returned object is immutable, stop the reflector. 334 // 335 // NOTE: we may potentially not even start the reflector if the object is 336 // already immutable. However, given that: 337 // - we want to also handle the case when object is marked as immutable later 338 // - Secrets and ConfigMaps are periodically fetched by volumemanager anyway 339 // - doing that wouldn't provide visible scalability/performance gain - we 340 // already have it from here 341 // - doing that would require significant refactoring to reflector 342 // we limit ourselves to just quickly stop the reflector here. 343 if c.isImmutable(object) { 344 item.setImmutable() 345 if item.stop() { 346 klog.V(4).InfoS("Stopped watching for changes - object is immutable", "obj", klog.KRef(namespace, name)) 347 } 348 } 349 return object, nil 350 } 351 return nil, fmt.Errorf("unexpected object type: %v", obj) 352 } 353 354 func (c *objectCache) startRecycleIdleWatch() { 355 c.lock.Lock() 356 defer c.lock.Unlock() 357 358 for key, item := range c.items { 359 if item.stopIfIdle(c.clock.Now(), c.maxIdleTime) { 360 klog.V(4).InfoS("Not acquired for long time, Stopped watching for changes", "objectKey", key, "maxIdleTime", c.maxIdleTime) 361 } 362 } 363 } 364 365 func (c *objectCache) shutdownWhenStopped(stopCh <-chan struct{}) { 366 <-stopCh 367 368 c.lock.Lock() 369 defer c.lock.Unlock() 370 371 c.stopped = true 372 for _, item := range c.items { 373 item.stop() 374 } 375 } 376 377 // NewWatchBasedManager creates a manager that keeps a cache of all objects 378 // necessary for registered pods. 379 // It implements the following logic: 380 // - whenever a pod is created or updated, we start individual watches for all 381 // referenced objects that aren't referenced from other registered pods 382 // - every GetObject() returns a value from local cache propagated via watches 383 func NewWatchBasedManager( 384 listObject listObjectFunc, 385 watchObject watchObjectFunc, 386 newObject newObjectFunc, 387 isImmutable isImmutableFunc, 388 groupResource schema.GroupResource, 389 resyncInterval time.Duration, 390 getReferencedObjects func(*v1.Pod) sets.String) Manager { 391 392 // If a configmap/secret is used as a volume, the volumeManager will visit the objectCacheItem every resyncInterval cycle, 393 // We just want to stop the objectCacheItem referenced by environment variables, 394 // So, maxIdleTime is set to an integer multiple of resyncInterval, 395 // We currently set it to 5 times. 396 maxIdleTime := resyncInterval * 5 397 398 // TODO propagate stopCh from the higher level. 399 objectStore := NewObjectCache(listObject, watchObject, newObject, isImmutable, groupResource, clock.RealClock{}, maxIdleTime, wait.NeverStop) 400 return NewCacheBasedManager(objectStore, getReferencedObjects) 401 }