k8s.io/kubernetes@v1.29.3/pkg/kubelet/util/manager/watch_based_manager.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package manager 18 19 import ( 20 "fmt" 21 "sync" 22 "time" 23 24 v1 "k8s.io/api/core/v1" 25 "k8s.io/client-go/tools/cache" 26 27 "k8s.io/klog/v2" 28 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/fields" 32 "k8s.io/apimachinery/pkg/runtime" 33 "k8s.io/apimachinery/pkg/runtime/schema" 34 "k8s.io/apimachinery/pkg/types" 35 "k8s.io/apimachinery/pkg/util/sets" 36 "k8s.io/apimachinery/pkg/util/wait" 37 "k8s.io/apimachinery/pkg/watch" 38 "k8s.io/utils/clock" 39 ) 40 41 type listObjectFunc func(string, metav1.ListOptions) (runtime.Object, error) 42 type watchObjectFunc func(string, metav1.ListOptions) (watch.Interface, error) 43 type newObjectFunc func() runtime.Object 44 type isImmutableFunc func(runtime.Object) bool 45 46 // objectCacheItem is a single item stored in objectCache. 47 type objectCacheItem struct { 48 refMap map[types.UID]int 49 store *cacheStore 50 reflector *cache.Reflector 51 52 hasSynced func() (bool, error) 53 54 // waitGroup is used to ensure that there won't be two concurrent calls to reflector.Run 55 waitGroup sync.WaitGroup 56 57 // lock is to ensure the access and modify of lastAccessTime, stopped, and immutable are thread safety, 58 // and protecting from closing stopCh multiple times. 59 lock sync.Mutex 60 lastAccessTime time.Time 61 stopped bool 62 immutable bool 63 stopCh chan struct{} 64 } 65 66 func (i *objectCacheItem) stop() bool { 67 i.lock.Lock() 68 defer i.lock.Unlock() 69 return i.stopThreadUnsafe() 70 } 71 72 func (i *objectCacheItem) stopThreadUnsafe() bool { 73 if i.stopped { 74 return false 75 } 76 i.stopped = true 77 close(i.stopCh) 78 if !i.immutable { 79 i.store.unsetInitialized() 80 } 81 return true 82 } 83 84 func (i *objectCacheItem) setLastAccessTime(time time.Time) { 85 i.lock.Lock() 86 defer i.lock.Unlock() 87 i.lastAccessTime = time 88 } 89 90 func (i *objectCacheItem) setImmutable() { 91 i.lock.Lock() 92 defer i.lock.Unlock() 93 i.immutable = true 94 } 95 96 func (i *objectCacheItem) stopIfIdle(now time.Time, maxIdleTime time.Duration) bool { 97 i.lock.Lock() 98 defer i.lock.Unlock() 99 // Ensure that we don't try to stop not yet initialized reflector. 100 // In case of overloaded kube-apiserver, if the list request is 101 // already being processed, all the work would lost and would have 102 // to be retried. 103 if !i.stopped && i.store.hasSynced() && now.After(i.lastAccessTime.Add(maxIdleTime)) { 104 return i.stopThreadUnsafe() 105 } 106 return false 107 } 108 109 func (i *objectCacheItem) restartReflectorIfNeeded() { 110 i.lock.Lock() 111 defer i.lock.Unlock() 112 if i.immutable || !i.stopped { 113 return 114 } 115 i.stopCh = make(chan struct{}) 116 i.stopped = false 117 go i.startReflector() 118 } 119 120 func (i *objectCacheItem) startReflector() { 121 i.waitGroup.Wait() 122 i.waitGroup.Add(1) 123 defer i.waitGroup.Done() 124 i.reflector.Run(i.stopCh) 125 } 126 127 // cacheStore is in order to rewrite Replace function to mark initialized flag 128 type cacheStore struct { 129 cache.Store 130 lock sync.Mutex 131 initialized bool 132 } 133 134 func (c *cacheStore) Replace(list []interface{}, resourceVersion string) error { 135 c.lock.Lock() 136 defer c.lock.Unlock() 137 err := c.Store.Replace(list, resourceVersion) 138 if err != nil { 139 return err 140 } 141 c.initialized = true 142 return nil 143 } 144 145 func (c *cacheStore) hasSynced() bool { 146 c.lock.Lock() 147 defer c.lock.Unlock() 148 return c.initialized 149 } 150 151 func (c *cacheStore) unsetInitialized() { 152 c.lock.Lock() 153 defer c.lock.Unlock() 154 c.initialized = false 155 } 156 157 // objectCache is a local cache of objects propagated via 158 // individual watches. 159 type objectCache struct { 160 listObject listObjectFunc 161 watchObject watchObjectFunc 162 newObject newObjectFunc 163 isImmutable isImmutableFunc 164 groupResource schema.GroupResource 165 clock clock.Clock 166 maxIdleTime time.Duration 167 168 lock sync.RWMutex 169 items map[objectKey]*objectCacheItem 170 stopped bool 171 } 172 173 const minIdleTime = 1 * time.Minute 174 175 // NewObjectCache returns a new watch-based instance of Store interface. 176 func NewObjectCache( 177 listObject listObjectFunc, 178 watchObject watchObjectFunc, 179 newObject newObjectFunc, 180 isImmutable isImmutableFunc, 181 groupResource schema.GroupResource, 182 clock clock.Clock, 183 maxIdleTime time.Duration, 184 stopCh <-chan struct{}) Store { 185 186 if maxIdleTime < minIdleTime { 187 maxIdleTime = minIdleTime 188 } 189 190 store := &objectCache{ 191 listObject: listObject, 192 watchObject: watchObject, 193 newObject: newObject, 194 isImmutable: isImmutable, 195 groupResource: groupResource, 196 clock: clock, 197 maxIdleTime: maxIdleTime, 198 items: make(map[objectKey]*objectCacheItem), 199 } 200 201 go wait.Until(store.startRecycleIdleWatch, time.Minute, stopCh) 202 go store.shutdownWhenStopped(stopCh) 203 return store 204 } 205 206 func (c *objectCache) newStore() *cacheStore { 207 // TODO: We may consider created a dedicated store keeping just a single 208 // item, instead of using a generic store implementation for this purpose. 209 // However, simple benchmarks show that memory overhead in that case is 210 // decrease from ~600B to ~300B per object. So we are not optimizing it 211 // until we will see a good reason for that. 212 store := cache.NewStore(cache.MetaNamespaceKeyFunc) 213 return &cacheStore{store, sync.Mutex{}, false} 214 } 215 216 func (c *objectCache) newReflectorLocked(namespace, name string) *objectCacheItem { 217 fieldSelector := fields.Set{"metadata.name": name}.AsSelector().String() 218 listFunc := func(options metav1.ListOptions) (runtime.Object, error) { 219 options.FieldSelector = fieldSelector 220 return c.listObject(namespace, options) 221 } 222 watchFunc := func(options metav1.ListOptions) (watch.Interface, error) { 223 options.FieldSelector = fieldSelector 224 return c.watchObject(namespace, options) 225 } 226 store := c.newStore() 227 reflector := cache.NewNamedReflector( 228 fmt.Sprintf("object-%q/%q", namespace, name), 229 &cache.ListWatch{ListFunc: listFunc, WatchFunc: watchFunc}, 230 c.newObject(), 231 store, 232 0, 233 ) 234 item := &objectCacheItem{ 235 refMap: make(map[types.UID]int), 236 store: store, 237 reflector: reflector, 238 hasSynced: func() (bool, error) { return store.hasSynced(), nil }, 239 stopCh: make(chan struct{}), 240 } 241 242 // Don't start reflector if Kubelet is already shutting down. 243 if !c.stopped { 244 go item.startReflector() 245 } 246 return item 247 } 248 249 func (c *objectCache) AddReference(namespace, name string, referencedFrom types.UID) { 250 key := objectKey{namespace: namespace, name: name} 251 252 // AddReference is called from RegisterPod thus it needs to be efficient. 253 // Thus, it is only increasing refCount and in case of first registration 254 // of a given object it starts corresponding reflector. 255 // It's responsibility of the first Get operation to wait until the 256 // reflector propagated the store. 257 c.lock.Lock() 258 defer c.lock.Unlock() 259 item, exists := c.items[key] 260 if !exists { 261 item = c.newReflectorLocked(namespace, name) 262 c.items[key] = item 263 } 264 item.refMap[referencedFrom]++ 265 } 266 267 func (c *objectCache) DeleteReference(namespace, name string, referencedFrom types.UID) { 268 key := objectKey{namespace: namespace, name: name} 269 270 c.lock.Lock() 271 defer c.lock.Unlock() 272 if item, ok := c.items[key]; ok { 273 item.refMap[referencedFrom]-- 274 if item.refMap[referencedFrom] == 0 { 275 delete(item.refMap, referencedFrom) 276 } 277 if len(item.refMap) == 0 { 278 // Stop the underlying reflector. 279 item.stop() 280 delete(c.items, key) 281 } 282 } 283 } 284 285 // key returns key of an object with a given name and namespace. 286 // This has to be in-sync with cache.MetaNamespaceKeyFunc. 287 func (c *objectCache) key(namespace, name string) string { 288 if len(namespace) > 0 { 289 return namespace + "/" + name 290 } 291 return name 292 } 293 294 func (c *objectCache) isStopped() bool { 295 c.lock.RLock() 296 defer c.lock.RUnlock() 297 return c.stopped 298 } 299 300 func (c *objectCache) Get(namespace, name string) (runtime.Object, error) { 301 key := objectKey{namespace: namespace, name: name} 302 303 c.lock.RLock() 304 item, exists := c.items[key] 305 c.lock.RUnlock() 306 307 if !exists { 308 return nil, fmt.Errorf("object %q/%q not registered", namespace, name) 309 } 310 // Record last access time independently if it succeeded or not. 311 // This protects from premature (racy) reflector closure. 312 item.setLastAccessTime(c.clock.Now()) 313 314 // Don't restart reflector if Kubelet is already shutting down. 315 if !c.isStopped() { 316 item.restartReflectorIfNeeded() 317 } 318 if err := wait.PollImmediate(10*time.Millisecond, time.Second, item.hasSynced); err != nil { 319 return nil, fmt.Errorf("failed to sync %s cache: %v", c.groupResource.String(), err) 320 } 321 obj, exists, err := item.store.GetByKey(c.key(namespace, name)) 322 if err != nil { 323 return nil, err 324 } 325 if !exists { 326 return nil, apierrors.NewNotFound(c.groupResource, name) 327 } 328 if object, ok := obj.(runtime.Object); ok { 329 // If the returned object is immutable, stop the reflector. 330 // 331 // NOTE: we may potentially not even start the reflector if the object is 332 // already immutable. However, given that: 333 // - we want to also handle the case when object is marked as immutable later 334 // - Secrets and ConfigMaps are periodically fetched by volumemanager anyway 335 // - doing that wouldn't provide visible scalability/performance gain - we 336 // already have it from here 337 // - doing that would require significant refactoring to reflector 338 // we limit ourselves to just quickly stop the reflector here. 339 if c.isImmutable(object) { 340 item.setImmutable() 341 if item.stop() { 342 klog.V(4).InfoS("Stopped watching for changes - object is immutable", "obj", klog.KRef(namespace, name)) 343 } 344 } 345 return object, nil 346 } 347 return nil, fmt.Errorf("unexpected object type: %v", obj) 348 } 349 350 func (c *objectCache) startRecycleIdleWatch() { 351 c.lock.Lock() 352 defer c.lock.Unlock() 353 354 for key, item := range c.items { 355 if item.stopIfIdle(c.clock.Now(), c.maxIdleTime) { 356 klog.V(4).InfoS("Not acquired for long time, Stopped watching for changes", "objectKey", key, "maxIdleTime", c.maxIdleTime) 357 } 358 } 359 } 360 361 func (c *objectCache) shutdownWhenStopped(stopCh <-chan struct{}) { 362 <-stopCh 363 364 c.lock.Lock() 365 defer c.lock.Unlock() 366 367 c.stopped = true 368 for _, item := range c.items { 369 item.stop() 370 } 371 } 372 373 // NewWatchBasedManager creates a manager that keeps a cache of all objects 374 // necessary for registered pods. 375 // It implements the following logic: 376 // - whenever a pod is created or updated, we start individual watches for all 377 // referenced objects that aren't referenced from other registered pods 378 // - every GetObject() returns a value from local cache propagated via watches 379 func NewWatchBasedManager( 380 listObject listObjectFunc, 381 watchObject watchObjectFunc, 382 newObject newObjectFunc, 383 isImmutable isImmutableFunc, 384 groupResource schema.GroupResource, 385 resyncInterval time.Duration, 386 getReferencedObjects func(*v1.Pod) sets.String) Manager { 387 388 // If a configmap/secret is used as a volume, the volumeManager will visit the objectCacheItem every resyncInterval cycle, 389 // We just want to stop the objectCacheItem referenced by environment variables, 390 // So, maxIdleTime is set to an integer multiple of resyncInterval, 391 // We currently set it to 5 times. 392 maxIdleTime := resyncInterval * 5 393 394 // TODO propagate stopCh from the higher level. 395 objectStore := NewObjectCache(listObject, watchObject, newObject, isImmutable, groupResource, clock.RealClock{}, maxIdleTime, wait.NeverStop) 396 return NewCacheBasedManager(objectStore, getReferencedObjects) 397 }