github.com/cilium/cilium@v1.16.2/pkg/k8s/resource/resource.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package resource

import (
	"context"
	"fmt"
	"reflect"
	"runtime"
	"strconv"
	"strings"
	"sync"

	"github.com/cilium/hive/cell"
	"github.com/cilium/stream"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/meta"
	k8sRuntime "k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"
	"sigs.k8s.io/controller-runtime/pkg/client/apiutil"

	k8smetrics "github.com/cilium/cilium/pkg/k8s/metrics"
	"github.com/cilium/cilium/pkg/k8s/synced"
	"github.com/cilium/cilium/pkg/k8s/watchers/resources"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/promise"
)

// Resource provides access to a Kubernetes resource through either
// a stream of events or a read-only store.
//
// The events can be observed from a constructor, as subscriber
// registration is non-blocking.
//
// Store() however should only be called from a start hook, or from a
// goroutine forked from the start hook, as it blocks until the store
// has been synchronized.
//
// The subscriber can process the events from Events() asynchronously and in
// parallel, but for each event the Done() function must be called to mark
// the event as handled. If this is not done, no new events will be emitted for
// this key. If event handling is marked as failed, the configured error handler
// is called (WithErrorHandler). The default error handler will requeue the event
// (by its key) for a later retry. The requeueing is rate limited and can be
// configured with the WithRateLimiter option to Events().
//
// The resource is lazy, i.e. it will not start the informer until a call
// has been made to Events() or Store().
type Resource[T k8sRuntime.Object] interface {
	// Resource can be observed either via Observe() or via Events(). The observable
	// is implemented in terms of Events() and the same semantics apply.
	stream.Observable[Event[T]]

	// Events returns a channel of events. Each event must be marked as handled
	// with a call to Done(), which marks the key as processed. No new events for
	// this key will be emitted before Done() is called.
	//
	// A missing Done() will lead to an eventual panic (via a finalizer on Event[T]).
	// Panicking in this situation is needed as otherwise no new events would be
	// emitted for the key, and thus the contract needs to be enforced.
	//
	// A stream of Upsert events is emitted first to replay the current state of the
	// store, after which incremental upserts and deletes follow. Once the underlying
	// store has synchronized, a Sync event is emitted, followed by further
	// incremental updates:
	//
	//	(start observing), Upsert, Upsert, Upsert, (done replaying store contents), Upsert, Upsert,
	//	(store synchronized with API server), Sync, Upsert, Delete, Upsert, ...
	//
	// The emission of the Sync event does not depend on whether or not the Upsert events have
	// all been marked Done() without an error. The Sync event solely signals that the underlying
	// store has synchronized and that Upsert events for objects in the synchronized store have been
	// sent to the observer.
	//
	// When Done() is called with a non-nil error the error handler is invoked, which
	// can ignore, requeue the event (by key) or close the channel. The default error
	// handler will requeue.
	//
	// If an Upsert is retried and the object has been deleted, a Delete event will be emitted instead.
	// Conversely, if a Delete event is retried and the object has been recreated with the same key,
	// an Upsert will be emitted instead.
	//
	// If an object is created and immediately deleted, then a slow observer may not observe this at
	// all. In all cases a Delete event is only emitted if the observer has seen an Upsert. Whether or
	// not it had been successfully handled (via Done(nil)) does not affect this property.
	Events(ctx context.Context, opts ...EventsOpt) <-chan Event[T]

	// Store retrieves the read-only store for the resource. Blocks until
	// the store has been synchronized or the context cancelled.
	// Returns a non-nil error if the context is cancelled or the resource
	// has been stopped before the store has synchronized.
	Store(context.Context) (Store[T], error)
}

// New creates a new Resource[T]. Use with hive.Provide:
//
//	var exampleCell = hive.Module(
//		"example",
//		cell.Provide(
//			// Provide `Resource[*slim_corev1.Pod]` to the hive:
//			func(lc cell.Lifecycle, c k8sClient.Clientset) resource.Resource[*slim_corev1.Pod] {
//				lw := utils.ListerWatcherFromTyped[*slim_corev1.PodList](
//					c.Slim().CoreV1().Pods(""),
//				)
//				return resource.New[*slim_corev1.Pod](lc, lw)
//			},
//		),
//		...
//	)
//
//	func usePods(ctx context.Context, pods resource.Resource[*slim_corev1.Pod]) {
//		go func() {
//			for ev := range pods.Events(ctx) {
//				onPodEvent(ev)
//			}
//		}()
//	}
//
//	func onPodEvent(event resource.Event[*slim_corev1.Pod]) {
//		switch event.Kind {
//		case resource.Sync:
//			// Pods have now been synced and the set of Upsert events
//			// received thus far forms a coherent snapshot.
//
//			// Must always call event.Done(error) to mark the event as processed.
//			event.Done(nil)
//		case resource.Upsert:
//			event.Done(onPodUpsert(event.Object))
//		case resource.Delete:
//			event.Done(onPodDelete(event.Object))
//		}
//	}
//
// See also pkg/k8s/resource/example/main.go for a runnable example.
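//
// The read-only store can also be used without subscribing to events when only
// lookups are needed. A minimal sketch, assuming a hypothetical printPods helper;
// the iteration pattern below only relies on Store's IterKeys() and GetByKey():
//
//	func printPods(ctx context.Context, pods resource.Resource[*slim_corev1.Pod]) error {
//		// Store() blocks until the informer has synchronized, so call it from a
//		// start hook or a goroutine forked from one.
//		store, err := pods.Store(ctx)
//		if err != nil {
//			return err
//		}
//		iter := store.IterKeys()
//		for iter.Next() {
//			if pod, exists, err := store.GetByKey(iter.Key()); err == nil && exists {
//				fmt.Println(pod.Name)
//			}
//		}
//		return nil
//	}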
func New[T k8sRuntime.Object](lc cell.Lifecycle, lw cache.ListerWatcher, opts ...ResourceOption) Resource[T] {
	r := &resource[T]{
		lw: lw,
	}
	r.opts.sourceObj = func() k8sRuntime.Object {
		var obj T
		return obj
	}
	for _, o := range opts {
		o(&r.opts)
	}
	r.ctx, r.cancel = context.WithCancel(context.Background())
	r.reset()
	lc.Append(r)
	return r
}

type options struct {
	transform      cache.TransformFunc             // if non-nil, the object is transformed with this function before storing
	sourceObj      func() k8sRuntime.Object        // prototype for the object before it is transformed
	indexers       cache.Indexers                  // map of the optional custom indexers to be added to the underlying resource informer
	metricScope    string                          // the scope label used when recording metrics for the resource
	name           string                          // the name label used for the workqueue metrics
	releasable     bool                            // if true, the underlying informer will be stopped when the last subscriber cancels its subscription
	crdSyncPromise promise.Promise[synced.CRDSync] // optional promise to wait for
}

type ResourceOption func(o *options)

// WithTransform sets the function to transform the object before storing it.
func WithTransform[From, To k8sRuntime.Object](transform func(From) (To, error)) ResourceOption {
	return WithLazyTransform(
		func() k8sRuntime.Object {
			var obj From
			return obj
		},
		func(fromRaw any) (any, error) {
			if from, ok := fromRaw.(From); ok {
				to, err := transform(from)
				return to, err
			} else {
				var obj From
				return nil, fmt.Errorf("resource.WithTransform: expected %T, got %T", obj, fromRaw)
			}
		})
}

// WithLazyTransform sets the function to transform the object before storing it.
// Unlike "WithTransform", this defers the resolving of the source object type until the resource
// is needed. Use this in situations where the source object depends on api-server capabilities.
func WithLazyTransform(sourceObj func() k8sRuntime.Object, transform cache.TransformFunc) ResourceOption {
	return func(o *options) {
		o.sourceObj = sourceObj
		o.transform = transform
	}
}

// WithMetric enables metrics collection for the resource using the provided scope.
func WithMetric(scope string) ResourceOption {
	return func(o *options) {
		o.metricScope = scope
	}
}

// WithIndexers sets additional custom indexers on the resource store.
func WithIndexers(indexers cache.Indexers) ResourceOption {
	return func(o *options) {
		o.indexers = indexers
	}
}

// WithName sets the name of the resource. Used for workqueue metrics.
func WithName(name string) ResourceOption {
	return func(o *options) {
		o.name = name
	}
}

// WithCRDSync makes the resource wait for the given CRD sync promise before
// starting its informer.
func WithCRDSync(crdSyncPromise promise.Promise[synced.CRDSync]) ResourceOption {
	return func(o *options) {
		o.crdSyncPromise = crdSyncPromise
	}
}

// WithStoppableInformer marks the resource as releasable. A releasable resource stops
// the underlying informer if the last active subscriber cancels its subscription.
// In this case the resource is stopped and prepared again for a subsequent call to
// either Events() or Store().
// A subscriber is a consumer who has taken a reference to the store with Store() or that
// is listening to the events stream channel with Events().
// This option is meant to be used for very specific cases of resources with a high rate
// of updates that can potentially hinder scalability in very large clusters, like
// CiliumNode and CiliumEndpoint.
// For these cases, stopping the informer is required when switching to other data sources
// that scale better.
func WithStoppableInformer() ResourceOption {
	return func(o *options) {
		o.releasable = true
	}
}

type resource[T k8sRuntime.Object] struct {
	mu     lock.RWMutex
	ctx    context.Context
	cancel context.CancelFunc
	wg     sync.WaitGroup
	opts   options

	needed chan struct{}

	subscribers map[uint64]*subscriber[T]
	subId       uint64

	lw           cache.ListerWatcher
	synchronized bool // flipped to true when the informer has synced

	storePromise  promise.Promise[Store[T]]
	storeResolver promise.Resolver[Store[T]]

	// meaningful for releasable resources only
	refsMu      lock.Mutex
	refs        uint64
	resetCtx    context.Context
	resetCancel context.CancelFunc
}

var _ Resource[*corev1.Node] = &resource[*corev1.Node]{}

func (r *resource[T]) Store(ctx context.Context) (Store[T], error) {
	r.markNeeded()

	// Wait until the store has synchronized to avoid querying a store
	// that has not finished the initial listing.
	hasSynced := func() bool {
		r.mu.RLock()
		defer r.mu.RUnlock()
		return r.synchronized
	}
	cache.WaitForCacheSync(ctx.Done(), hasSynced)

	// Use an error handler to release the resource if the store promise
	// is rejected or the context is cancelled before the cache has synchronized.
	return promise.MapError(r.storePromise, func(err error) error {
		r.release()
		return err
	}).Await(ctx)
}

func (r *resource[T]) metricEventProcessed(eventKind EventKind, status bool) {
	if r.opts.metricScope == "" {
		return
	}

	result := "success"
	if !status {
		result = "failed"
	}

	var action string
	switch eventKind {
	case Sync:
		return
	case Upsert:
		action = "update"
	case Delete:
		action = "delete"
	}

	metrics.KubernetesEventProcessed.WithLabelValues(r.opts.metricScope, action, result).Inc()
}

func (r *resource[T]) metricEventReceived(action string, valid, equal bool) {
	if r.opts.metricScope == "" {
		return
	}

	k8smetrics.LastInteraction.Reset()

	metrics.EventTS.WithLabelValues(metrics.LabelEventSourceK8s, r.opts.metricScope, action).SetToCurrentTime()
	validStr := strconv.FormatBool(valid)
	equalStr := strconv.FormatBool(equal)
	metrics.KubernetesEventReceived.WithLabelValues(r.opts.metricScope, action, validStr, equalStr).Inc()
}

func (r *resource[T]) Start(cell.HookContext) error {
	r.start()
	return nil
}

func (r *resource[T]) start() {
	// Don't start the resource if it has been definitely stopped.
	if r.ctx.Err() != nil {
		return
	}
	r.wg.Add(1)
	go r.startWhenNeeded()
}

func (r *resource[T]) markNeeded() {
	if r.opts.releasable {
		r.refsMu.Lock()
		r.refs++
		r.refsMu.Unlock()
	}

	select {
	case r.needed <- struct{}{}:
	default:
	}
}

func (r *resource[T]) startWhenNeeded() {
	defer r.wg.Done()

	// Wait until we're needed before starting the informer.
	select {
	case <-r.ctx.Done():
		return
	case <-r.needed:
	}

	// Short-circuit if we're being stopped.
	if r.ctx.Err() != nil {
		return
	}

	// Wait for CRDs to have synced before trying to access (Cilium) k8s resources.
	if r.opts.crdSyncPromise != nil {
		r.opts.crdSyncPromise.Await(r.ctx)
	}

	store, informer := r.newInformer()
	r.storeResolver.Resolve(&typedStore[T]{
		store:   store,
		release: r.release,
	})

	r.wg.Add(1)
	go func() {
		defer r.wg.Done()
		informer.Run(merge(r.ctx.Done(), r.resetCtx.Done()))
	}()

	// Wait for the cache to be synced before emitting the sync event.
	if cache.WaitForCacheSync(merge(r.ctx.Done(), r.resetCtx.Done()), informer.HasSynced) {
		// Emit the sync event for all current subscribers. Subscribers
		// that subscribe afterwards will emit it themselves by checking
		// r.synchronized.
		r.mu.Lock()
		for _, sub := range r.subscribers {
			sub.enqueueSync()
		}
		r.synchronized = true
		r.mu.Unlock()
	}
}

func (r *resource[T]) Stop(stopCtx cell.HookContext) error {
	if r.opts.releasable {
		// Grab the refs lock to avoid a concurrent restart for a releasable resource.
		r.refsMu.Lock()
		defer r.refsMu.Unlock()
	}

	r.cancel()
	r.wg.Wait()
	return nil
}

type eventsOpts struct {
	rateLimiter  workqueue.RateLimiter
	errorHandler ErrorHandler
}

type EventsOpt func(*eventsOpts)

// WithRateLimiter sets the rate limiting algorithm to be used when requeueing failed events.
func WithRateLimiter(r workqueue.RateLimiter) EventsOpt {
	return func(o *eventsOpts) {
		o.rateLimiter = r
	}
}

// WithErrorHandler specifies the error handling strategy for failed events. By default
// the strategy is to always requeue the processing of a failed event.
func WithErrorHandler(h ErrorHandler) EventsOpt {
	return func(o *eventsOpts) {
		o.errorHandler = h
	}
}

func (r *resource[T]) Observe(ctx context.Context, next func(Event[T]), complete func(error)) {
	stream.FromChannel(r.Events(ctx)).Observe(ctx, next, complete)
}

// Events subscribes the caller to resource events.
//
// Each subscriber has its own queue and can process events at its own
// rate. Only object keys are queued, and if an object is changed multiple times
// before the subscriber can handle the event, only the latest state of the object
// is emitted.
//
// The 'ctx' is used to cancel the subscription. The returned channel will be
// closed when the context is cancelled.
//
// Options are supported to configure rate limiting of retries
// (WithRateLimiter) and the error handling strategy (WithErrorHandler).
//
// By default all errors are retried, the default rate limiter of the workqueue
// package is used and the channel is unbuffered.
func (r *resource[T]) Events(ctx context.Context, opts ...EventsOpt) <-chan Event[T] {
	_, callerFile, callerLine, _ := runtime.Caller(1)
	debugInfo := fmt.Sprintf("%T.Events() called from %s:%d", r, callerFile, callerLine)

	options := eventsOpts{
		errorHandler: AlwaysRetry, // Default error handling is to always retry.
		rateLimiter:  workqueue.DefaultControllerRateLimiter(),
	}
	for _, apply := range opts {
		apply(&options)
	}

	// Mark the resource as needed. This will start the informer if it has not
	// been started already.
	r.markNeeded()

	out := make(chan Event[T])
	ctx, subCancel := context.WithCancel(ctx)

	sub := &subscriber[T]{
		r:         r,
		options:   options,
		debugInfo: debugInfo,
		wq: workqueue.NewRateLimitingQueueWithConfig(options.rateLimiter,
			workqueue.RateLimitingQueueConfig{Name: r.resourceName()}),
	}

	// Fork a goroutine to process the queued keys and pass them to the subscriber.
	r.wg.Add(1)
	go func() {
		defer r.release()
		defer r.wg.Done()
		defer close(out)

		// Grab a handle to the store. Asynchronous as the informer is started in the background.
		store, err := r.storePromise.Await(ctx)
		if err != nil {
			// Subscriber cancelled before the informer started, bail out.
			return
		}

		r.mu.Lock()
		subId := r.subId
		r.subId++
		r.subscribers[subId] = sub

		// Populate the queue with the initial set of keys that are already
		// in the store. Done under the resource lock to synchronize with delta
		// processing to make sure we don't end up queuing the key as an initial key,
		// processing it and then requeuing it again.
		initialKeys := store.IterKeys()
		for initialKeys.Next() {
			sub.enqueueKey(initialKeys.Key())
		}

		// If the informer is already synchronized, then the above set of keys is a consistent
		// snapshot and we can queue the sync entry. If we're not yet synchronized the sync will
		// be queued from startWhenNeeded() after the informer has synchronized.
		if r.synchronized {
			sub.enqueueSync()
		}
		r.mu.Unlock()

		sub.processLoop(ctx, out, store)

		r.mu.Lock()
		delete(r.subscribers, subId)
		r.mu.Unlock()
	}()

	// Fork a goroutine to wait for either the subscriber cancelling or the resource
	// shutting down.
	r.wg.Add(1)
	go func() {
		defer r.wg.Done()
		select {
		case <-r.ctx.Done():
		case <-r.resetCtx.Done():
		case <-ctx.Done():
		}
		subCancel()
		sub.wq.ShutDownWithDrain()
	}()

	return out
}

func (r *resource[T]) release() {
	if !r.opts.releasable {
		return
	}

	// In case of a releasable resource, stop the underlying informer when the last
	// reference to it is released. The resource is restarted to be
	// ready again in case of a subsequent call to either Events() or Store().

	r.refsMu.Lock()
	defer r.refsMu.Unlock()

	r.refs--
	if r.refs > 0 {
		return
	}

	r.resetCancel()
	r.wg.Wait()
	close(r.needed)

	r.reset()
	r.start()
}

func (r *resource[T]) reset() {
	r.subscribers = make(map[uint64]*subscriber[T])
	r.needed = make(chan struct{}, 1)
	r.synchronized = false
	r.storeResolver, r.storePromise = promise.New[Store[T]]()
	r.resetCtx, r.resetCancel = context.WithCancel(context.Background())
}

func (r *resource[T]) resourceName() string {
	if r.opts.name != "" {
		return r.opts.name
	}

	// We create a new pointer to the reconciled resource type.
	// For example, with resource[*cilium_api_v2.CiliumNode], new(T) returns **cilium_api_v2.CiliumNode
	// and *new(T) is nil.
	// So we create a new pointer using reflect.New().
	o := *new(T)
	sourceObj := reflect.New(reflect.TypeOf(o).Elem()).Interface().(T)

	gvk, err := apiutil.GVKForObject(sourceObj, scheme)
	if err != nil {
		return ""
	}

	return strings.ToLower(gvk.Kind)
}

type subscriber[T k8sRuntime.Object] struct {
	r         *resource[T]
	debugInfo string
	wq        workqueue.RateLimitingInterface
	options   eventsOpts
}

func (s *subscriber[T]) processLoop(ctx context.Context, out chan Event[T], store Store[T]) {
	// Make sure to call ShutDown() at the end. Calling ShutDownWithDrain is not
	// enough as DelayingQueue does not implement it, so without ShutDown() we'd
	// leak the (*delayingType).waitingLoop.
	defer s.wq.ShutDown()

	doneFinalizer := func(done *bool) {
		// If you get here it is because an Event[T] was handed to a subscriber
		// that forgot to call Event[T].Done().
		//
		// Calling Done() is needed to mark the event as handled. This allows
		// the next event for the same key to be handled and is used to clear
		// rate limiting and retry counts of prior failures.
		panic(fmt.Sprintf(
			"%s has a broken event handler that did not call Done() "+
				"before event was garbage collected",
			s.debugInfo))
	}

	// To synthesize delete events for the subscriber we keep track of the last known state
	// of the object given to the subscriber. Objects are removed from this map when delete
	// events are successfully processed.
	var lastKnownObjects lastKnownObjects[T]

loop:
	for {
		// Retrieve an item from the subscriber's queue and then fetch the object
		// from the store.
		workItem, shutdown := s.getWorkItem()
		if shutdown {
			break
		}

		var event Event[T]

		switch workItem := workItem.(type) {
		case syncWorkItem:
			event.Kind = Sync
		case keyWorkItem:
			obj, exists, err := store.GetByKey(workItem.key)
			if !exists || err != nil {
				// The object no longer exists in the store and thus has been deleted.
				deletedObject, ok := lastKnownObjects.Load(workItem.key)
				if !ok {
					// Object was never seen by the subscriber. Ignore the event.
					s.wq.Done(workItem)
					continue loop
				}
				event.Kind = Delete
				event.Key = workItem.key
				event.Object = deletedObject
			} else {
				lastKnownObjects.Store(workItem.key, obj)
				event.Kind = Upsert
				event.Key = workItem.key
				event.Object = obj
			}
		default:
			panic(fmt.Sprintf("%T: unknown work item %T", s.r, workItem))
		}

		// eventDoneSentinel is a heap-allocated object referenced by Done().
		// If Done() is not called, a finalizer set on this object will be invoked,
		// which panics. If Done() is called, the finalizer is unset.
		var eventDoneSentinel = new(bool)
		event.Done = func(err error) {
			runtime.SetFinalizer(eventDoneSentinel, nil)

			if err == nil && event.Kind == Delete {
				// Deletion processed successfully. Remove it from the set of
				// deleted objects unless it was replaced by an upsert or a newer
				// deletion.
				lastKnownObjects.DeleteByUID(event.Key, event.Object)
			}

			s.eventDone(workItem, err)

			s.r.metricEventProcessed(event.Kind, err == nil)
		}

		// Add a finalizer to catch forgotten calls to Done().
		runtime.SetFinalizer(eventDoneSentinel, doneFinalizer)

		select {
		case out <- event:
		case <-ctx.Done():
			// Subscriber cancelled or resource is shutting down.
			// We're not requiring the subscriber to drain the channel, so we're
			// marking the event done here and not sending it.
			event.Done(nil)

			// Drain the queue without further processing.
			for {
				_, shutdown := s.getWorkItem()
				if shutdown {
					return
				}
			}
		}
	}
}

func (s *subscriber[T]) getWorkItem() (e workItem, shutdown bool) {
	var raw any
	raw, shutdown = s.wq.Get()
	if shutdown {
		return
	}
	return raw.(workItem), false
}

func (s *subscriber[T]) enqueueSync() {
	s.wq.Add(syncWorkItem{})
}

func (s *subscriber[T]) enqueueKey(key Key) {
	s.wq.Add(keyWorkItem{key})
}

func (s *subscriber[T]) eventDone(entry workItem, err error) {
	// This is based on the example found in k8s.io/client-go/examples/workqueue/main.go.

	// Mark the object as done being processed. If it was marked dirty
	// during processing, it'll be processed again.
	defer s.wq.Done(entry)

	if err != nil {
		numRequeues := s.wq.NumRequeues(entry)

		var action ErrorAction
		switch entry := entry.(type) {
		case syncWorkItem:
			action = s.options.errorHandler(Key{}, numRequeues, err)
		case keyWorkItem:
			action = s.options.errorHandler(entry.key, numRequeues, err)
		default:
			panic(fmt.Sprintf("keyQueue: unhandled entry %T", entry))
		}

		switch action {
		case ErrorActionRetry:
			s.wq.AddRateLimited(entry)
		case ErrorActionStop:
			s.wq.ShutDown()
		case ErrorActionIgnore:
			s.wq.Forget(entry)
		default:
			panic(fmt.Sprintf("keyQueue: unknown action %q from error handler %v", action, s.options.errorHandler))
		}
	} else {
		// As the object was processed successfully we can "forget" it.
		// This clears any rate limiter state associated with this object, so
		// it won't be throttled based on previous failure history.
		s.wq.Forget(entry)
	}
}

// lastKnownObjects stores the last known state of an object from a subscriber's
// perspective. It is used to emit delete events with the last known state of
// the object.
type lastKnownObjects[T k8sRuntime.Object] struct {
	mu   lock.RWMutex
	objs map[Key]T
}

func (l *lastKnownObjects[T]) Load(key Key) (obj T, ok bool) {
	l.mu.RLock()
	defer l.mu.RUnlock()
	obj, ok = l.objs[key]
	return
}

func (l *lastKnownObjects[T]) Store(key Key, obj T) {
	l.mu.Lock()
	defer l.mu.Unlock()
	if l.objs == nil {
		l.objs = map[Key]T{}
	}
	l.objs[key] = obj
}

// DeleteByUID removes the object, but only if the UID matches. The UID
// might not match if the object has been re-created with the same key
// after deletion and thus Store'd again here. Once that incarnation
// is deleted, we will be here again and the UID will match.
func (l *lastKnownObjects[T]) DeleteByUID(key Key, objToDelete T) {
	l.mu.Lock()
	defer l.mu.Unlock()

	if obj, ok := l.objs[key]; ok {
		if getUID(obj) == getUID(objToDelete) {
			delete(l.objs, key)
		}
	}
}

// workItem restricts the set of types we use when type-switching over the
// queue entries, so that we'll get a compiler error on impossible types.
//
// The queue entries must be kept comparable and not be pointers as we want
// to be able to coalesce multiple keyWorkItems into a single element in the
// queue.
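//
// For example, if enqueueKey(k) is called twice for the same key while the
// item is still waiting, only a single keyWorkItem{k} is handed to the
// subscriber, since client-go's workqueue coalesces equal items that have not
// yet been picked up.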
type workItem interface {
	isWorkItem()
}

// syncWorkItem marks the store as synchronized and thus a 'Sync' event can be
// emitted to the subscriber.
type syncWorkItem struct{}

func (syncWorkItem) isWorkItem() {}

// keyWorkItem marks work for a specific key. Whether this is an upsert or a delete
// depends on the state of the store at the time this work item is processed.
type keyWorkItem struct {
	key Key
}

func (keyWorkItem) isWorkItem() {}

type wrapperController struct {
	cache.Controller
	cacheMutationDetector cache.MutationDetector
}

func (p *wrapperController) Run(stopCh <-chan struct{}) {
	go p.cacheMutationDetector.Run(stopCh)
	p.Controller.Run(stopCh)
}

func (r *resource[T]) newInformer() (cache.Indexer, cache.Controller) {
	clientState := cache.NewIndexer(cache.DeletionHandlingMetaNamespaceKeyFunc, r.opts.indexers)
	opts := cache.DeltaFIFOOptions{KeyFunction: cache.MetaNamespaceKeyFunc, KnownObjects: clientState}
	fifo := cache.NewDeltaFIFOWithOptions(opts)
	transformer := r.opts.transform
	cacheMutationDetector := cache.NewCacheMutationDetector(fmt.Sprintf("%T", r))
	cfg := &cache.Config{
		Queue:            fifo,
		ListerWatcher:    r.lw,
		ObjectType:       r.opts.sourceObj(),
		FullResyncPeriod: 0,
		RetryOnError:     false,
		Process: func(obj interface{}, isInInitialList bool) error {
			// Processing of the deltas is done under the resource mutex. This
			// avoids emitting duplicate events for new subscribers that list the
			// keys in the store.
			r.mu.RLock()
			defer r.mu.RUnlock()

			for _, d := range obj.(cache.Deltas) {
				var obj interface{}
				if transformer != nil {
					var err error
					if obj, err = transformer(d.Object); err != nil {
						return err
					}
				} else {
					obj = d.Object
				}

				// In CI we detect if the objects were modified and panic
				// (e.g. when KUBE_CACHE_MUTATION_DETECTOR is set);
				// this is a no-op in production environments.
				cacheMutationDetector.AddObject(obj)

				key := NewKey(obj)

				switch d.Type {
				case cache.Sync, cache.Added, cache.Updated:
					metric := resources.MetricCreate
					if d.Type != cache.Added {
						metric = resources.MetricUpdate
					}
					r.metricEventReceived(metric, true, false)

					if _, exists, err := clientState.Get(obj); err == nil && exists {
						if err := clientState.Update(obj); err != nil {
							return err
						}
					} else {
						if err := clientState.Add(obj); err != nil {
							return err
						}
					}

					for _, sub := range r.subscribers {
						sub.enqueueKey(key)
					}
				case cache.Deleted:
					r.metricEventReceived(resources.MetricDelete, true, false)

					if err := clientState.Delete(obj); err != nil {
						return err
					}

					for _, sub := range r.subscribers {
						sub.enqueueKey(key)
					}
				}
			}
			return nil
		},
	}
	return clientState, &wrapperController{
		Controller:            cache.New(cfg),
		cacheMutationDetector: cacheMutationDetector,
	}
}

func getUID(obj k8sRuntime.Object) types.UID {
	meta, err := meta.Accessor(obj)
	if err != nil {
		// If we get here, it means the object does not implement ObjectMeta, and thus
		// the Resource[T] has been instantiated with an unsuitable type T.
		// As this would be caught immediately during development, panicking is
		// acceptable here.
		panic(fmt.Sprintf("BUG: meta.Accessor() failed on %T: %s", obj, err))
	}
	return meta.GetUID()
}

func merge[T any](c1, c2 <-chan T) <-chan T {
	m := make(chan T)
	go func() {
		select {
		case <-c1:
		case <-c2:
		}
		close(m)
	}()
	return m
}
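
// Example (sketch): consuming events with a custom retry policy via the
// WithRateLimiter and WithErrorHandler options. This is illustrative only;
// handlePod is a hypothetical callback and the rate limiter values are
// arbitrary. Errors are retried at most five times and then dropped, instead
// of the default of retrying indefinitely.
//
//	func processPods(ctx context.Context, pods Resource[*corev1.Pod]) {
//		events := pods.Events(ctx,
//			WithRateLimiter(workqueue.NewItemExponentialFailureRateLimiter(100*time.Millisecond, time.Minute)),
//			WithErrorHandler(func(key Key, numRetries int, err error) ErrorAction {
//				if numRetries >= 5 {
//					return ErrorActionIgnore
//				}
//				return ErrorActionRetry
//			}),
//		)
//		for ev := range events {
//			var err error
//			if ev.Kind == Upsert || ev.Kind == Delete {
//				err = handlePod(ev)
//			}
//			// Done() must always be called, also for Sync events; a non-nil
//			// error invokes the error handler above.
//			ev.Done(err)
//		}
//	}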