k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/endpointslicemirroring/endpointslicemirroring_controller.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package endpointslicemirroring 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 "golang.org/x/time/rate" 25 26 v1 "k8s.io/api/core/v1" 27 discovery "k8s.io/api/discovery/v1" 28 apierrors "k8s.io/apimachinery/pkg/api/errors" 29 "k8s.io/apimachinery/pkg/labels" 30 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 31 "k8s.io/apimachinery/pkg/util/wait" 32 coreinformers "k8s.io/client-go/informers/core/v1" 33 discoveryinformers "k8s.io/client-go/informers/discovery/v1" 34 clientset "k8s.io/client-go/kubernetes" 35 "k8s.io/client-go/kubernetes/scheme" 36 v1core "k8s.io/client-go/kubernetes/typed/core/v1" 37 corelisters "k8s.io/client-go/listers/core/v1" 38 discoverylisters "k8s.io/client-go/listers/discovery/v1" 39 "k8s.io/client-go/tools/cache" 40 "k8s.io/client-go/tools/record" 41 "k8s.io/client-go/util/workqueue" 42 endpointsliceutil "k8s.io/endpointslice/util" 43 "k8s.io/klog/v2" 44 "k8s.io/kubernetes/pkg/controller" 45 "k8s.io/kubernetes/pkg/controller/endpointslicemirroring/metrics" 46 endpointslicepkg "k8s.io/kubernetes/pkg/controller/util/endpointslice" 47 ) 48 49 const ( 50 // maxRetries is the number of times an Endpoints resource will be retried 51 // before it is dropped out of the queue. Any sync error, such as a failure 52 // to create or update an EndpointSlice could trigger a retry. With the 53 // current rate-limiter in use (1s*2^(numRetries-1)) up to a max of 100s. 54 // The following numbers represent the sequence of delays between successive 55 // queuings of an Endpoints resource. 56 // 57 // 1s, 2s, 4s, 8s, 16s, 32s, 64s, 100s (max) 58 maxRetries = 15 59 60 // defaultSyncBackOff is the default backoff period for syncEndpoints calls. 61 defaultSyncBackOff = 1 * time.Second 62 // maxSyncBackOff is the max backoff period for syncEndpoints calls. 63 maxSyncBackOff = 100 * time.Second 64 65 // controllerName is a unique value used with LabelManagedBy to indicated 66 // the component managing an EndpointSlice. 67 controllerName = "endpointslicemirroring-controller.k8s.io" 68 ) 69 70 // NewController creates and initializes a new Controller 71 func NewController(ctx context.Context, endpointsInformer coreinformers.EndpointsInformer, 72 endpointSliceInformer discoveryinformers.EndpointSliceInformer, 73 serviceInformer coreinformers.ServiceInformer, 74 maxEndpointsPerSubset int32, 75 client clientset.Interface, 76 endpointUpdatesBatchPeriod time.Duration, 77 ) *Controller { 78 logger := klog.FromContext(ctx) 79 broadcaster := record.NewBroadcaster(record.WithContext(ctx)) 80 recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "endpoint-slice-mirroring-controller"}) 81 82 metrics.RegisterMetrics() 83 84 c := &Controller{ 85 client: client, 86 // This is similar to the DefaultControllerRateLimiter, just with a 87 // significantly higher default backoff (1s vs 5ms). This controller 88 // processes events that can require significant EndpointSlice changes. 89 // A more significant rate limit back off here helps ensure that the 90 // Controller does not overwhelm the API Server. 91 queue: workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.NewTypedMaxOfRateLimiter( 92 workqueue.NewTypedItemExponentialFailureRateLimiter[string](defaultSyncBackOff, maxSyncBackOff), 93 // 10 qps, 100 bucket size. This is only for retry speed and its 94 // only the overall factor (not per item). 95 &workqueue.TypedBucketRateLimiter[string]{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, 96 ), 97 workqueue.TypedRateLimitingQueueConfig[string]{ 98 Name: "endpoint_slice_mirroring", 99 }, 100 ), 101 workerLoopPeriod: time.Second, 102 } 103 104 endpointsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 105 AddFunc: func(obj interface{}) { 106 c.onEndpointsAdd(logger, obj) 107 }, 108 UpdateFunc: func(oldObj, newObj interface{}) { 109 c.onEndpointsUpdate(logger, oldObj, newObj) 110 }, 111 DeleteFunc: func(obj interface{}) { 112 c.onEndpointsDelete(logger, obj) 113 }, 114 }) 115 c.endpointsLister = endpointsInformer.Lister() 116 c.endpointsSynced = endpointsInformer.Informer().HasSynced 117 118 endpointSliceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 119 AddFunc: c.onEndpointSliceAdd, 120 UpdateFunc: func(oldObj, newObj interface{}) { 121 c.onEndpointSliceUpdate(logger, oldObj, newObj) 122 }, 123 DeleteFunc: c.onEndpointSliceDelete, 124 }) 125 126 c.endpointSliceLister = endpointSliceInformer.Lister() 127 c.endpointSlicesSynced = endpointSliceInformer.Informer().HasSynced 128 c.endpointSliceTracker = endpointsliceutil.NewEndpointSliceTracker() 129 130 c.serviceLister = serviceInformer.Lister() 131 c.servicesSynced = serviceInformer.Informer().HasSynced 132 serviceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 133 AddFunc: c.onServiceAdd, 134 UpdateFunc: c.onServiceUpdate, 135 DeleteFunc: c.onServiceDelete, 136 }) 137 138 c.maxEndpointsPerSubset = maxEndpointsPerSubset 139 140 c.reconciler = &reconciler{ 141 client: c.client, 142 maxEndpointsPerSubset: c.maxEndpointsPerSubset, 143 endpointSliceTracker: c.endpointSliceTracker, 144 metricsCache: metrics.NewCache(maxEndpointsPerSubset), 145 eventRecorder: recorder, 146 } 147 148 c.eventBroadcaster = broadcaster 149 c.eventRecorder = recorder 150 151 c.endpointUpdatesBatchPeriod = endpointUpdatesBatchPeriod 152 153 return c 154 } 155 156 // Controller manages selector-based service endpoint slices 157 type Controller struct { 158 client clientset.Interface 159 eventBroadcaster record.EventBroadcaster 160 eventRecorder record.EventRecorder 161 162 // endpointsLister is able to list/get endpoints and is populated by the 163 // shared informer passed to NewController. 164 endpointsLister corelisters.EndpointsLister 165 // endpointsSynced returns true if the endpoints shared informer has been 166 // synced at least once. Added as a member to the struct to allow injection 167 // for testing. 168 endpointsSynced cache.InformerSynced 169 170 // endpointSliceLister is able to list/get endpoint slices and is populated 171 // by the shared informer passed to NewController 172 endpointSliceLister discoverylisters.EndpointSliceLister 173 // endpointSlicesSynced returns true if the endpoint slice shared informer 174 // has been synced at least once. Added as a member to the struct to allow 175 // injection for testing. 176 endpointSlicesSynced cache.InformerSynced 177 178 // endpointSliceTracker tracks the list of EndpointSlices and associated 179 // resource versions expected for each Endpoints resource. It can help 180 // determine if a cached EndpointSlice is out of date. 181 endpointSliceTracker *endpointsliceutil.EndpointSliceTracker 182 183 // serviceLister is able to list/get services and is populated by the shared 184 // informer passed to NewController. 185 serviceLister corelisters.ServiceLister 186 // servicesSynced returns true if the services shared informer has been 187 // synced at least once. Added as a member to the struct to allow injection 188 // for testing. 189 servicesSynced cache.InformerSynced 190 191 // reconciler is an util used to reconcile EndpointSlice changes. 192 reconciler *reconciler 193 194 // Endpoints that need to be updated. A channel is inappropriate here, 195 // because it allows Endpoints with lots of addresses to be serviced much 196 // more often than Endpoints with few addresses; it also would cause an 197 // Endpoints resource that's inserted multiple times to be processed more 198 // than necessary. 199 queue workqueue.TypedRateLimitingInterface[string] 200 201 // maxEndpointsPerSubset references the maximum number of endpoints that 202 // should be added to an EndpointSlice for an EndpointSubset. 203 maxEndpointsPerSubset int32 204 205 // workerLoopPeriod is the time between worker runs. The workers process the 206 // queue of changes to Endpoints resources. 207 workerLoopPeriod time.Duration 208 209 // endpointUpdatesBatchPeriod is an artificial delay added to all Endpoints 210 // syncs triggered by EndpointSlice changes. This can be used to reduce 211 // overall number of all EndpointSlice updates. 212 endpointUpdatesBatchPeriod time.Duration 213 } 214 215 // Run will not return until stopCh is closed. 216 func (c *Controller) Run(ctx context.Context, workers int) { 217 defer utilruntime.HandleCrash() 218 219 // Start events processing pipeline. 220 c.eventBroadcaster.StartLogging(klog.Infof) 221 c.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.client.CoreV1().Events("")}) 222 defer c.eventBroadcaster.Shutdown() 223 224 defer c.queue.ShutDown() 225 226 logger := klog.FromContext(ctx) 227 logger.Info("Starting EndpointSliceMirroring controller") 228 defer logger.Info("Shutting down EndpointSliceMirroring controller") 229 230 if !cache.WaitForNamedCacheSync("endpoint_slice_mirroring", ctx.Done(), c.endpointsSynced, c.endpointSlicesSynced, c.servicesSynced) { 231 return 232 } 233 234 logger.V(2).Info("Starting worker threads", "total", workers) 235 for i := 0; i < workers; i++ { 236 go wait.Until(func() { c.worker(logger) }, c.workerLoopPeriod, ctx.Done()) 237 } 238 239 <-ctx.Done() 240 } 241 242 // worker runs a worker thread that just dequeues items, processes them, and 243 // marks them done. You may run as many of these in parallel as you wish; the 244 // workqueue guarantees that they will not end up processing the same service 245 // at the same time 246 func (c *Controller) worker(logger klog.Logger) { 247 for c.processNextWorkItem(logger) { 248 } 249 } 250 251 func (c *Controller) processNextWorkItem(logger klog.Logger) bool { 252 cKey, quit := c.queue.Get() 253 if quit { 254 return false 255 } 256 defer c.queue.Done(cKey) 257 258 err := c.syncEndpoints(logger, cKey) 259 c.handleErr(logger, err, cKey) 260 261 return true 262 } 263 264 func (c *Controller) handleErr(logger klog.Logger, err error, key string) { 265 if err == nil { 266 c.queue.Forget(key) 267 return 268 } 269 270 if c.queue.NumRequeues(key) < maxRetries { 271 logger.Info("Error mirroring EndpointSlices for Endpoints, retrying", "key", key, "err", err) 272 c.queue.AddRateLimited(key) 273 return 274 } 275 276 logger.Info("Retry budget exceeded, dropping Endpoints out of the queue", "key", key, "err", err) 277 c.queue.Forget(key) 278 utilruntime.HandleError(err) 279 } 280 281 func (c *Controller) syncEndpoints(logger klog.Logger, key string) error { 282 startTime := time.Now() 283 defer func() { 284 syncDuration := float64(time.Since(startTime).Milliseconds()) / 1000 285 metrics.EndpointsSyncDuration.WithLabelValues().Observe(syncDuration) 286 logger.V(4).Info("Finished syncing EndpointSlices for Endpoints", "key", key, "elapsedTime", time.Since(startTime)) 287 }() 288 289 logger.V(4).Info("syncEndpoints", "key", key) 290 291 namespace, name, err := cache.SplitMetaNamespaceKey(key) 292 if err != nil { 293 return err 294 } 295 296 endpoints, err := c.endpointsLister.Endpoints(namespace).Get(name) 297 if err != nil { 298 if apierrors.IsNotFound(err) { 299 logger.V(4).Info("Endpoints not found, cleaning up any mirrored EndpointSlices", "endpoints", klog.KRef(namespace, name)) 300 c.endpointSliceTracker.DeleteService(namespace, name) 301 return c.deleteMirroredSlices(namespace, name) 302 } 303 return err 304 } 305 306 if !c.shouldMirror(endpoints) { 307 logger.V(4).Info("Endpoints should not be mirrored, cleaning up any mirrored EndpointSlices", "endpoints", klog.KRef(namespace, name)) 308 c.endpointSliceTracker.DeleteService(namespace, name) 309 return c.deleteMirroredSlices(namespace, name) 310 } 311 312 svc, err := c.serviceLister.Services(namespace).Get(name) 313 if err != nil { 314 if apierrors.IsNotFound(err) { 315 logger.V(4).Info("Service not found, cleaning up any mirrored EndpointSlices", "service", klog.KRef(namespace, name)) 316 c.endpointSliceTracker.DeleteService(namespace, name) 317 return c.deleteMirroredSlices(namespace, name) 318 } 319 return err 320 } 321 322 // If a selector is specified, clean up any mirrored slices. 323 if svc.Spec.Selector != nil { 324 logger.V(4).Info("Service now has selector, cleaning up any mirrored EndpointSlices", "service", klog.KRef(namespace, name)) 325 c.endpointSliceTracker.DeleteService(namespace, name) 326 return c.deleteMirroredSlices(namespace, name) 327 } 328 329 endpointSlices, err := endpointSlicesMirroredForService(c.endpointSliceLister, namespace, name) 330 if err != nil { 331 return err 332 } 333 334 if c.endpointSliceTracker.StaleSlices(svc, endpointSlices) { 335 return endpointslicepkg.NewStaleInformerCache("EndpointSlice informer cache is out of date") 336 } 337 338 err = c.reconciler.reconcile(logger, endpoints, endpointSlices) 339 if err != nil { 340 return err 341 } 342 343 return nil 344 } 345 346 // queueEndpoints queues the Endpoints resource for processing. 347 func (c *Controller) queueEndpoints(obj interface{}) { 348 key, err := controller.KeyFunc(obj) 349 if err != nil { 350 utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v (type %T): %v", obj, obj, err)) 351 return 352 } 353 354 c.queue.Add(key) 355 } 356 357 // shouldMirror returns true if an Endpoints resource should be mirrored by this 358 // controller. This will be false if: 359 // - the Endpoints resource is nil. 360 // - the Endpoints resource has a skip-mirror label. 361 // - the Endpoints resource has a leader election annotation. 362 // This does not ensure that a corresponding Service exists with a nil selector. 363 // That check should be performed separately. 364 func (c *Controller) shouldMirror(endpoints *v1.Endpoints) bool { 365 if endpoints == nil || skipMirror(endpoints.Labels) || hasLeaderElection(endpoints.Annotations) { 366 return false 367 } 368 369 return true 370 } 371 372 // onServiceAdd queues a sync for the relevant Endpoints resource. 373 func (c *Controller) onServiceAdd(obj interface{}) { 374 service := obj.(*v1.Service) 375 if service == nil { 376 utilruntime.HandleError(fmt.Errorf("onServiceAdd() expected type v1.Service, got %T", obj)) 377 return 378 } 379 if service.Spec.Selector == nil { 380 c.queueEndpoints(obj) 381 } 382 } 383 384 // onServiceUpdate queues a sync for the relevant Endpoints resource. 385 func (c *Controller) onServiceUpdate(prevObj, obj interface{}) { 386 service := obj.(*v1.Service) 387 prevService := prevObj.(*v1.Service) 388 if service == nil || prevService == nil { 389 utilruntime.HandleError(fmt.Errorf("onServiceUpdate() expected type v1.Service, got %T, %T", prevObj, obj)) 390 return 391 } 392 if (service.Spec.Selector == nil) != (prevService.Spec.Selector == nil) { 393 c.queueEndpoints(obj) 394 } 395 } 396 397 // onServiceDelete queues a sync for the relevant Endpoints resource. 398 func (c *Controller) onServiceDelete(obj interface{}) { 399 service := getServiceFromDeleteAction(obj) 400 if service == nil { 401 utilruntime.HandleError(fmt.Errorf("onServiceDelete() expected type v1.Service, got %T", obj)) 402 return 403 } 404 if service.Spec.Selector == nil { 405 c.queueEndpoints(obj) 406 } 407 } 408 409 // onEndpointsAdd queues a sync for the relevant Endpoints resource. 410 func (c *Controller) onEndpointsAdd(logger klog.Logger, obj interface{}) { 411 endpoints := obj.(*v1.Endpoints) 412 if endpoints == nil { 413 utilruntime.HandleError(fmt.Errorf("onEndpointsAdd() expected type v1.Endpoints, got %T", obj)) 414 return 415 } 416 if !c.shouldMirror(endpoints) { 417 logger.V(5).Info("Skipping mirroring", "endpoints", klog.KObj(endpoints)) 418 return 419 } 420 c.queueEndpoints(obj) 421 } 422 423 // onEndpointsUpdate queues a sync for the relevant Endpoints resource. 424 func (c *Controller) onEndpointsUpdate(logger klog.Logger, prevObj, obj interface{}) { 425 endpoints := obj.(*v1.Endpoints) 426 prevEndpoints := prevObj.(*v1.Endpoints) 427 if endpoints == nil || prevEndpoints == nil { 428 utilruntime.HandleError(fmt.Errorf("onEndpointsUpdate() expected type v1.Endpoints, got %T, %T", prevObj, obj)) 429 return 430 } 431 if !c.shouldMirror(endpoints) && !c.shouldMirror(prevEndpoints) { 432 logger.V(5).Info("Skipping mirroring", "endpoints", klog.KObj(endpoints)) 433 return 434 } 435 c.queueEndpoints(obj) 436 } 437 438 // onEndpointsDelete queues a sync for the relevant Endpoints resource. 439 func (c *Controller) onEndpointsDelete(logger klog.Logger, obj interface{}) { 440 endpoints := getEndpointsFromDeleteAction(obj) 441 if endpoints == nil { 442 utilruntime.HandleError(fmt.Errorf("onEndpointsDelete() expected type v1.Endpoints, got %T", obj)) 443 return 444 } 445 if !c.shouldMirror(endpoints) { 446 logger.V(5).Info("Skipping mirroring", "endpoints", klog.KObj(endpoints)) 447 return 448 } 449 c.queueEndpoints(obj) 450 } 451 452 // onEndpointSliceAdd queues a sync for the relevant Endpoints resource for a 453 // sync if the EndpointSlice resource version does not match the expected 454 // version in the endpointSliceTracker. 455 func (c *Controller) onEndpointSliceAdd(obj interface{}) { 456 endpointSlice := obj.(*discovery.EndpointSlice) 457 if endpointSlice == nil { 458 utilruntime.HandleError(fmt.Errorf("onEndpointSliceAdd() expected type discovery.EndpointSlice, got %T", obj)) 459 return 460 } 461 if managedByController(endpointSlice) && c.endpointSliceTracker.ShouldSync(endpointSlice) { 462 c.queueEndpointsForEndpointSlice(endpointSlice) 463 } 464 } 465 466 // onEndpointSliceUpdate queues a sync for the relevant Endpoints resource for a 467 // sync if the EndpointSlice resource version does not match the expected 468 // version in the endpointSliceTracker or the managed-by value of the 469 // EndpointSlice has changed from or to this controller. 470 func (c *Controller) onEndpointSliceUpdate(logger klog.Logger, prevObj, obj interface{}) { 471 endpointSlice := obj.(*discovery.EndpointSlice) 472 prevEndpointSlice := prevObj.(*discovery.EndpointSlice) 473 if endpointSlice == nil || prevEndpointSlice == nil { 474 utilruntime.HandleError(fmt.Errorf("onEndpointSliceUpdated() expected type discovery.EndpointSlice, got %T, %T", prevObj, obj)) 475 return 476 } 477 // EndpointSlice generation does not change when labels change. Although the 478 // controller will never change LabelServiceName, users might. This check 479 // ensures that we handle changes to this label. 480 svcName := endpointSlice.Labels[discovery.LabelServiceName] 481 prevSvcName := prevEndpointSlice.Labels[discovery.LabelServiceName] 482 if svcName != prevSvcName { 483 logger.Info("LabelServiceName changed", "labelServiceName", discovery.LabelServiceName, "oldName", prevSvcName, "newName", svcName, "endpointSlice", klog.KObj(endpointSlice)) 484 c.queueEndpointsForEndpointSlice(endpointSlice) 485 c.queueEndpointsForEndpointSlice(prevEndpointSlice) 486 return 487 } 488 if managedByChanged(prevEndpointSlice, endpointSlice) || (managedByController(endpointSlice) && c.endpointSliceTracker.ShouldSync(endpointSlice)) { 489 c.queueEndpointsForEndpointSlice(endpointSlice) 490 } 491 } 492 493 // onEndpointSliceDelete queues a sync for the relevant Endpoints resource for a 494 // sync if the EndpointSlice resource version does not match the expected 495 // version in the endpointSliceTracker. 496 func (c *Controller) onEndpointSliceDelete(obj interface{}) { 497 endpointSlice := getEndpointSliceFromDeleteAction(obj) 498 if endpointSlice == nil { 499 utilruntime.HandleError(fmt.Errorf("onEndpointSliceDelete() expected type discovery.EndpointSlice, got %T", obj)) 500 return 501 } 502 if managedByController(endpointSlice) && c.endpointSliceTracker.Has(endpointSlice) { 503 // This returns false if we didn't expect the EndpointSlice to be 504 // deleted. If that is the case, we queue the Service for another sync. 505 if !c.endpointSliceTracker.HandleDeletion(endpointSlice) { 506 c.queueEndpointsForEndpointSlice(endpointSlice) 507 } 508 } 509 } 510 511 // queueEndpointsForEndpointSlice attempts to queue the corresponding Endpoints 512 // resource for the provided EndpointSlice. 513 func (c *Controller) queueEndpointsForEndpointSlice(endpointSlice *discovery.EndpointSlice) { 514 key, err := endpointsControllerKey(endpointSlice) 515 if err != nil { 516 utilruntime.HandleError(fmt.Errorf("Couldn't get key for EndpointSlice %+v (type %T): %v", endpointSlice, endpointSlice, err)) 517 return 518 } 519 520 c.queue.AddAfter(key, c.endpointUpdatesBatchPeriod) 521 } 522 523 // deleteMirroredSlices will delete and EndpointSlices that have been mirrored 524 // for Endpoints with this namespace and name. 525 func (c *Controller) deleteMirroredSlices(namespace, name string) error { 526 endpointSlices, err := endpointSlicesMirroredForService(c.endpointSliceLister, namespace, name) 527 if err != nil { 528 return err 529 } 530 531 c.endpointSliceTracker.DeleteService(namespace, name) 532 return c.reconciler.deleteEndpoints(namespace, name, endpointSlices) 533 } 534 535 // endpointSlicesMirroredForService returns the EndpointSlices that have been 536 // mirrored for a Service by this controller. 537 func endpointSlicesMirroredForService(endpointSliceLister discoverylisters.EndpointSliceLister, namespace, name string) ([]*discovery.EndpointSlice, error) { 538 esLabelSelector := labels.Set(map[string]string{ 539 discovery.LabelServiceName: name, 540 discovery.LabelManagedBy: controllerName, 541 }).AsSelectorPreValidated() 542 return endpointSliceLister.EndpointSlices(namespace).List(esLabelSelector) 543 }