k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/endpoint/endpoints_controller.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package endpoint

import (
	"context"
	"fmt"
	"math"
	"time"

	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/conversion"
	"k8s.io/apimachinery/pkg/labels"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	coreinformers "k8s.io/client-go/informers/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/leaderelection/resourcelock"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	endpointsliceutil "k8s.io/endpointslice/util"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/api/v1/endpoints"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	api "k8s.io/kubernetes/pkg/apis/core"
	helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/controller"
	utillabels "k8s.io/kubernetes/pkg/util/labels"
	utilnet "k8s.io/utils/net"
)

const (
	// maxRetries is the number of times a service will be retried before it is dropped out of the queue.
	// With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the
	// sequence of delays between successive queuings of a service.
	//
	// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s
	maxRetries = 15

	// maxCapacity represents the maximum number of addresses that should be
	// stored in an Endpoints resource. In a future release, this controller
	// may truncate endpoints exceeding this length.
	maxCapacity = 1000

	// truncated is a possible value for the `endpoints.kubernetes.io/over-capacity` annotation on an
	// Endpoints resource and indicates that the number of endpoints has been truncated to
	// maxCapacity.
	truncated = "truncated"
)

// NewEndpointController returns a new *Controller.
func NewEndpointController(ctx context.Context, podInformer coreinformers.PodInformer, serviceInformer coreinformers.ServiceInformer,
	endpointsInformer coreinformers.EndpointsInformer, client clientset.Interface, endpointUpdatesBatchPeriod time.Duration) *Controller {
	broadcaster := record.NewBroadcaster(record.WithContext(ctx))
	recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "endpoint-controller"})

	e := &Controller{
		client: client,
		queue: workqueue.NewTypedRateLimitingQueueWithConfig(
			workqueue.DefaultTypedControllerRateLimiter[string](),
			workqueue.TypedRateLimitingQueueConfig[string]{
				Name: "endpoint",
			},
		),
		workerLoopPeriod: time.Second,
	}

	serviceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: e.onServiceUpdate,
		UpdateFunc: func(old, cur interface{}) {
			e.onServiceUpdate(cur)
		},
		DeleteFunc: e.onServiceDelete,
	})
	e.serviceLister = serviceInformer.Lister()
	e.servicesSynced = serviceInformer.Informer().HasSynced

	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    e.addPod,
		UpdateFunc: e.updatePod,
		DeleteFunc: e.deletePod,
	})
	e.podLister = podInformer.Lister()
	e.podsSynced = podInformer.Informer().HasSynced

	endpointsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		DeleteFunc: e.onEndpointsDelete,
	})
	e.endpointsLister = endpointsInformer.Lister()
	e.endpointsSynced = endpointsInformer.Informer().HasSynced

	e.triggerTimeTracker = endpointsliceutil.NewTriggerTimeTracker()
	e.eventBroadcaster = broadcaster
	e.eventRecorder = recorder

	e.endpointUpdatesBatchPeriod = endpointUpdatesBatchPeriod

	return e
}
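
// Illustrative wiring sketch (not part of the upstream file): kube-controller-manager
// constructs this controller from a shared informer factory roughly as below. The
// factory variable, the one-second batch period, and the worker count of 5 are
// assumptions chosen only for the example (informers is "k8s.io/client-go/informers").
//
//	factory := informers.NewSharedInformerFactory(client, 0)
//	ec := NewEndpointController(ctx,
//		factory.Core().V1().Pods(),
//		factory.Core().V1().Services(),
//		factory.Core().V1().Endpoints(),
//		client,
//		time.Second, // endpointUpdatesBatchPeriod
//	)
//	factory.Start(ctx.Done())
//	ec.Run(ctx, 5) // blocks until ctx is cancelled
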
// Controller manages selector-based service endpoints.
type Controller struct {
	client           clientset.Interface
	eventBroadcaster record.EventBroadcaster
	eventRecorder    record.EventRecorder

	// serviceLister is able to list/get services and is populated by the shared informer passed to
	// NewEndpointController.
	serviceLister corelisters.ServiceLister
	// servicesSynced returns true if the service shared informer has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	servicesSynced cache.InformerSynced

	// podLister is able to list/get pods and is populated by the shared informer passed to
	// NewEndpointController.
	podLister corelisters.PodLister
	// podsSynced returns true if the pod shared informer has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	podsSynced cache.InformerSynced

	// endpointsLister is able to list/get endpoints and is populated by the shared informer passed to
	// NewEndpointController.
	endpointsLister corelisters.EndpointsLister
	// endpointsSynced returns true if the endpoints shared informer has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	endpointsSynced cache.InformerSynced

	// Services that need to be updated. A channel is inappropriate here,
	// because it allows services with lots of pods to be serviced much
	// more often than services with few pods; it also would cause a
	// service that's inserted multiple times to be processed more than
	// necessary.
	queue workqueue.TypedRateLimitingInterface[string]

	// workerLoopPeriod is the time between worker runs. The workers process the queue of service and pod changes.
	workerLoopPeriod time.Duration

	// triggerTimeTracker is a util used to compute and export the EndpointsLastChangeTriggerTime
	// annotation.
	triggerTimeTracker *endpointsliceutil.TriggerTimeTracker

	endpointUpdatesBatchPeriod time.Duration
}
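
// Illustrative sketch (assumption, not upstream code) of why a deduplicating,
// rate-limited workqueue is used for the queue field above instead of a channel:
// re-adding a key that is already pending coalesces into a single processing pass.
//
//	q := workqueue.NewTypedRateLimitingQueueWithConfig(
//		workqueue.DefaultTypedControllerRateLimiter[string](),
//		workqueue.TypedRateLimitingQueueConfig[string]{Name: "example"},
//	)
//	q.Add("default/web")
//	q.Add("default/web") // coalesced with the item already queued
//	key, _ := q.Get()    // "default/web"
//	q.Done(key)          // queue is now empty; q.Len() == 0
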
// Run will not return until ctx is cancelled. workers determines how many
// endpoints will be handled in parallel.
func (e *Controller) Run(ctx context.Context, workers int) {
	defer utilruntime.HandleCrash()

	// Start events processing pipeline.
	e.eventBroadcaster.StartStructuredLogging(3)
	e.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: e.client.CoreV1().Events("")})
	defer e.eventBroadcaster.Shutdown()

	defer e.queue.ShutDown()

	logger := klog.FromContext(ctx)
	logger.Info("Starting endpoint controller")
	defer logger.Info("Shutting down endpoint controller")

	if !cache.WaitForNamedCacheSync("endpoint", ctx.Done(), e.podsSynced, e.servicesSynced, e.endpointsSynced) {
		return
	}

	for i := 0; i < workers; i++ {
		go wait.UntilWithContext(ctx, e.worker, e.workerLoopPeriod)
	}

	go func() {
		defer utilruntime.HandleCrash()
		e.checkLeftoverEndpoints()
	}()

	<-ctx.Done()
}

// When a pod is added, figure out what services it will be a member of and
// enqueue them. obj must have *v1.Pod type.
func (e *Controller) addPod(obj interface{}) {
	pod := obj.(*v1.Pod)
	services, err := endpointsliceutil.GetPodServiceMemberships(e.serviceLister, pod)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Unable to get pod %s/%s's service memberships: %v", pod.Namespace, pod.Name, err))
		return
	}
	for key := range services {
		e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod)
	}
}

func podToEndpointAddressForService(svc *v1.Service, pod *v1.Pod) (*v1.EndpointAddress, error) {
	var endpointIP string
	ipFamily := v1.IPv4Protocol

	if len(svc.Spec.IPFamilies) > 0 {
		// controller is connected to an api-server that correctly sets IPFamilies
		ipFamily = svc.Spec.IPFamilies[0] // this works for headful and headless
	} else {
		// controller is connected to an api-server that does not correctly
		// set IPFamilies (e.g. old api-server during an upgrade)
		// TODO (khenidak): remove once upgrading from a cluster that does not
		// support dual stack is no longer possible
		if len(svc.Spec.ClusterIP) > 0 && svc.Spec.ClusterIP != v1.ClusterIPNone {
			// headful service. detect via service clusterIP
			if utilnet.IsIPv6String(svc.Spec.ClusterIP) {
				ipFamily = v1.IPv6Protocol
			}
		} else {
			// Since this is a headless service we use podIP to identify the family.
			// This assumes that status.PodIP is assigned correctly (follows pod cidr and
			// pod cidr list order is same as service cidr list order). The expectation is
			// this is *most probably* the case.

			// if the family was incorrectly identified then this will be corrected once the
			// upgrade is completed (controller connects to api-server that correctly defaults services)
			if utilnet.IsIPv6String(pod.Status.PodIP) {
				ipFamily = v1.IPv6Protocol
			}
		}
	}

	// find an IP that matches the family
	for _, podIP := range pod.Status.PodIPs {
		if (ipFamily == v1.IPv6Protocol) == utilnet.IsIPv6String(podIP.IP) {
			endpointIP = podIP.IP
			break
		}
	}

	if endpointIP == "" {
		return nil, fmt.Errorf("failed to find a matching endpoint for service %v", svc.Name)
	}

	return &v1.EndpointAddress{
		IP:       endpointIP,
		NodeName: &pod.Spec.NodeName,
		TargetRef: &v1.ObjectReference{
			Kind:      "Pod",
			Namespace: pod.ObjectMeta.Namespace,
			Name:      pod.ObjectMeta.Name,
			UID:       pod.ObjectMeta.UID,
		},
	}, nil
}
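
// Illustrative example (assumption, not upstream code) of the family matching in
// podToEndpointAddressForService above: for an IPv6 Service, the first IPv6 entry
// in pod.Status.PodIPs is selected.
//
//	svc := &v1.Service{Spec: v1.ServiceSpec{IPFamilies: []v1.IPFamily{v1.IPv6Protocol}}}
//	pod := &v1.Pod{Status: v1.PodStatus{PodIPs: []v1.PodIP{{IP: "10.0.0.5"}, {IP: "fd00::5"}}}}
//	epa, err := podToEndpointAddressForService(svc, pod)
//	// err == nil, epa.IP == "fd00::5"
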
// When a pod is updated, figure out what services it used to be a member of
// and what services it will be a member of, and enqueue the union of these.
// old and cur must be *v1.Pod types.
func (e *Controller) updatePod(old, cur interface{}) {
	services := endpointsliceutil.GetServicesToUpdateOnPodChange(e.serviceLister, old, cur)
	for key := range services {
		e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod)
	}
}

// When a pod is deleted, enqueue the services the pod used to be a member of.
// obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item.
func (e *Controller) deletePod(obj interface{}) {
	pod := endpointsliceutil.GetPodFromDeleteAction(obj)
	if pod != nil {
		e.addPod(pod)
	}
}

// onServiceUpdate queues the Service for processing.
func (e *Controller) onServiceUpdate(obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
		return
	}
	e.queue.Add(key)
}

// onServiceDelete queues the Service for processing.
func (e *Controller) onServiceDelete(obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
		return
	}
	e.queue.Add(key)
}

// onEndpointsDelete queues the owning Service for processing when its Endpoints
// object is deleted.
func (e *Controller) onEndpointsDelete(obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
		return
	}
	e.queue.Add(key)
}

// worker runs a worker thread that just dequeues items, processes them, and
// marks them done. You may run as many of these in parallel as you wish; the
// workqueue guarantees that they will not end up processing the same service
// at the same time.
func (e *Controller) worker(ctx context.Context) {
	for e.processNextWorkItem(ctx) {
	}
}

func (e *Controller) processNextWorkItem(ctx context.Context) bool {
	eKey, quit := e.queue.Get()
	if quit {
		return false
	}
	defer e.queue.Done(eKey)

	logger := klog.FromContext(ctx)
	err := e.syncService(ctx, eKey)
	e.handleErr(logger, err, eKey)

	return true
}

func (e *Controller) handleErr(logger klog.Logger, err error, key string) {
	if err == nil {
		e.queue.Forget(key)
		return
	}

	ns, name, keyErr := cache.SplitMetaNamespaceKey(key)
	if keyErr != nil {
		logger.Error(err, "Failed to split meta namespace cache key", "key", key)
	}

	if e.queue.NumRequeues(key) < maxRetries {
		logger.V(2).Info("Error syncing endpoints, retrying", "service", klog.KRef(ns, name), "err", err)
		e.queue.AddRateLimited(key)
		return
	}

	logger.Info("Dropping service out of the queue", "service", klog.KRef(ns, name), "err", err)
	e.queue.Forget(key)
	utilruntime.HandleError(err)
}

func (e *Controller) syncService(ctx context.Context, key string) error {
	startTime := time.Now()
	logger := klog.FromContext(ctx)
	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}
	defer func() {
		logger.V(4).Info("Finished syncing service endpoints", "service", klog.KRef(namespace, name), "startTime", time.Since(startTime))
	}()

	service, err := e.serviceLister.Services(namespace).Get(name)
	if err != nil {
		if !errors.IsNotFound(err) {
			return err
		}

		// Delete the corresponding endpoint, as the service has been deleted.
		// TODO: Please note that this will delete an endpoint when a
		// service is deleted. However, if we're down at the time when
		// the service is deleted, we will miss that deletion, so this
		// doesn't completely solve the problem. See #6877.
		err = e.client.CoreV1().Endpoints(namespace).Delete(ctx, name, metav1.DeleteOptions{})
		if err != nil && !errors.IsNotFound(err) {
			return err
		}
		e.triggerTimeTracker.DeleteService(namespace, name)
		return nil
	}

	if service.Spec.Type == v1.ServiceTypeExternalName {
		// services with Type ExternalName receive no endpoints from this controller;
		// Ref: https://issues.k8s.io/105986
		return nil
	}

	if service.Spec.Selector == nil {
		// services without a selector receive no endpoints from this controller;
		// these services will receive the endpoints that are created out-of-band via the REST API.
		return nil
	}

	logger.V(5).Info("About to update endpoints for service", "service", klog.KRef(namespace, name))
	pods, err := e.podLister.Pods(service.Namespace).List(labels.Set(service.Spec.Selector).AsSelectorPreValidated())
	if err != nil {
		// Since we're getting stuff from a local cache, it is
		// basically impossible to get this error.
		return err
	}

	// We call ComputeEndpointLastChangeTriggerTime here to make sure that the
	// state of the trigger time tracker gets updated even if the sync turns out
	// to be no-op and we don't update the endpoints object.
	endpointsLastChangeTriggerTime := e.triggerTimeTracker.
		ComputeEndpointLastChangeTriggerTime(namespace, service, pods)

	subsets := []v1.EndpointSubset{}
	var totalReadyEps int
	var totalNotReadyEps int

	for _, pod := range pods {
		if !endpointsliceutil.ShouldPodBeInEndpoints(pod, service.Spec.PublishNotReadyAddresses) {
			logger.V(5).Info("Pod is not included on endpoints for Service", "pod", klog.KObj(pod), "service", klog.KObj(service))
			continue
		}

		ep, err := podToEndpointAddressForService(service, pod)
		if err != nil {
			// this will happen if the cluster runs with some nodes configured as dual-stack
			// and some as not, such as during an upgrade
			logger.V(2).Info("Failed to find endpoint for service with ClusterIP on pod with error", "service", klog.KObj(service), "clusterIP", service.Spec.ClusterIP, "pod", klog.KObj(pod), "error", err)
			continue
		}

		epa := *ep
		if endpointsliceutil.ShouldSetHostname(pod, service) {
			epa.Hostname = pod.Spec.Hostname
		}

		// Allow headless service not to have ports.
		if len(service.Spec.Ports) == 0 {
			if service.Spec.ClusterIP == api.ClusterIPNone {
				subsets, totalReadyEps, totalNotReadyEps = addEndpointSubset(logger, subsets, pod, epa, nil, service.Spec.PublishNotReadyAddresses)
				// No need to repack subsets for headless service without ports.
			}
		} else {
			for i := range service.Spec.Ports {
				servicePort := &service.Spec.Ports[i]
				portNum, err := podutil.FindPort(pod, servicePort)
				if err != nil {
					logger.V(4).Info("Failed to find port for service", "service", klog.KObj(service), "error", err)
					continue
				}
				epp := endpointPortFromServicePort(servicePort, portNum)

				var readyEps, notReadyEps int
				subsets, readyEps, notReadyEps = addEndpointSubset(logger, subsets, pod, epa, epp, service.Spec.PublishNotReadyAddresses)
				totalReadyEps = totalReadyEps + readyEps
				totalNotReadyEps = totalNotReadyEps + notReadyEps
			}
		}
	}
	subsets = endpoints.RepackSubsets(subsets)

	// See if there's actually an update here.
	currentEndpoints, err := e.endpointsLister.Endpoints(service.Namespace).Get(service.Name)
	if err != nil {
		if !errors.IsNotFound(err) {
			return err
		}
		currentEndpoints = &v1.Endpoints{
			ObjectMeta: metav1.ObjectMeta{
				Name:   service.Name,
				Labels: service.Labels,
			},
		}
	}

	createEndpoints := len(currentEndpoints.ResourceVersion) == 0

	// Compare the sorted subsets and labels.
	// Remove the HeadlessService label from the endpoints if it exists,
	// as this won't be set on the service itself
	// and will cause a false negative in this diff check.
	// But first check if it has that label to avoid expensive copies.
	compareLabels := currentEndpoints.Labels
	if _, ok := currentEndpoints.Labels[v1.IsHeadlessService]; ok {
		compareLabels = utillabels.CloneAndRemoveLabel(currentEndpoints.Labels, v1.IsHeadlessService)
	}
	// When comparing the subsets, we ignore the difference in ResourceVersion of Pod to avoid unnecessary Endpoints
	// updates caused by Pod updates that we don't care about, e.g. annotation updates.
	if !createEndpoints &&
		endpointSubsetsEqualIgnoreResourceVersion(currentEndpoints.Subsets, subsets) &&
		apiequality.Semantic.DeepEqual(compareLabels, service.Labels) &&
		capacityAnnotationSetCorrectly(currentEndpoints.Annotations, currentEndpoints.Subsets) {
		logger.V(5).Info("endpoints are equal, skipping update", "service", klog.KObj(service))
		return nil
	}
	newEndpoints := currentEndpoints.DeepCopy()
	newEndpoints.Subsets = subsets
	newEndpoints.Labels = service.Labels
	if newEndpoints.Annotations == nil {
		newEndpoints.Annotations = make(map[string]string)
	}

	if !endpointsLastChangeTriggerTime.IsZero() {
		newEndpoints.Annotations[v1.EndpointsLastChangeTriggerTime] =
			endpointsLastChangeTriggerTime.UTC().Format(time.RFC3339Nano)
	} else { // No new trigger time, clear the annotation.
		delete(newEndpoints.Annotations, v1.EndpointsLastChangeTriggerTime)
	}

	if truncateEndpoints(newEndpoints) {
		newEndpoints.Annotations[v1.EndpointsOverCapacity] = truncated
	} else {
		delete(newEndpoints.Annotations, v1.EndpointsOverCapacity)
	}

	if newEndpoints.Labels == nil {
		newEndpoints.Labels = make(map[string]string)
	}

	if !helper.IsServiceIPSet(service) {
		newEndpoints.Labels = utillabels.CloneAndAddLabel(newEndpoints.Labels, v1.IsHeadlessService, "")
	} else {
		newEndpoints.Labels = utillabels.CloneAndRemoveLabel(newEndpoints.Labels, v1.IsHeadlessService)
	}

	logger.V(4).Info("Update endpoints", "service", klog.KObj(service), "readyEndpoints", totalReadyEps, "notreadyEndpoints", totalNotReadyEps)
	if createEndpoints {
		// No previous endpoints, create them
		_, err = e.client.CoreV1().Endpoints(service.Namespace).Create(ctx, newEndpoints, metav1.CreateOptions{})
	} else {
		// Pre-existing
		_, err = e.client.CoreV1().Endpoints(service.Namespace).Update(ctx, newEndpoints, metav1.UpdateOptions{})
	}
	if err != nil {
		if createEndpoints && errors.IsForbidden(err) {
			// A request is forbidden primarily for two reasons:
			// 1. namespace is terminating, endpoint creation is not allowed by default.
			// 2. policy is misconfigured, in which case no service would function anywhere.
			// Given the frequency of 1, we log at a lower level.
			logger.V(5).Info("Forbidden from creating endpoints", "error", err)

			// If the namespace is terminating, creates will continue to fail. Simply drop the item.
			if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
				return nil
			}
		}

		if createEndpoints {
			e.eventRecorder.Eventf(newEndpoints, v1.EventTypeWarning, "FailedToCreateEndpoint", "Failed to create endpoint for service %v/%v: %v", service.Namespace, service.Name, err)
		} else {
			e.eventRecorder.Eventf(newEndpoints, v1.EventTypeWarning, "FailedToUpdateEndpoint", "Failed to update endpoint %v/%v: %v", service.Namespace, service.Name, err)
		}

		return err
	}
	return nil
}
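
// Illustrative result (assumption, not upstream code): for a Service "default/web"
// exposing TCP port 80 and selecting one ready pod "web-0" at 10.0.0.5, the object
// written by syncService looks roughly like:
//
//	&v1.Endpoints{
//		ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "default"},
//		Subsets: []v1.EndpointSubset{{
//			Addresses: []v1.EndpointAddress{{
//				IP:        "10.0.0.5",
//				TargetRef: &v1.ObjectReference{Kind: "Pod", Namespace: "default", Name: "web-0"},
//			}},
//			Ports: []v1.EndpointPort{{Port: 80, Protocol: v1.ProtocolTCP}},
//		}},
//	}
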
// checkLeftoverEndpoints lists all currently existing endpoints and adds their
// service to the queue. This will detect endpoints that exist with no
// corresponding service; these endpoints need to be deleted. We only need to
// do this once on startup, because in steady-state these are detected (but
// some stragglers could have been left behind if the endpoint controller
// reboots).
func (e *Controller) checkLeftoverEndpoints() {
	list, err := e.endpointsLister.List(labels.Everything())
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Unable to list endpoints (%v); orphaned endpoints will not be cleaned up. (They're pretty harmless, but you can restart this component if you want another attempt made.)", err))
		return
	}
	for _, ep := range list {
		if _, ok := ep.Annotations[resourcelock.LeaderElectionRecordAnnotationKey]; ok {
			// When there are multiple controller-manager instances, deleting the
			// leader-election Endpoints (which intentionally have no corresponding
			// Service) would cause re-election after 5 minutes, so skip the delete here.
			continue
		}
		key, err := controller.KeyFunc(ep)
		if err != nil {
			utilruntime.HandleError(fmt.Errorf("Unable to get key for endpoint %#v", ep))
			continue
		}
		e.queue.Add(key)
	}
}

// addEndpointSubset adds the endpoint address and port to the EndpointSubset list.
// The address is added to the ready or not-ready field depending on the pod status
// and the Service's PublishNotReadyAddresses field value.
// The pod passed to this function must have already been filtered through ShouldPodBeInEndpoints.
func addEndpointSubset(logger klog.Logger, subsets []v1.EndpointSubset, pod *v1.Pod, epa v1.EndpointAddress,
	epp *v1.EndpointPort, tolerateUnreadyEndpoints bool) ([]v1.EndpointSubset, int, int) {
	var readyEps int
	var notReadyEps int
	ports := []v1.EndpointPort{}
	if epp != nil {
		ports = append(ports, *epp)
	}
	if tolerateUnreadyEndpoints || podutil.IsPodReady(pod) {
		subsets = append(subsets, v1.EndpointSubset{
			Addresses: []v1.EndpointAddress{epa},
			Ports:     ports,
		})
		readyEps++
	} else { // if it is not a ready address it has to be not ready
		logger.V(5).Info("Pod is out of service", "pod", klog.KObj(pod))
		subsets = append(subsets, v1.EndpointSubset{
			NotReadyAddresses: []v1.EndpointAddress{epa},
			Ports:             ports,
		})
		notReadyEps++
	}
	return subsets, readyEps, notReadyEps
}

func endpointPortFromServicePort(servicePort *v1.ServicePort, portNum int) *v1.EndpointPort {
	return &v1.EndpointPort{
		Name:        servicePort.Name,
		Port:        int32(portNum),
		Protocol:    servicePort.Protocol,
		AppProtocol: servicePort.AppProtocol,
	}
}

// capacityAnnotationSetCorrectly returns true only when the number of endpoints is
// at or under maxCapacity and the over-capacity annotation is not set; it returns
// false when the subsets are over capacity (and therefore must be truncated) or
// when the annotation is set while under capacity.
func capacityAnnotationSetCorrectly(annotations map[string]string, subsets []v1.EndpointSubset) bool {
	numEndpoints := 0
	for _, subset := range subsets {
		numEndpoints += len(subset.Addresses) + len(subset.NotReadyAddresses)
	}
	if numEndpoints > maxCapacity {
		// If subsets are over capacity, they must be truncated, so consider
		// the annotation as not set correctly.
		return false
	}
	_, ok := annotations[v1.EndpointsOverCapacity]
	return !ok
}
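
// Illustrative behaviour (assumption, not upstream code) of the check above, with
// maxCapacity = 1000:
//
//	atCapacity := []v1.EndpointSubset{{
//		Addresses:         make([]v1.EndpointAddress, 999),
//		NotReadyAddresses: make([]v1.EndpointAddress, 1),
//	}}
//	capacityAnnotationSetCorrectly(nil, atCapacity) // true: 1000 endpoints, no annotation
//	capacityAnnotationSetCorrectly(map[string]string{v1.EndpointsOverCapacity: truncated}, atCapacity) // false: annotation set while under capacity
//
//	over := []v1.EndpointSubset{{Addresses: make([]v1.EndpointAddress, 1001)}}
//	capacityAnnotationSetCorrectly(map[string]string{v1.EndpointsOverCapacity: truncated}, over) // false: over capacity, must be truncated
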
// truncateEndpoints by best effort will distribute the endpoints over the subsets based on the proportion
// of endpoints per subset and will prioritize Ready Endpoints over NotReady Endpoints.
func truncateEndpoints(endpoints *v1.Endpoints) bool {
	totalReady := 0
	totalNotReady := 0
	for _, subset := range endpoints.Subsets {
		totalReady += len(subset.Addresses)
		totalNotReady += len(subset.NotReadyAddresses)
	}

	if totalReady+totalNotReady <= maxCapacity {
		return false
	}

	truncateReady := false
	max := maxCapacity - totalReady
	numTotal := totalNotReady
	if totalReady > maxCapacity {
		truncateReady = true
		max = maxCapacity
		numTotal = totalReady
	}
	canBeAdded := max

	for i := range endpoints.Subsets {
		subset := endpoints.Subsets[i]
		numInSubset := len(subset.Addresses)
		if !truncateReady {
			numInSubset = len(subset.NotReadyAddresses)
		}

		// The number of endpoints per subset will be based on the proportion of endpoints
		// in this subset versus the total number of endpoints. The proportion is rounded up,
		// which most likely will lead to the last subset having fewer endpoints than the
		// expected proportion.
		toBeAdded := int(math.Ceil((float64(numInSubset) / float64(numTotal)) * float64(max)))
		// If there are not enough slots left for this subset, add only up to the
		// remaining capacity.
		if toBeAdded > canBeAdded {
			toBeAdded = canBeAdded
		}

		if truncateReady {
			// Truncate ready Addresses to the allocated proportion and drop all not-ready
			// addresses.
			subset.Addresses = addressSubset(subset.Addresses, toBeAdded)
			subset.NotReadyAddresses = []v1.EndpointAddress{}
			canBeAdded -= len(subset.Addresses)
		} else {
			// Only truncate the not-ready addresses.
			subset.NotReadyAddresses = addressSubset(subset.NotReadyAddresses, toBeAdded)
			canBeAdded -= len(subset.NotReadyAddresses)
		}
		endpoints.Subsets[i] = subset
	}
	return true
}

// addressSubset returns the first maxNum addresses if the list is longer than maxNum;
// otherwise the entire list is returned.
func addressSubset(addresses []v1.EndpointAddress, maxNum int) []v1.EndpointAddress {
	if len(addresses) <= maxNum {
		return addresses
	}
	return addresses[0:maxNum]
}

// semanticIgnoreResourceVersion does semantic deep equality checks for objects
// but excludes ResourceVersion of ObjectReference. It is used when comparing
// endpoints in Endpoints and EndpointSlice objects to avoid unnecessary updates
// caused by Pod resourceVersion changes.
var semanticIgnoreResourceVersion = conversion.EqualitiesOrDie(
	func(a, b v1.ObjectReference) bool {
		a.ResourceVersion = ""
		b.ResourceVersion = ""
		return a == b
	},
)

// endpointSubsetsEqualIgnoreResourceVersion returns true if the EndpointSubsets
// have equal attributes, ignoring the ResourceVersion of each Pod's TargetRef.
func endpointSubsetsEqualIgnoreResourceVersion(subsets1, subsets2 []v1.EndpointSubset) bool {
	return semanticIgnoreResourceVersion.DeepEqual(subsets1, subsets2)
}
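
// Worked example (assumption, not upstream code) of the proportional truncation above,
// with maxCapacity = 1000:
//
//	eps := &v1.Endpoints{Subsets: []v1.EndpointSubset{
//		{Addresses: make([]v1.EndpointAddress, 800), NotReadyAddresses: make([]v1.EndpointAddress, 300)},
//		{NotReadyAddresses: make([]v1.EndpointAddress, 100)},
//	}}
//	truncateEndpoints(eps) // true: 1200 addresses exceed maxCapacity
//
//	// Ready addresses fit (800 <= 1000), so only not-ready addresses are truncated:
//	// max = 1000-800 = 200, numTotal = 400.
//	// Subset 0 keeps ceil(300/400*200) = 150 not-ready addresses,
//	// subset 1 keeps ceil(100/400*200) = 50, for exactly 1000 addresses in total.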