k8s.io/kubernetes@v1.29.3/pkg/controller/endpoint/endpoints_controller.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package endpoint

import (
	"context"
	"fmt"
	"math"
	"time"

	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/conversion"
	"k8s.io/apimachinery/pkg/labels"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	coreinformers "k8s.io/client-go/informers/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/leaderelection/resourcelock"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	endpointsliceutil "k8s.io/endpointslice/util"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/api/v1/endpoints"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	api "k8s.io/kubernetes/pkg/apis/core"
	helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/controller"
	utillabels "k8s.io/kubernetes/pkg/util/labels"
	utilnet "k8s.io/utils/net"
)

const (
	// maxRetries is the number of times a service will be retried before it is dropped out of the queue.
	// With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the
	// sequence of delays between successive queuings of a service.
	//
	// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s
	maxRetries = 15

	// maxCapacity represents the maximum number of addresses that should be
	// stored in an Endpoints resource. Endpoints exceeding this capacity are
	// truncated by this controller (see truncateEndpoints).
	maxCapacity = 1000

	// truncated is a possible value for the `endpoints.kubernetes.io/over-capacity` annotation on an
	// Endpoints resource and indicates that the number of endpoints has been truncated to
	// maxCapacity.
	truncated = "truncated"
)
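// The delays listed above come from the controller's rate-limited workqueue.
// A minimal sketch of the assumed limiter wiring, matching the maxRetries
// comment (the exact limiter composition lives in client-go's workqueue
// package, not in this file):
//
//	// DefaultControllerRateLimiter combines a per-item exponential backoff
//	// (5ms base, doubling after each failed sync) with an overall rate limit.
//	limiter := workqueue.DefaultControllerRateLimiter()
//	queue := workqueue.NewNamedRateLimitingQueue(limiter, "endpoint")
//	// After maxRetries failures, handleErr calls Forget and drops the key.
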
// NewEndpointController returns a new *Controller.
func NewEndpointController(podInformer coreinformers.PodInformer, serviceInformer coreinformers.ServiceInformer,
	endpointsInformer coreinformers.EndpointsInformer, client clientset.Interface, endpointUpdatesBatchPeriod time.Duration) *Controller {
	broadcaster := record.NewBroadcaster()
	recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "endpoint-controller"})

	e := &Controller{
		client:           client,
		queue:            workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "endpoint"),
		workerLoopPeriod: time.Second,
	}

	serviceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: e.onServiceUpdate,
		UpdateFunc: func(old, cur interface{}) {
			e.onServiceUpdate(cur)
		},
		DeleteFunc: e.onServiceDelete,
	})
	e.serviceLister = serviceInformer.Lister()
	e.servicesSynced = serviceInformer.Informer().HasSynced

	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    e.addPod,
		UpdateFunc: e.updatePod,
		DeleteFunc: e.deletePod,
	})
	e.podLister = podInformer.Lister()
	e.podsSynced = podInformer.Informer().HasSynced

	endpointsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		DeleteFunc: e.onEndpointsDelete,
	})
	e.endpointsLister = endpointsInformer.Lister()
	e.endpointsSynced = endpointsInformer.Informer().HasSynced

	e.triggerTimeTracker = endpointsliceutil.NewTriggerTimeTracker()
	e.eventBroadcaster = broadcaster
	e.eventRecorder = recorder

	e.endpointUpdatesBatchPeriod = endpointUpdatesBatchPeriod

	return e
}

// Controller manages selector-based service endpoints.
type Controller struct {
	client           clientset.Interface
	eventBroadcaster record.EventBroadcaster
	eventRecorder    record.EventRecorder

	// serviceLister is able to list/get services and is populated by the shared informer passed to
	// NewEndpointController.
	serviceLister corelisters.ServiceLister
	// servicesSynced returns true if the service shared informer has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	servicesSynced cache.InformerSynced

	// podLister is able to list/get pods and is populated by the shared informer passed to
	// NewEndpointController.
	podLister corelisters.PodLister
	// podsSynced returns true if the pod shared informer has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	podsSynced cache.InformerSynced

	// endpointsLister is able to list/get endpoints and is populated by the shared informer passed to
	// NewEndpointController.
	endpointsLister corelisters.EndpointsLister
	// endpointsSynced returns true if the endpoints shared informer has been synced at least once.
	// Added as a member to the struct to allow injection for testing.
	endpointsSynced cache.InformerSynced

	// Services that need to be updated. A channel is inappropriate here,
	// because it allows services with lots of pods to be serviced much
	// more often than services with few pods; it also would cause a
	// service that's inserted multiple times to be processed more than
	// necessary.
	queue workqueue.RateLimitingInterface
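	// Note that the rate-limiting workqueue also de-duplicates pending keys:
	// a service enqueued several times before a worker picks it up (for
	// example, a burst of pod updates for an illustrative key "default/web")
	// should still be synced only once.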

	// workerLoopPeriod is the time between worker runs. The workers process the queue of service and pod changes.
	workerLoopPeriod time.Duration

	// triggerTimeTracker is a util used to compute and export the EndpointsLastChangeTriggerTime
	// annotation.
	triggerTimeTracker *endpointsliceutil.TriggerTimeTracker

	endpointUpdatesBatchPeriod time.Duration
}

// Run will not return until ctx is canceled. workers determines how many
// services may be synced in parallel.
func (e *Controller) Run(ctx context.Context, workers int) {
	defer utilruntime.HandleCrash()

	// Start events processing pipeline.
	e.eventBroadcaster.StartStructuredLogging(0)
	e.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: e.client.CoreV1().Events("")})
	defer e.eventBroadcaster.Shutdown()

	defer e.queue.ShutDown()

	logger := klog.FromContext(ctx)
	logger.Info("Starting endpoint controller")
	defer logger.Info("Shutting down endpoint controller")

	if !cache.WaitForNamedCacheSync("endpoint", ctx.Done(), e.podsSynced, e.servicesSynced, e.endpointsSynced) {
		return
	}

	for i := 0; i < workers; i++ {
		go wait.UntilWithContext(ctx, e.worker, e.workerLoopPeriod)
	}

	go func() {
		defer utilruntime.HandleCrash()
		e.checkLeftoverEndpoints()
	}()

	<-ctx.Done()
}

// When a pod is added, figure out what services it will be a member of and
// enqueue them. obj must have *v1.Pod type.
func (e *Controller) addPod(obj interface{}) {
	pod := obj.(*v1.Pod)
	services, err := endpointsliceutil.GetPodServiceMemberships(e.serviceLister, pod)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Unable to get pod %s/%s's service memberships: %v", pod.Namespace, pod.Name, err))
		return
	}
	for key := range services {
		e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod)
	}
}

func podToEndpointAddressForService(svc *v1.Service, pod *v1.Pod) (*v1.EndpointAddress, error) {
	var endpointIP string
	ipFamily := v1.IPv4Protocol
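	// The address family is resolved from the Service where possible, and a
	// pod IP of that family is then selected below. For illustration, assume
	// a dual-stack pod with status.PodIPs ["10.244.1.5", "fd00::5"]: a Service
	// whose first IP family is IPv6 (or whose ClusterIP is IPv6) would yield
	// the EndpointAddress "fd00::5", while an IPv4-first Service would yield
	// "10.244.1.5".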
	if len(svc.Spec.IPFamilies) > 0 {
		// controller is connected to an api-server that correctly sets IPFamilies
		ipFamily = svc.Spec.IPFamilies[0] // this works for headful and headless
	} else {
		// controller is connected to an api server that does not correctly
		// set IPFamilies (e.g. old api-server during an upgrade)
		// TODO (khenidak): remove once upgrading from a cluster that does not
		// support dual stack is no longer possible
		if len(svc.Spec.ClusterIP) > 0 && svc.Spec.ClusterIP != v1.ClusterIPNone {
			// headful service. detect via service clusterIP
			if utilnet.IsIPv6String(svc.Spec.ClusterIP) {
				ipFamily = v1.IPv6Protocol
			}
		} else {
			// Since this is a headless service we use podIP to identify the family.
			// This assumes that status.PodIP is assigned correctly (follows pod cidr and
			// pod cidr list order is same as service cidr list order). The expectation is
			// this is *most probably* the case.

			// If the family was incorrectly identified then this will be corrected once the
			// upgrade is completed (controller connects to api-server that correctly defaults services).
			if utilnet.IsIPv6String(pod.Status.PodIP) {
				ipFamily = v1.IPv6Protocol
			}
		}
	}

	// find an ip that matches the family
	for _, podIP := range pod.Status.PodIPs {
		if (ipFamily == v1.IPv6Protocol) == utilnet.IsIPv6String(podIP.IP) {
			endpointIP = podIP.IP
			break
		}
	}

	if endpointIP == "" {
		return nil, fmt.Errorf("failed to find a matching endpoint for service %v", svc.Name)
	}

	return &v1.EndpointAddress{
		IP:       endpointIP,
		NodeName: &pod.Spec.NodeName,
		TargetRef: &v1.ObjectReference{
			Kind:      "Pod",
			Namespace: pod.ObjectMeta.Namespace,
			Name:      pod.ObjectMeta.Name,
			UID:       pod.ObjectMeta.UID,
		},
	}, nil
}

// When a pod is updated, figure out what services it used to be a member of
// and what services it will be a member of, and enqueue the union of these.
// old and cur must be *v1.Pod types.
func (e *Controller) updatePod(old, cur interface{}) {
	services := endpointsliceutil.GetServicesToUpdateOnPodChange(e.serviceLister, old, cur)
	for key := range services {
		e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod)
	}
}

// When a pod is deleted, enqueue the services the pod used to be a member of.
// obj could be a *v1.Pod, or a DeletionFinalStateUnknown marker item.
func (e *Controller) deletePod(obj interface{}) {
	pod := endpointsliceutil.GetPodFromDeleteAction(obj)
	if pod != nil {
		e.addPod(pod)
	}
}

// onServiceUpdate queues the Service for processing.
func (e *Controller) onServiceUpdate(obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
		return
	}
	e.queue.Add(key)
}

// onServiceDelete queues the Service for processing.
func (e *Controller) onServiceDelete(obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
		return
	}
	e.queue.Add(key)
}

func (e *Controller) onEndpointsDelete(obj interface{}) {
	key, err := controller.KeyFunc(obj)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
		return
	}
	e.queue.Add(key)
}
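// A note on keys: controller.KeyFunc is assumed here to be the usual
// cache.DeletionHandlingMetaNamespaceKeyFunc, so queue items are
// "<namespace>/<name>" strings (for example "default/my-service") that
// syncService later splits with cache.SplitMetaNamespaceKey.
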
// worker runs a worker thread that just dequeues items, processes them, and
// marks them done. You may run as many of these in parallel as you wish; the
// workqueue guarantees that they will not end up processing the same service
// at the same time.
func (e *Controller) worker(ctx context.Context) {
	for e.processNextWorkItem(ctx) {
	}
}

func (e *Controller) processNextWorkItem(ctx context.Context) bool {
	eKey, quit := e.queue.Get()
	if quit {
		return false
	}
	defer e.queue.Done(eKey)

	logger := klog.FromContext(ctx)
	err := e.syncService(ctx, eKey.(string))
	e.handleErr(logger, err, eKey)

	return true
}

func (e *Controller) handleErr(logger klog.Logger, err error, key interface{}) {
	if err == nil {
		e.queue.Forget(key)
		return
	}

	ns, name, keyErr := cache.SplitMetaNamespaceKey(key.(string))
	if keyErr != nil {
		logger.Error(err, "Failed to split meta namespace cache key", "key", key)
	}

	if e.queue.NumRequeues(key) < maxRetries {
		logger.V(2).Info("Error syncing endpoints, retrying", "service", klog.KRef(ns, name), "err", err)
		e.queue.AddRateLimited(key)
		return
	}

	logger.Info("Dropping service out of the queue", "service", klog.KRef(ns, name), "err", err)
	e.queue.Forget(key)
	utilruntime.HandleError(err)
}

func (e *Controller) syncService(ctx context.Context, key string) error {
	startTime := time.Now()
	logger := klog.FromContext(ctx)
	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}
	defer func() {
		logger.V(4).Info("Finished syncing service endpoints", "service", klog.KRef(namespace, name), "startTime", time.Since(startTime))
	}()

	service, err := e.serviceLister.Services(namespace).Get(name)
	if err != nil {
		if !errors.IsNotFound(err) {
			return err
		}

		// Delete the corresponding endpoint, as the service has been deleted.
		// TODO: Please note that this will delete an endpoint when a
		// service is deleted. However, if we're down at the time when
		// the service is deleted, we will miss that deletion, so this
		// doesn't completely solve the problem. See #6877.
		err = e.client.CoreV1().Endpoints(namespace).Delete(ctx, name, metav1.DeleteOptions{})
		if err != nil && !errors.IsNotFound(err) {
			return err
		}
		e.triggerTimeTracker.DeleteService(namespace, name)
		return nil
	}

	if service.Spec.Type == v1.ServiceTypeExternalName {
		// services with Type ExternalName receive no endpoints from this controller;
		// Ref: https://issues.k8s.io/105986
		return nil
	}

	if service.Spec.Selector == nil {
		// services without a selector receive no endpoints from this controller;
		// these services will receive the endpoints that are created out-of-band via the REST API.
		return nil
	}

	logger.V(5).Info("About to update endpoints for service", "service", klog.KRef(namespace, name))
	pods, err := e.podLister.Pods(service.Namespace).List(labels.Set(service.Spec.Selector).AsSelectorPreValidated())
	if err != nil {
		// Since we're getting stuff from a local cache, it is
		// basically impossible to get this error.
		return err
	}
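	// Pods are matched purely by the service's label selector against the
	// local informer cache. As a sketch, a selector of {"app": "web"} becomes
	// the label selector "app=web" and matches every pod in the service's
	// namespace carrying that label, regardless of readiness; readiness is
	// handled per address further below.
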
	// We call ComputeEndpointLastChangeTriggerTime here to make sure that the
	// state of the trigger time tracker gets updated even if the sync turns out
	// to be no-op and we don't update the endpoints object.
	endpointsLastChangeTriggerTime := e.triggerTimeTracker.
		ComputeEndpointLastChangeTriggerTime(namespace, service, pods)

	subsets := []v1.EndpointSubset{}
	var totalReadyEps int
	var totalNotReadyEps int

	for _, pod := range pods {
		if !endpointsliceutil.ShouldPodBeInEndpoints(pod, service.Spec.PublishNotReadyAddresses) {
			logger.V(5).Info("Pod is not included on endpoints for Service", "pod", klog.KObj(pod), "service", klog.KObj(service))
			continue
		}

		ep, err := podToEndpointAddressForService(service, pod)
		if err != nil {
			// this can happen if the cluster runs with some nodes configured as dual-stack
			// and some not, such as during an upgrade.
			logger.V(2).Info("Failed to find endpoint for service with ClusterIP on pod with error", "service", klog.KObj(service), "clusterIP", service.Spec.ClusterIP, "pod", klog.KObj(pod), "error", err)
			continue
		}

		epa := *ep
		if endpointsliceutil.ShouldSetHostname(pod, service) {
			epa.Hostname = pod.Spec.Hostname
		}

		// Allow headless service not to have ports.
		if len(service.Spec.Ports) == 0 {
			if service.Spec.ClusterIP == api.ClusterIPNone {
				subsets, totalReadyEps, totalNotReadyEps = addEndpointSubset(logger, subsets, pod, epa, nil, service.Spec.PublishNotReadyAddresses)
				// No need to repack subsets for headless service without ports.
			}
		} else {
			for i := range service.Spec.Ports {
				servicePort := &service.Spec.Ports[i]
				portNum, err := podutil.FindPort(pod, servicePort)
				if err != nil {
					logger.V(4).Info("Failed to find port for service", "service", klog.KObj(service), "error", err)
					continue
				}
				epp := endpointPortFromServicePort(servicePort, portNum)

				var readyEps, notReadyEps int
				subsets, readyEps, notReadyEps = addEndpointSubset(logger, subsets, pod, epa, epp, service.Spec.PublishNotReadyAddresses)
				totalReadyEps = totalReadyEps + readyEps
				totalNotReadyEps = totalNotReadyEps + notReadyEps
			}
		}
	}
	subsets = endpoints.RepackSubsets(subsets)

	// See if there's actually an update here.
	currentEndpoints, err := e.endpointsLister.Endpoints(service.Namespace).Get(service.Name)
	if err != nil {
		if !errors.IsNotFound(err) {
			return err
		}
		currentEndpoints = &v1.Endpoints{
			ObjectMeta: metav1.ObjectMeta{
				Name:   service.Name,
				Labels: service.Labels,
			},
		}
	}

	createEndpoints := len(currentEndpoints.ResourceVersion) == 0

	// Compare the sorted subsets and labels.
	// Remove the HeadlessService label from the endpoints if it exists,
	// as this won't be set on the service itself
	// and will cause a false negative in this diff check.
	// But first check if it has that label to avoid expensive copies.
	compareLabels := currentEndpoints.Labels
	if _, ok := currentEndpoints.Labels[v1.IsHeadlessService]; ok {
		compareLabels = utillabels.CloneAndRemoveLabel(currentEndpoints.Labels, v1.IsHeadlessService)
	}
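	// For illustration: a headless service's Endpoints carries the
	// v1.IsHeadlessService label ("service.kubernetes.io/headless", added
	// further below), which the Service itself never has; stripping it here
	// keeps the label comparison from always reporting a difference and
	// forcing a needless update.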
	// When comparing the subsets, we ignore the difference in ResourceVersion of Pod to avoid unnecessary
	// Endpoints updates caused by Pod updates that we don't care about, e.g. annotation updates.
	if !createEndpoints &&
		endpointSubsetsEqualIgnoreResourceVersion(currentEndpoints.Subsets, subsets) &&
		apiequality.Semantic.DeepEqual(compareLabels, service.Labels) &&
		capacityAnnotationSetCorrectly(currentEndpoints.Annotations, currentEndpoints.Subsets) {
		logger.V(5).Info("endpoints are equal, skipping update", "service", klog.KObj(service))
		return nil
	}
	newEndpoints := currentEndpoints.DeepCopy()
	newEndpoints.Subsets = subsets
	newEndpoints.Labels = service.Labels
	if newEndpoints.Annotations == nil {
		newEndpoints.Annotations = make(map[string]string)
	}

	if !endpointsLastChangeTriggerTime.IsZero() {
		newEndpoints.Annotations[v1.EndpointsLastChangeTriggerTime] =
			endpointsLastChangeTriggerTime.UTC().Format(time.RFC3339Nano)
	} else { // No new trigger time, clear the annotation.
		delete(newEndpoints.Annotations, v1.EndpointsLastChangeTriggerTime)
	}

	if truncateEndpoints(newEndpoints) {
		newEndpoints.Annotations[v1.EndpointsOverCapacity] = truncated
	} else {
		delete(newEndpoints.Annotations, v1.EndpointsOverCapacity)
	}

	if newEndpoints.Labels == nil {
		newEndpoints.Labels = make(map[string]string)
	}

	if !helper.IsServiceIPSet(service) {
		newEndpoints.Labels = utillabels.CloneAndAddLabel(newEndpoints.Labels, v1.IsHeadlessService, "")
	} else {
		newEndpoints.Labels = utillabels.CloneAndRemoveLabel(newEndpoints.Labels, v1.IsHeadlessService)
	}

	logger.V(4).Info("Update endpoints", "service", klog.KObj(service), "readyEndpoints", totalReadyEps, "notreadyEndpoints", totalNotReadyEps)
	if createEndpoints {
		// No previous endpoints, create them
		_, err = e.client.CoreV1().Endpoints(service.Namespace).Create(ctx, newEndpoints, metav1.CreateOptions{})
	} else {
		// Pre-existing
		_, err = e.client.CoreV1().Endpoints(service.Namespace).Update(ctx, newEndpoints, metav1.UpdateOptions{})
	}
	if err != nil {
		if createEndpoints && errors.IsForbidden(err) {
			// A request is forbidden primarily for two reasons:
			// 1. namespace is terminating, endpoint creation is not allowed by default.
			// 2. policy is misconfigured, in which case no service would function anywhere.
			// Given the frequency of 1, we log at a lower level.
			logger.V(5).Info("Forbidden from creating endpoints", "error", err)

			// If the namespace is terminating, creates will continue to fail. Simply drop the item.
			if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
				return nil
			}
		}

		if createEndpoints {
			e.eventRecorder.Eventf(newEndpoints, v1.EventTypeWarning, "FailedToCreateEndpoint", "Failed to create endpoint for service %v/%v: %v", service.Namespace, service.Name, err)
		} else {
			e.eventRecorder.Eventf(newEndpoints, v1.EventTypeWarning, "FailedToUpdateEndpoint", "Failed to update endpoint %v/%v: %v", service.Namespace, service.Name, err)
		}

		return err
	}
	return nil
}
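// As a rough sketch of the output above: for a Service "default/web"
// selecting one ready pod at 10.244.1.5 with a single port 80/TCP named
// "http", syncService would produce an Endpoints object along these lines
// (the IPs and names are illustrative only):
//
//	subsets:
//	- addresses:
//	  - ip: 10.244.1.5
//	    targetRef: {kind: Pod, name: web-abc12, namespace: default}
//	  ports:
//	  - name: http
//	    port: 80
//	    protocol: TCP
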
// checkLeftoverEndpoints lists all currently existing endpoints and adds their
// service to the queue. This will detect endpoints that exist with no
// corresponding service; these endpoints need to be deleted. We only need to
// do this once on startup, because in steady-state these are detected (but
// some stragglers could have been left behind if the endpoint controller
// reboots).
func (e *Controller) checkLeftoverEndpoints() {
	list, err := e.endpointsLister.List(labels.Everything())
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Unable to list endpoints (%v); orphaned endpoints will not be cleaned up. (They're pretty harmless, but you can restart this component if you want another attempt made.)", err))
		return
	}
	for _, ep := range list {
		if _, ok := ep.Annotations[resourcelock.LeaderElectionRecordAnnotationKey]; ok {
			// When there are multiple controller-manager instances, we have
			// observed that enqueueing these objects deletes the
			// leader-election endpoints after ~5 minutes and causes
			// re-election, so skip the delete here: leader election uses
			// endpoints that intentionally have no corresponding service.
			continue
		}
		key, err := controller.KeyFunc(ep)
		if err != nil {
			utilruntime.HandleError(fmt.Errorf("Unable to get key for endpoint %#v", ep))
			continue
		}
		e.queue.Add(key)
	}
}

// addEndpointSubset adds the endpoint address and port to the list of subsets.
// The address is added to the ready or not-ready field depending on the pod
// status and the Service's PublishNotReadyAddresses field value.
// The pod passed to this function must have already been filtered through ShouldPodBeInEndpoints.
func addEndpointSubset(logger klog.Logger, subsets []v1.EndpointSubset, pod *v1.Pod, epa v1.EndpointAddress,
	epp *v1.EndpointPort, tolerateUnreadyEndpoints bool) ([]v1.EndpointSubset, int, int) {
	var readyEps int
	var notReadyEps int
	ports := []v1.EndpointPort{}
	if epp != nil {
		ports = append(ports, *epp)
	}
	if tolerateUnreadyEndpoints || podutil.IsPodReady(pod) {
		subsets = append(subsets, v1.EndpointSubset{
			Addresses: []v1.EndpointAddress{epa},
			Ports:     ports,
		})
		readyEps++
	} else { // if it is not a ready address it has to be not ready
		logger.V(5).Info("Pod is out of service", "pod", klog.KObj(pod))
		subsets = append(subsets, v1.EndpointSubset{
			NotReadyAddresses: []v1.EndpointAddress{epa},
			Ports:             ports,
		})
		notReadyEps++
	}
	return subsets, readyEps, notReadyEps
}

func endpointPortFromServicePort(servicePort *v1.ServicePort, portNum int) *v1.EndpointPort {
	return &v1.EndpointPort{
		Name:        servicePort.Name,
		Port:        int32(portNum),
		Protocol:    servicePort.Protocol,
		AppProtocol: servicePort.AppProtocol,
	}
}

// capacityAnnotationSetCorrectly returns true only if the total number of addresses
// is within maxCapacity and the over-capacity annotation is not set. It returns false
// if the subsets are over capacity (they should have been truncated and annotated) or
// if the annotation is set while the subsets are within capacity.
func capacityAnnotationSetCorrectly(annotations map[string]string, subsets []v1.EndpointSubset) bool {
	numEndpoints := 0
	for _, subset := range subsets {
		numEndpoints += len(subset.Addresses) + len(subset.NotReadyAddresses)
	}
	if numEndpoints > maxCapacity {
		// If subsets are over capacity, they must be truncated so consider
		// the annotation as not set correctly
		return false
	}
	_, ok := annotations[v1.EndpointsOverCapacity]
	return !ok
}
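// A worked example of the truncation below (the figures are illustrative):
// with maxCapacity = 1000 and two subsets holding 900 and 600 ready addresses
// (1500 total), ready addresses themselves must be truncated, so max = 1000
// and numTotal = 1500. The first subset keeps ceil(900/1500*1000) = 600
// addresses, leaving canBeAdded = 400; the second keeps
// min(ceil(600/1500*1000), 400) = 400; all not-ready addresses are dropped,
// and exactly 1000 addresses remain.
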
// truncateEndpoints distributes, on a best-effort basis, the allowed number of
// endpoints over the subsets in proportion to each subset's share of the total,
// prioritizing ready endpoints over not-ready endpoints. It returns true if any
// truncation was performed.
func truncateEndpoints(endpoints *v1.Endpoints) bool {
	totalReady := 0
	totalNotReady := 0
	for _, subset := range endpoints.Subsets {
		totalReady += len(subset.Addresses)
		totalNotReady += len(subset.NotReadyAddresses)
	}

	if totalReady+totalNotReady <= maxCapacity {
		return false
	}

	truncateReady := false
	max := maxCapacity - totalReady
	numTotal := totalNotReady
	if totalReady > maxCapacity {
		truncateReady = true
		max = maxCapacity
		numTotal = totalReady
	}
	canBeAdded := max

	for i := range endpoints.Subsets {
		subset := endpoints.Subsets[i]
		numInSubset := len(subset.Addresses)
		if !truncateReady {
			numInSubset = len(subset.NotReadyAddresses)
		}

		// The number of endpoints per subset will be based on the proportion of endpoints
		// in this subset versus the total number of endpoints. The proportion of endpoints
		// will be rounded up, which most likely will lead to the last subset having fewer
		// endpoints than the expected proportion.
		toBeAdded := int(math.Ceil((float64(numInSubset) / float64(numTotal)) * float64(max)))
		// If there are not enough endpoints left for the last subset, ensure only the number up
		// to the capacity are added.
		if toBeAdded > canBeAdded {
			toBeAdded = canBeAdded
		}

		if truncateReady {
			// Truncate ready Addresses to allocated proportion and truncate all not ready
			// addresses
			subset.Addresses = addressSubset(subset.Addresses, toBeAdded)
			subset.NotReadyAddresses = []v1.EndpointAddress{}
			canBeAdded -= len(subset.Addresses)
		} else {
			// Only truncate the not ready addresses
			subset.NotReadyAddresses = addressSubset(subset.NotReadyAddresses, toBeAdded)
			canBeAdded -= len(subset.NotReadyAddresses)
		}
		endpoints.Subsets[i] = subset
	}
	return true
}

// addressSubset takes a list of addresses and returns a prefix of at most maxNum
// entries. If the list has maxNum entries or fewer, it is returned unchanged.
func addressSubset(addresses []v1.EndpointAddress, maxNum int) []v1.EndpointAddress {
	if len(addresses) <= maxNum {
		return addresses
	}
	return addresses[0:maxNum]
}

// semanticIgnoreResourceVersion does semantic deep equality checks for objects
// but excludes the ResourceVersion of ObjectReference. It is used when comparing
// endpoints in Endpoints and EndpointSlice objects to avoid unnecessary updates
// caused by Pod resourceVersion changes.
var semanticIgnoreResourceVersion = conversion.EqualitiesOrDie(
	func(a, b v1.ObjectReference) bool {
		a.ResourceVersion = ""
		b.ResourceVersion = ""
		return a == b
	},
)

// endpointSubsetsEqualIgnoreResourceVersion returns true if the EndpointSubsets
// have equal attributes, ignoring the ResourceVersion of the Pod TargetRef.
func endpointSubsetsEqualIgnoreResourceVersion(subsets1, subsets2 []v1.EndpointSubset) bool {
	return semanticIgnoreResourceVersion.DeepEqual(subsets1, subsets2)
}
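// For illustration: two subsets that differ only in their addresses'
// TargetRef.ResourceVersion (say "100" vs "101", as happens when a pod is
// merely re-annotated) compare as equal here, while any change to IPs, ports,
// readiness, or other TargetRef fields does not.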