k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/endpoint/endpoints_controller.go

     1  /*
     2  Copyright 2014 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package endpoint
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"time"
    24  
    25  	v1 "k8s.io/api/core/v1"
    26  	apiequality "k8s.io/apimachinery/pkg/api/equality"
    27  	"k8s.io/apimachinery/pkg/api/errors"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/conversion"
    30  	"k8s.io/apimachinery/pkg/labels"
    31  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    32  	"k8s.io/apimachinery/pkg/util/wait"
    33  	coreinformers "k8s.io/client-go/informers/core/v1"
    34  	clientset "k8s.io/client-go/kubernetes"
    35  	"k8s.io/client-go/kubernetes/scheme"
    36  	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
    37  	corelisters "k8s.io/client-go/listers/core/v1"
    38  	"k8s.io/client-go/tools/cache"
    39  	"k8s.io/client-go/tools/leaderelection/resourcelock"
    40  	"k8s.io/client-go/tools/record"
    41  	"k8s.io/client-go/util/workqueue"
    42  	endpointsliceutil "k8s.io/endpointslice/util"
    43  	"k8s.io/klog/v2"
    44  	"k8s.io/kubernetes/pkg/api/v1/endpoints"
    45  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    46  	api "k8s.io/kubernetes/pkg/apis/core"
    47  	helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
    48  	"k8s.io/kubernetes/pkg/controller"
    49  	utillabels "k8s.io/kubernetes/pkg/util/labels"
    50  	utilnet "k8s.io/utils/net"
    51  )
    52  
    53  const (
    54  	// maxRetries is the number of times a service will be retried before it is dropped out of the queue.
    55  	// With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the
    56  	// sequence of delays between successive queuings of a service.
    57  	//
    58  	// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s
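	// For example, the final delay before a service is dropped is 5ms * 2^14 ≈ 82s.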
    59  	maxRetries = 15
    60  
    61  	// maxCapacity represents the maximum number of addresses that should be
    62  	// stored in an Endpoints resource. Endpoints exceeding this capacity are
    63  	// truncated by this controller and the resource is annotated as truncated.
    64  	maxCapacity = 1000
    65  
    66  	// truncated is a possible value for the `endpoints.kubernetes.io/over-capacity` annotation on an
    67  	// Endpoints resource and indicates that the number of endpoints has been truncated to
    68  	// maxCapacity
    69  	truncated = "truncated"
    70  )
    71  
    72  // NewEndpointController returns a new *Controller.
    73  func NewEndpointController(ctx context.Context, podInformer coreinformers.PodInformer, serviceInformer coreinformers.ServiceInformer,
    74  	endpointsInformer coreinformers.EndpointsInformer, client clientset.Interface, endpointUpdatesBatchPeriod time.Duration) *Controller {
    75  	broadcaster := record.NewBroadcaster(record.WithContext(ctx))
    76  	recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "endpoint-controller"})
    77  
    78  	e := &Controller{
    79  		client: client,
    80  		queue: workqueue.NewTypedRateLimitingQueueWithConfig(
    81  			workqueue.DefaultTypedControllerRateLimiter[string](),
    82  			workqueue.TypedRateLimitingQueueConfig[string]{
    83  				Name: "endpoint",
    84  			},
    85  		),
    86  		workerLoopPeriod: time.Second,
    87  	}
    88  
    89  	serviceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
    90  		AddFunc: e.onServiceUpdate,
    91  		UpdateFunc: func(old, cur interface{}) {
    92  			e.onServiceUpdate(cur)
    93  		},
    94  		DeleteFunc: e.onServiceDelete,
    95  	})
    96  	e.serviceLister = serviceInformer.Lister()
    97  	e.servicesSynced = serviceInformer.Informer().HasSynced
    98  
    99  	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   100  		AddFunc:    e.addPod,
   101  		UpdateFunc: e.updatePod,
   102  		DeleteFunc: e.deletePod,
   103  	})
   104  	e.podLister = podInformer.Lister()
   105  	e.podsSynced = podInformer.Informer().HasSynced
   106  
   107  	endpointsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   108  		DeleteFunc: e.onEndpointsDelete,
   109  	})
   110  	e.endpointsLister = endpointsInformer.Lister()
   111  	e.endpointsSynced = endpointsInformer.Informer().HasSynced
   112  
   113  	e.triggerTimeTracker = endpointsliceutil.NewTriggerTimeTracker()
   114  	e.eventBroadcaster = broadcaster
   115  	e.eventRecorder = recorder
   116  
   117  	e.endpointUpdatesBatchPeriod = endpointUpdatesBatchPeriod
   118  
   119  	return e
   120  }
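// For illustration only, a minimal sketch of how this controller is typically
// constructed and started (kube-controller-manager wires it up itself; the shared
// informer factory, batch period, and worker count below are illustrative
// assumptions):
//
//	factory := informers.NewSharedInformerFactory(client, 0)
//	ec := NewEndpointController(ctx,
//		factory.Core().V1().Pods(),
//		factory.Core().V1().Services(),
//		factory.Core().V1().Endpoints(),
//		client, 0)
//	factory.Start(ctx.Done())
//	go ec.Run(ctx, 5)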
   121  
   122  // Controller manages selector-based service endpoints.
   123  type Controller struct {
   124  	client           clientset.Interface
   125  	eventBroadcaster record.EventBroadcaster
   126  	eventRecorder    record.EventRecorder
   127  
   128  	// serviceLister is able to list/get services and is populated by the shared informer passed to
   129  	// NewEndpointController.
   130  	serviceLister corelisters.ServiceLister
   131  	// servicesSynced returns true if the service shared informer has been synced at least once.
   132  	// Added as a member to the struct to allow injection for testing.
   133  	servicesSynced cache.InformerSynced
   134  
   135  	// podLister is able to list/get pods and is populated by the shared informer passed to
   136  	// NewEndpointController.
   137  	podLister corelisters.PodLister
   138  	// podsSynced returns true if the pod shared informer has been synced at least once.
   139  	// Added as a member to the struct to allow injection for testing.
   140  	podsSynced cache.InformerSynced
   141  
   142  	// endpointsLister is able to list/get endpoints and is populated by the shared informer passed to
   143  	// NewEndpointController.
   144  	endpointsLister corelisters.EndpointsLister
   145  	// endpointsSynced returns true if the endpoints shared informer has been synced at least once.
   146  	// Added as a member to the struct to allow injection for testing.
   147  	endpointsSynced cache.InformerSynced
   148  
   149  	// Services that need to be updated. A channel is inappropriate here,
   150  	// because it allows services with lots of pods to be serviced much
   151  	// more often than services with few pods; it also would cause a
   152  	// service that's inserted multiple times to be processed more than
   153  	// necessary.
   154  	queue workqueue.TypedRateLimitingInterface[string]
   155  
   156  	// workerLoopPeriod is the time between worker runs. The workers process the queue of service and pod changes.
   157  	workerLoopPeriod time.Duration
   158  
   159  	// triggerTimeTracker is a utility used to compute and export the EndpointsLastChangeTriggerTime
   160  	// annotation.
   161  	triggerTimeTracker *endpointsliceutil.TriggerTimeTracker
   162  
   163  	endpointUpdatesBatchPeriod time.Duration
   164  }
   165  
   166  // Run will not return until ctx is cancelled. workers determines how many
   167  // endpoints will be handled in parallel.
   168  func (e *Controller) Run(ctx context.Context, workers int) {
   169  	defer utilruntime.HandleCrash()
   170  
   171  	// Start events processing pipeline.
   172  	e.eventBroadcaster.StartStructuredLogging(3)
   173  	e.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: e.client.CoreV1().Events("")})
   174  	defer e.eventBroadcaster.Shutdown()
   175  
   176  	defer e.queue.ShutDown()
   177  
   178  	logger := klog.FromContext(ctx)
   179  	logger.Info("Starting endpoint controller")
   180  	defer logger.Info("Shutting down endpoint controller")
   181  
   182  	if !cache.WaitForNamedCacheSync("endpoint", ctx.Done(), e.podsSynced, e.servicesSynced, e.endpointsSynced) {
   183  		return
   184  	}
   185  
   186  	for i := 0; i < workers; i++ {
   187  		go wait.UntilWithContext(ctx, e.worker, e.workerLoopPeriod)
   188  	}
   189  
   190  	go func() {
   191  		defer utilruntime.HandleCrash()
   192  		e.checkLeftoverEndpoints()
   193  	}()
   194  
   195  	<-ctx.Done()
   196  }
   197  
   198  // When a pod is added, figure out what services it will be a member of and
   199  // enqueue them. obj must have *v1.Pod type.
   200  func (e *Controller) addPod(obj interface{}) {
   201  	pod := obj.(*v1.Pod)
   202  	services, err := endpointsliceutil.GetPodServiceMemberships(e.serviceLister, pod)
   203  	if err != nil {
   204  		utilruntime.HandleError(fmt.Errorf("Unable to get pod %s/%s's service memberships: %v", pod.Namespace, pod.Name, err))
   205  		return
   206  	}
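	// Enqueueing with AddAfter rather than Add batches rapid pod churn: all pod
	// changes for a given service that arrive within endpointUpdatesBatchPeriod
	// collapse into a single queued sync.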
   207  	for key := range services {
   208  		e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod)
   209  	}
   210  }
   211  
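// podToEndpointAddressForService returns an EndpointAddress for the given pod,
// using the pod IP whose family matches the Service's primary IP family.
// For example (illustrative values): for a Service with spec.ipFamilies ["IPv6", "IPv4"]
// and a pod reporting podIPs ["10.1.2.3", "fd00::3"], the primary family is IPv6,
// so the returned EndpointAddress carries fd00::3.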
   212  func podToEndpointAddressForService(svc *v1.Service, pod *v1.Pod) (*v1.EndpointAddress, error) {
   213  	var endpointIP string
   214  	ipFamily := v1.IPv4Protocol
   215  
   216  	if len(svc.Spec.IPFamilies) > 0 {
   217  		// controller is connected to an api-server that correctly sets IPFamilies
   218  		ipFamily = svc.Spec.IPFamilies[0] // this works for headful and headless
   219  	} else {
   220  		// controller is connected to an api server that does not correctly
   221  		// set IPFamilies (e.g. old api-server during an upgrade)
   222  		// TODO (khenidak): remove once it is no longer possible to upgrade
   223  		// from a cluster that does not support dual stack
   224  		if len(svc.Spec.ClusterIP) > 0 && svc.Spec.ClusterIP != v1.ClusterIPNone {
   225  			// headful service; detect the family via the service ClusterIP
   226  			if utilnet.IsIPv6String(svc.Spec.ClusterIP) {
   227  				ipFamily = v1.IPv6Protocol
   228  			}
   229  		} else {
   230  			// Since this is a headless service we use podIP to identify the family.
   231  			// This assumes that status.PodIP is assigned correctly (follows pod cidr and
   232  			// pod cidr list order is same as service cidr list order). The expectation is
   233  			// this is *most probably* the case.
   234  
   235  			// if the family was incorrectly identified then this will be corrected once the
   236  			// upgrade is completed (controller connects to api-server that correctly defaults services)
   237  			if utilnet.IsIPv6String(pod.Status.PodIP) {
   238  				ipFamily = v1.IPv6Protocol
   239  			}
   240  		}
   241  	}
   242  
   243  	// find an ip that matches the family
   244  	for _, podIP := range pod.Status.PodIPs {
   245  		if (ipFamily == v1.IPv6Protocol) == utilnet.IsIPv6String(podIP.IP) {
   246  			endpointIP = podIP.IP
   247  			break
   248  		}
   249  	}
   250  
   251  	if endpointIP == "" {
   252  		return nil, fmt.Errorf("failed to find a matching endpoint for service %v", svc.Name)
   253  	}
   254  
   255  	return &v1.EndpointAddress{
   256  		IP:       endpointIP,
   257  		NodeName: &pod.Spec.NodeName,
   258  		TargetRef: &v1.ObjectReference{
   259  			Kind:      "Pod",
   260  			Namespace: pod.ObjectMeta.Namespace,
   261  			Name:      pod.ObjectMeta.Name,
   262  			UID:       pod.ObjectMeta.UID,
   263  		},
   264  	}, nil
   265  }
   266  
   267  // When a pod is updated, figure out what services it used to be a member of
   268  // and what services it will be a member of, and enqueue the union of these.
   269  // old and cur must be *v1.Pod types.
   270  func (e *Controller) updatePod(old, cur interface{}) {
   271  	services := endpointsliceutil.GetServicesToUpdateOnPodChange(e.serviceLister, old, cur)
   272  	for key := range services {
   273  		e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod)
   274  	}
   275  }
   276  
   277  // When a pod is deleted, enqueue the services the pod used to be a member of.
   278  // obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item.
   279  func (e *Controller) deletePod(obj interface{}) {
   280  	pod := endpointsliceutil.GetPodFromDeleteAction(obj)
   281  	if pod != nil {
   282  		e.addPod(pod)
   283  	}
   284  }
   285  
   286  // onServiceUpdate queues the Service for processing.
   287  func (e *Controller) onServiceUpdate(obj interface{}) {
   288  	key, err := controller.KeyFunc(obj)
   289  	if err != nil {
   290  		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
   291  		return
   292  	}
   293  	e.queue.Add(key)
   294  }
   295  
   296  // onServiceDelete queues the Service for processing.
   297  func (e *Controller) onServiceDelete(obj interface{}) {
   298  	key, err := controller.KeyFunc(obj)
   299  	if err != nil {
   300  		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
   301  		return
   302  	}
   303  	e.queue.Add(key)
   304  }
   305  
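// onEndpointsDelete queues the Service matching a deleted Endpoints object so that,
// if the Service still exists, its Endpoints are recreated on the next sync.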
   306  func (e *Controller) onEndpointsDelete(obj interface{}) {
   307  	key, err := controller.KeyFunc(obj)
   308  	if err != nil {
   309  		utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
   310  		return
   311  	}
   312  	e.queue.Add(key)
   313  }
   314  
   315  // worker runs a worker thread that just dequeues items, processes them, and
   316  // marks them done. You may run as many of these in parallel as you wish; the
   317  // workqueue guarantees that they will not end up processing the same service
   318  // at the same time.
   319  func (e *Controller) worker(ctx context.Context) {
   320  	for e.processNextWorkItem(ctx) {
   321  	}
   322  }
   323  
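// processNextWorkItem pulls a single service key off the queue, syncs it, and lets
// handleErr decide whether to requeue; it returns false only when the queue is shutting down.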
   324  func (e *Controller) processNextWorkItem(ctx context.Context) bool {
   325  	eKey, quit := e.queue.Get()
   326  	if quit {
   327  		return false
   328  	}
   329  	defer e.queue.Done(eKey)
   330  
   331  	logger := klog.FromContext(ctx)
   332  	err := e.syncService(ctx, eKey)
   333  	e.handleErr(logger, err, eKey)
   334  
   335  	return true
   336  }
   337  
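// handleErr forgets the key on success. On failure it requeues the service with
// rate-limited backoff up to maxRetries, after which the service is dropped from
// the queue and the error is reported.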
   338  func (e *Controller) handleErr(logger klog.Logger, err error, key string) {
   339  	if err == nil {
   340  		e.queue.Forget(key)
   341  		return
   342  	}
   343  
   344  	ns, name, keyErr := cache.SplitMetaNamespaceKey(key)
   345  	if keyErr != nil {
   346  		logger.Error(err, "Failed to split meta namespace cache key", "key", key)
   347  	}
   348  
   349  	if e.queue.NumRequeues(key) < maxRetries {
   350  		logger.V(2).Info("Error syncing endpoints, retrying", "service", klog.KRef(ns, name), "err", err)
   351  		e.queue.AddRateLimited(key)
   352  		return
   353  	}
   354  
   355  	logger.Info("Dropping service out of the queue", "service", klog.KRef(ns, name), "err", err)
   356  	e.queue.Forget(key)
   357  	utilruntime.HandleError(err)
   358  }
   359  
   360  func (e *Controller) syncService(ctx context.Context, key string) error {
   361  	startTime := time.Now()
   362  	logger := klog.FromContext(ctx)
   363  	namespace, name, err := cache.SplitMetaNamespaceKey(key)
   364  	if err != nil {
   365  		return err
   366  	}
   367  	defer func() {
   368  		logger.V(4).Info("Finished syncing service endpoints", "service", klog.KRef(namespace, name), "startTime", time.Since(startTime))
   369  	}()
   370  
   371  	service, err := e.serviceLister.Services(namespace).Get(name)
   372  	if err != nil {
   373  		if !errors.IsNotFound(err) {
   374  			return err
   375  		}
   376  
   377  		// Delete the corresponding endpoint, as the service has been deleted.
   378  		// TODO: Please note that this will delete an endpoint when a
   379  		// service is deleted. However, if we're down at the time when
   380  		// the service is deleted, we will miss that deletion, so this
   381  		// doesn't completely solve the problem. See #6877.
   382  		err = e.client.CoreV1().Endpoints(namespace).Delete(ctx, name, metav1.DeleteOptions{})
   383  		if err != nil && !errors.IsNotFound(err) {
   384  			return err
   385  		}
   386  		e.triggerTimeTracker.DeleteService(namespace, name)
   387  		return nil
   388  	}
   389  
   390  	if service.Spec.Type == v1.ServiceTypeExternalName {
   391  		// services with Type ExternalName receive no endpoints from this controller;
   392  		// Ref: https://issues.k8s.io/105986
   393  		return nil
   394  	}
   395  
   396  	if service.Spec.Selector == nil {
   397  		// services without a selector receive no endpoints from this controller;
   398  		// these services will receive the endpoints that are created out-of-band via the REST API.
   399  		return nil
   400  	}
   401  
   402  	logger.V(5).Info("About to update endpoints for service", "service", klog.KRef(namespace, name))
   403  	pods, err := e.podLister.Pods(service.Namespace).List(labels.Set(service.Spec.Selector).AsSelectorPreValidated())
   404  	if err != nil {
   405  		// Since we're getting stuff from a local cache, it is
   406  		// basically impossible to get this error.
   407  		return err
   408  	}
   409  
   410  	// We call ComputeEndpointLastChangeTriggerTime here to make sure that the
   411  	// state of the trigger time tracker gets updated even if the sync turns out
   412  	// to be no-op and we don't update the endpoints object.
   413  	endpointsLastChangeTriggerTime := e.triggerTimeTracker.
   414  		ComputeEndpointLastChangeTriggerTime(namespace, service, pods)
   415  
   416  	subsets := []v1.EndpointSubset{}
   417  	var totalReadyEps int
   418  	var totalNotReadyEps int
   419  
   420  	for _, pod := range pods {
   421  		if !endpointsliceutil.ShouldPodBeInEndpoints(pod, service.Spec.PublishNotReadyAddresses) {
   422  			logger.V(5).Info("Pod is not included on endpoints for Service", "pod", klog.KObj(pod), "service", klog.KObj(service))
   423  			continue
   424  		}
   425  
   426  		ep, err := podToEndpointAddressForService(service, pod)
   427  		if err != nil {
   428  			// this can happen if the cluster runs with some nodes configured as dual stack
   429  			// and some as not, such as during an upgrade
   430  			logger.V(2).Info("Failed to find endpoint for service with ClusterIP on pod with error", "service", klog.KObj(service), "clusterIP", service.Spec.ClusterIP, "pod", klog.KObj(pod), "error", err)
   431  			continue
   432  		}
   433  
   434  		epa := *ep
   435  		if endpointsliceutil.ShouldSetHostname(pod, service) {
   436  			epa.Hostname = pod.Spec.Hostname
   437  		}
   438  
   439  		// Allow headless service not to have ports.
   440  		if len(service.Spec.Ports) == 0 {
   441  			if service.Spec.ClusterIP == api.ClusterIPNone {
   442  				subsets, totalReadyEps, totalNotReadyEps = addEndpointSubset(logger, subsets, pod, epa, nil, service.Spec.PublishNotReadyAddresses)
   443  				// No need to repack subsets for headless service without ports.
   444  			}
   445  		} else {
   446  			for i := range service.Spec.Ports {
   447  				servicePort := &service.Spec.Ports[i]
   448  				portNum, err := podutil.FindPort(pod, servicePort)
   449  				if err != nil {
   450  					logger.V(4).Info("Failed to find port for service", "service", klog.KObj(service), "error", err)
   451  					continue
   452  				}
   453  				epp := endpointPortFromServicePort(servicePort, portNum)
   454  
   455  				var readyEps, notReadyEps int
   456  				subsets, readyEps, notReadyEps = addEndpointSubset(logger, subsets, pod, epa, epp, service.Spec.PublishNotReadyAddresses)
   457  				totalReadyEps = totalReadyEps + readyEps
   458  				totalNotReadyEps = totalNotReadyEps + notReadyEps
   459  			}
   460  		}
   461  	}
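	// RepackSubsets collapses the per-address, per-port subsets built above into the
	// canonical form: addresses sharing an identical set of ports are merged into a
	// single subset, and subsets, addresses, and ports are sorted deterministically.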
   462  	subsets = endpoints.RepackSubsets(subsets)
   463  
   464  	// See if there's actually an update here.
   465  	currentEndpoints, err := e.endpointsLister.Endpoints(service.Namespace).Get(service.Name)
   466  	if err != nil {
   467  		if !errors.IsNotFound(err) {
   468  			return err
   469  		}
   470  		currentEndpoints = &v1.Endpoints{
   471  			ObjectMeta: metav1.ObjectMeta{
   472  				Name:   service.Name,
   473  				Labels: service.Labels,
   474  			},
   475  		}
   476  	}
   477  
   478  	createEndpoints := len(currentEndpoints.ResourceVersion) == 0
   479  
   480  	// Compare the sorted subsets and labels
   481  	// Remove the HeadlessService label from the endpoints if it exists,
   482  	// as this won't be set on the service itself
   483  	// and will cause a false negative in this diff check.
   484  	// But first check if it has that label to avoid expensive copies.
   485  	compareLabels := currentEndpoints.Labels
   486  	if _, ok := currentEndpoints.Labels[v1.IsHeadlessService]; ok {
   487  		compareLabels = utillabels.CloneAndRemoveLabel(currentEndpoints.Labels, v1.IsHeadlessService)
   488  	}
   489  	// When comparing the subsets, we ignore the difference in ResourceVersion of Pod to avoid unnecessary Endpoints
   490  	// updates caused by Pod updates that we don't care about, e.g. annotation updates.
   491  	if !createEndpoints &&
   492  		endpointSubsetsEqualIgnoreResourceVersion(currentEndpoints.Subsets, subsets) &&
   493  		apiequality.Semantic.DeepEqual(compareLabels, service.Labels) &&
   494  		capacityAnnotationSetCorrectly(currentEndpoints.Annotations, currentEndpoints.Subsets) {
   495  		logger.V(5).Info("endpoints are equal, skipping update", "service", klog.KObj(service))
   496  		return nil
   497  	}
   498  	newEndpoints := currentEndpoints.DeepCopy()
   499  	newEndpoints.Subsets = subsets
   500  	newEndpoints.Labels = service.Labels
   501  	if newEndpoints.Annotations == nil {
   502  		newEndpoints.Annotations = make(map[string]string)
   503  	}
   504  
   505  	if !endpointsLastChangeTriggerTime.IsZero() {
   506  		newEndpoints.Annotations[v1.EndpointsLastChangeTriggerTime] =
   507  			endpointsLastChangeTriggerTime.UTC().Format(time.RFC3339Nano)
   508  	} else { // No new trigger time, clear the annotation.
   509  		delete(newEndpoints.Annotations, v1.EndpointsLastChangeTriggerTime)
   510  	}
   511  
   512  	if truncateEndpoints(newEndpoints) {
   513  		newEndpoints.Annotations[v1.EndpointsOverCapacity] = truncated
   514  	} else {
   515  		delete(newEndpoints.Annotations, v1.EndpointsOverCapacity)
   516  	}
   517  
   518  	if newEndpoints.Labels == nil {
   519  		newEndpoints.Labels = make(map[string]string)
   520  	}
   521  
   522  	if !helper.IsServiceIPSet(service) {
   523  		newEndpoints.Labels = utillabels.CloneAndAddLabel(newEndpoints.Labels, v1.IsHeadlessService, "")
   524  	} else {
   525  		newEndpoints.Labels = utillabels.CloneAndRemoveLabel(newEndpoints.Labels, v1.IsHeadlessService)
   526  	}
   527  
   528  	logger.V(4).Info("Update endpoints", "service", klog.KObj(service), "readyEndpoints", totalReadyEps, "notreadyEndpoints", totalNotReadyEps)
   529  	if createEndpoints {
   530  		// No previous endpoints, create them
   531  		_, err = e.client.CoreV1().Endpoints(service.Namespace).Create(ctx, newEndpoints, metav1.CreateOptions{})
   532  	} else {
   533  		// Pre-existing
   534  		_, err = e.client.CoreV1().Endpoints(service.Namespace).Update(ctx, newEndpoints, metav1.UpdateOptions{})
   535  	}
   536  	if err != nil {
   537  		if createEndpoints && errors.IsForbidden(err) {
   538  			// A request is forbidden primarily for two reasons:
   539  			// 1. namespace is terminating, endpoint creation is not allowed by default.
   540  			// 2. policy is misconfigured, in which case no service would function anywhere.
   541  			// Given the frequency of 1, we log at a lower level.
   542  			logger.V(5).Info("Forbidden from creating endpoints", "error", err)
   543  
   544  			// If the namespace is terminating, creates will continue to fail. Simply drop the item.
   545  			if errors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
   546  				return nil
   547  			}
   548  		}
   549  
   550  		if createEndpoints {
   551  			e.eventRecorder.Eventf(newEndpoints, v1.EventTypeWarning, "FailedToCreateEndpoint", "Failed to create endpoint for service %v/%v: %v", service.Namespace, service.Name, err)
   552  		} else {
   553  			e.eventRecorder.Eventf(newEndpoints, v1.EventTypeWarning, "FailedToUpdateEndpoint", "Failed to update endpoint %v/%v: %v", service.Namespace, service.Name, err)
   554  		}
   555  
   556  		return err
   557  	}
   558  	return nil
   559  }
   560  
   561  // checkLeftoverEndpoints lists all currently existing endpoints and adds their
   562  // service to the queue. This will detect endpoints that exist with no
   563  // corresponding service; these endpoints need to be deleted. We only need to
   564  // do this once on startup, because in steady-state these are detected (but
   565  // some stragglers could have been left behind if the endpoint controller
   566  // reboots).
   567  func (e *Controller) checkLeftoverEndpoints() {
   568  	list, err := e.endpointsLister.List(labels.Everything())
   569  	if err != nil {
   570  		utilruntime.HandleError(fmt.Errorf("Unable to list endpoints (%v); orphaned endpoints will not be cleaned up. (They're pretty harmless, but you can restart this component if you want another attempt made.)", err))
   571  		return
   572  	}
   573  	for _, ep := range list {
   574  		if _, ok := ep.Annotations[resourcelock.LeaderElectionRecordAnnotationKey]; ok {
   575  			// When there are multiple controller-manager instances, this cleanup
   576  			// has been observed to delete the leader-election Endpoints after
   577  			// about 5 minutes and force a re-election.
   578  			// Skip the delete here: leader-election Endpoints intentionally
   579  			// have no corresponding Service.
   580  			continue
   581  		}
   582  		key, err := controller.KeyFunc(ep)
   583  		if err != nil {
   584  			utilruntime.HandleError(fmt.Errorf("Unable to get key for endpoint %#v", ep))
   585  			continue
   586  		}
   587  		e.queue.Add(key)
   588  	}
   589  }
   590  
   591  // addEndpointSubset adds the endpoint address and ports to the list of subsets.
   592  // The address is added to the ready or not-ready field depending
   593  // on the pod status and the Service's PublishNotReadyAddresses field value.
   594  // The pod passed to this function must have already been filtered through ShouldPodBeInEndpoints.
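// For example, a ready pod matched against a Service with two ports produces two
// single-address subsets here (one per port), which RepackSubsets later merges into
// a single subset carrying both ports.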
   595  func addEndpointSubset(logger klog.Logger, subsets []v1.EndpointSubset, pod *v1.Pod, epa v1.EndpointAddress,
   596  	epp *v1.EndpointPort, tolerateUnreadyEndpoints bool) ([]v1.EndpointSubset, int, int) {
   597  	var readyEps int
   598  	var notReadyEps int
   599  	ports := []v1.EndpointPort{}
   600  	if epp != nil {
   601  		ports = append(ports, *epp)
   602  	}
   603  	if tolerateUnreadyEndpoints || podutil.IsPodReady(pod) {
   604  		subsets = append(subsets, v1.EndpointSubset{
   605  			Addresses: []v1.EndpointAddress{epa},
   606  			Ports:     ports,
   607  		})
   608  		readyEps++
   609  	} else { // the pod is not ready, so record the address as not ready
   610  		logger.V(5).Info("Pod is out of service", "pod", klog.KObj(pod))
   611  		subsets = append(subsets, v1.EndpointSubset{
   612  			NotReadyAddresses: []v1.EndpointAddress{epa},
   613  			Ports:             ports,
   614  		})
   615  		notReadyEps++
   616  	}
   617  	return subsets, readyEps, notReadyEps
   618  }
   619  
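// endpointPortFromServicePort builds an EndpointPort from a ServicePort, using the
// container port number already resolved for a particular pod.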
   620  func endpointPortFromServicePort(servicePort *v1.ServicePort, portNum int) *v1.EndpointPort {
   621  	return &v1.EndpointPort{
   622  		Name:        servicePort.Name,
   623  		Port:        int32(portNum),
   624  		Protocol:    servicePort.Protocol,
   625  		AppProtocol: servicePort.AppProtocol,
   626  	}
   627  }
   628  
   629  // capacityAnnotationSetCorrectly returns true only if the number of endpoints is within
   630  // maxCapacity and the over-capacity annotation is not set; otherwise it returns false.
   631  func capacityAnnotationSetCorrectly(annotations map[string]string, subsets []v1.EndpointSubset) bool {
   632  	numEndpoints := 0
   633  	for _, subset := range subsets {
   634  		numEndpoints += len(subset.Addresses) + len(subset.NotReadyAddresses)
   635  	}
   636  	if numEndpoints > maxCapacity {
   637  		// If subsets are over capacity, they must be truncated so consider
   638  		// the annotation as not set correctly
   639  		return false
   640  	}
   641  	_, ok := annotations[v1.EndpointsOverCapacity]
   642  	return !ok
   643  }
   644  
   645  // truncateEndpoints truncates endpoints over maxCapacity on a best-effort basis, distributing
   646  // the capacity across subsets proportionally and preferring ready endpoints over not-ready ones.
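// It returns true if any truncation was performed. For illustration (hypothetical
// numbers): with 1200 ready and 300 not-ready addresses, the ready addresses are
// proportionally truncated down to maxCapacity (1000) and all not-ready addresses are
// dropped; with 800 ready and 400 not-ready, the ready addresses are kept and the
// not-ready addresses are proportionally truncated down to 200.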
   647  func truncateEndpoints(endpoints *v1.Endpoints) bool {
   648  	totalReady := 0
   649  	totalNotReady := 0
   650  	for _, subset := range endpoints.Subsets {
   651  		totalReady += len(subset.Addresses)
   652  		totalNotReady += len(subset.NotReadyAddresses)
   653  	}
   654  
   655  	if totalReady+totalNotReady <= maxCapacity {
   656  		return false
   657  	}
   658  
   659  	truncateReady := false
   660  	max := maxCapacity - totalReady
   661  	numTotal := totalNotReady
   662  	if totalReady > maxCapacity {
   663  		truncateReady = true
   664  		max = maxCapacity
   665  		numTotal = totalReady
   666  	}
   667  	canBeAdded := max
   668  
   669  	for i := range endpoints.Subsets {
   670  		subset := endpoints.Subsets[i]
   671  		numInSubset := len(subset.Addresses)
   672  		if !truncateReady {
   673  			numInSubset = len(subset.NotReadyAddresses)
   674  		}
   675  
   676  		// The number of endpoints per subset is based on the proportion of endpoints
   677  		// in this subset versus the total number of endpoints. The proportion is
   678  		// rounded up, which most likely leads to the last subset having fewer
   679  		// endpoints than its expected proportion.
   680  		toBeAdded := int(math.Ceil((float64(numInSubset) / float64(numTotal)) * float64(max)))
   681  		// If there are not enough endpoints left for the last subset, add only the
   682  		// number up to the remaining capacity.
   683  		if toBeAdded > canBeAdded {
   684  			toBeAdded = canBeAdded
   685  		}
   686  
   687  		if truncateReady {
   688  			// Truncate ready Addresses to allocated proportion and truncate all not ready
   689  			// addresses
   690  			subset.Addresses = addressSubset(subset.Addresses, toBeAdded)
   691  			subset.NotReadyAddresses = []v1.EndpointAddress{}
   692  			canBeAdded -= len(subset.Addresses)
   693  		} else {
   694  			// Only truncate the not ready addresses
   695  			subset.NotReadyAddresses = addressSubset(subset.NotReadyAddresses, toBeAdded)
   696  			canBeAdded -= len(subset.NotReadyAddresses)
   697  		}
   698  		endpoints.Subsets[i] = subset
   699  	}
   700  	return true
   701  }
   702  
   703  // addressSubset returns the first maxNum addresses when the list is longer than
   704  // maxNum; otherwise it returns the entire list.
   705  func addressSubset(addresses []v1.EndpointAddress, maxNum int) []v1.EndpointAddress {
   706  	if len(addresses) <= maxNum {
   707  		return addresses
   708  	}
   709  	return addresses[0:maxNum]
   710  }
   711  
   712  // semanticIgnoreResourceVersion does semantic deep equality checks for objects
   713  // but excludes the ResourceVersion of ObjectReference. It is used when comparing
   714  // endpoints in Endpoints and EndpointSlice objects to avoid unnecessary updates
   715  // caused by Pod resourceVersion changes.
   716  var semanticIgnoreResourceVersion = conversion.EqualitiesOrDie(
   717  	func(a, b v1.ObjectReference) bool {
   718  		a.ResourceVersion = ""
   719  		b.ResourceVersion = ""
   720  		return a == b
   721  	},
   722  )
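// The addresses computed by podToEndpointAddressForService never set
// TargetRef.ResourceVersion, so any resourceVersion recorded in an existing Endpoints
// object would otherwise make the comparison fail and force a rewrite on every sync.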
   723  
   724  // endpointSubsetsEqualIgnoreResourceVersion returns true if EndpointSubsets
   725  // have equal attributes but excludes ResourceVersion of Pod.
   726  func endpointSubsetsEqualIgnoreResourceVersion(subsets1, subsets2 []v1.EndpointSubset) bool {
   727  	return semanticIgnoreResourceVersion.DeepEqual(subsets1, subsets2)
   728  }