istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/serviceregistry/kube/controller/endpointslice.go

// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
	"sync"

	"github.com/hashicorp/go-multierror"
	corev1 "k8s.io/api/core/v1"
	v1 "k8s.io/api/discovery/v1"
	"k8s.io/api/discovery/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	klabels "k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/selection"
	"k8s.io/apimachinery/pkg/types"
	mcs "sigs.k8s.io/mcs-api/pkg/apis/v1alpha1"

	"istio.io/istio/pilot/pkg/features"
	"istio.io/istio/pilot/pkg/model"
	"istio.io/istio/pkg/config"
	"istio.io/istio/pkg/config/host"
	"istio.io/istio/pkg/config/schema/kind"
	"istio.io/istio/pkg/config/visibility"
	"istio.io/istio/pkg/kube/kclient"
	"istio.io/istio/pkg/util/sets"
)

type endpointSliceController struct {
	endpointCache *endpointSliceCache
	slices        kclient.Client[*v1.EndpointSlice]
	c             *Controller
}

var (
	endpointSliceRequirement = labelRequirement(mcs.LabelServiceName, selection.DoesNotExist, nil)
	endpointSliceSelector    = klabels.NewSelector().Add(*endpointSliceRequirement)
)

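// Note (illustrative, assuming mcs.LabelServiceName marks slices managed by the
// multi-cluster service controller): endpointSliceSelector matches only slices
// that are NOT MCS-managed, e.g.
//
//	endpointSliceSelector.Matches(klabels.Set{v1.LabelServiceName: "reviews"})  // true
//	endpointSliceSelector.Matches(klabels.Set{mcs.LabelServiceName: "reviews"}) // false
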
func newEndpointSliceController(c *Controller) *endpointSliceController {
	slices := kclient.NewFiltered[*v1.EndpointSlice](c.client, kclient.Filter{ObjectFilter: c.client.ObjectFilter()})
	out := &endpointSliceController{
		c:             c,
		slices:        slices,
		endpointCache: newEndpointSliceCache(),
	}
	registerHandlers[*v1.EndpointSlice](c, slices, "EndpointSlice", out.onEvent, nil)
	return out
}

func (esc *endpointSliceController) podArrived(name, ns string) error {
	ep := esc.slices.Get(name, ns)
	if ep == nil {
		return nil
	}
	return esc.onEvent(nil, ep, model.EventAdd)
}

// initializeNamespace initializes endpoints for a given namespace.
func (esc *endpointSliceController) initializeNamespace(ns string, filtered bool) error {
	var err *multierror.Error
	var endpoints []*v1.EndpointSlice
	if filtered {
		endpoints = esc.slices.List(ns, klabels.Everything())
	} else {
		endpoints = esc.slices.ListUnfiltered(ns, klabels.Everything())
	}
	log.Debugf("initializing %d endpointslices", len(endpoints))
	for _, s := range endpoints {
		err = multierror.Append(err, esc.onEvent(nil, s, model.EventAdd))
	}
	return err.ErrorOrNil()
}

func (esc *endpointSliceController) onEvent(_, ep *v1.EndpointSlice, event model.Event) error {
	esc.onEventInternal(nil, ep, event)
	return nil
}

func (esc *endpointSliceController) onEventInternal(_, ep *v1.EndpointSlice, event model.Event) {
	esLabels := ep.GetLabels()
	if !endpointSliceSelector.Matches(klabels.Set(esLabels)) {
		return
	}
	// Update the internal endpoint cache regardless of the kind of service, including
	// headless services: for gateways, the cluster discovery type is `EDS` even for headless services.
	namespacedName := getServiceNamespacedName(ep)
	log.Debugf("Handle EDS endpoint %s %s in namespace %s", namespacedName.Name, event, namespacedName.Namespace)
	if event == model.EventDelete {
		esc.deleteEndpointSlice(ep)
	} else {
		esc.updateEndpointSlice(ep)
	}
	hostnames := esc.c.hostNamesForNamespacedName(namespacedName)
	// Trigger EDS push for all hostnames.
	esc.pushEDS(hostnames, namespacedName.Namespace)

	name := serviceNameForEndpointSlice(esLabels)
	namespace := ep.GetNamespace()
	svc := esc.c.services.Get(name, namespace)
	if svc == nil || svc.Spec.ClusterIP != corev1.ClusterIPNone || svc.Spec.Type == corev1.ServiceTypeExternalName {
		return
	}

	configs := []types.NamespacedName{}
	pureHTTP := true
	for _, modelSvc := range esc.c.servicesForNamespacedName(config.NamespacedName(svc)) {
		// skip push if it is not exported
		if modelSvc.Attributes.ExportTo.Contains(visibility.None) {
			continue
		}

		configs = append(configs, types.NamespacedName{Name: modelSvc.Hostname.String(), Namespace: svc.Namespace})

		for _, p := range modelSvc.Ports {
			if !p.Protocol.IsHTTP() {
				pureHTTP = false
				break
			}
		}
	}

	configsUpdated := sets.New[model.ConfigKey]()
	for _, config := range configs {
		if !pureHTTP {
			configsUpdated.Insert(model.ConfigKey{Kind: kind.ServiceEntry, Name: config.Name, Namespace: config.Namespace})
		} else {
			// pure HTTP headless services should not need a full push since they do not
			// require a Listener based on IP: https://github.com/istio/istio/issues/48207
			configsUpdated.Insert(model.ConfigKey{Kind: kind.DNSName, Name: config.Name, Namespace: config.Namespace})
		}
	}

	if len(configsUpdated) > 0 {
		// For headless services, trigger a full push.
		// If EnableHeadlessService is true and the service's ports are not pure HTTP, we need to
		// regenerate listeners per endpoint. Otherwise we only need to push NDS; the push is still
		// marked full, but every xDS type except NDS is skipped during it.
		esc.c.opts.XDSUpdater.ConfigUpdate(&model.PushRequest{
			Full:           true,
			ConfigsUpdated: configsUpdated,
			Reason:         model.NewReasonStats(model.HeadlessEndpointUpdate),
		})
	}
}

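// headlessConfigKindSketch is an illustrative, unused sketch of the decision made in
// onEventInternal above: a headless service whose ports are all HTTP only needs a
// DNSName (NDS) update, while any non-HTTP port requires a ServiceEntry-kinded full
// push. It is not part of the controller; the name is made up for illustration.
func headlessConfigKindSketch(svc *model.Service) kind.Kind {
	for _, p := range svc.Ports {
		if !p.Protocol.IsHTTP() {
			// Non-HTTP ports need per-endpoint listeners, so a full push is required.
			return kind.ServiceEntry
		}
	}
	// Pure HTTP headless service: only the DNS (NDS) output changes.
	return kind.DNSName
}
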
// GetProxyServiceTargets returns service instances co-located with a given proxy.
// TODO: this code does not return k8s service instances when the proxy's IP is a workload entry.
// To tackle this, we need an ip2instance map like the one the ServiceEntry registry has.
func (esc *endpointSliceController) GetProxyServiceTargets(proxy *model.Proxy) []model.ServiceTarget {
	eps := esc.slices.List(proxy.Metadata.Namespace, endpointSliceSelector)
	var out []model.ServiceTarget
	for _, ep := range eps {
		instances := esc.serviceTargets(ep, proxy)
		out = append(out, instances...)
	}

	return out
}

func serviceNameForEndpointSlice(labels map[string]string) string {
	return labels[v1.LabelServiceName]
}

func (esc *endpointSliceController) serviceTargets(ep *v1.EndpointSlice, proxy *model.Proxy) []model.ServiceTarget {
	var out []model.ServiceTarget
	esc.endpointCache.mu.RLock()
	defer esc.endpointCache.mu.RUnlock()
	for _, svc := range esc.c.servicesForNamespacedName(getServiceNamespacedName(ep)) {
		for _, instance := range esc.endpointCache.get(svc.Hostname) {
			port, f := svc.Ports.Get(instance.ServicePortName)
			if !f {
				log.Warnf("unexpected state, svc %v missing port %v", svc.Hostname, instance.ServicePortName)
				continue
			}
			// consider multiple IP scenarios
			for _, ip := range proxy.IPAddresses {
				if ip != instance.Address {
					continue
				}
				// If the endpoint isn't ready, report this
				if instance.HealthStatus == model.UnHealthy && esc.c.opts.Metrics != nil {
					esc.c.opts.Metrics.AddMetric(model.ProxyStatusEndpointNotReady, proxy.ID, proxy.ID, "")
				}
				si := model.ServiceTarget{
					Service: svc,
					Port: model.ServiceInstancePort{
						ServicePort: port,
						TargetPort:  instance.EndpointPort,
					},
				}
				out = append(out, si)
			}
		}
	}
	return out
}

func (esc *endpointSliceController) deleteEndpointSlice(slice *v1.EndpointSlice) {
	key := config.NamespacedName(slice)
	for _, e := range slice.Endpoints {
		for _, a := range e.Addresses {
			esc.c.pods.endpointDeleted(key, a)
		}
	}

	esc.endpointCache.mu.Lock()
	defer esc.endpointCache.mu.Unlock()
	for _, hostName := range esc.c.hostNamesForNamespacedName(getServiceNamespacedName(slice)) {
		// endpointSlice cache update
		if esc.endpointCache.has(hostName) {
			esc.endpointCache.delete(hostName, slice.Name)
		}
	}
}

func (esc *endpointSliceController) updateEndpointSlice(slice *v1.EndpointSlice) {
	for _, hostname := range esc.c.hostNamesForNamespacedName(getServiceNamespacedName(slice)) {
		esc.updateEndpointCacheForSlice(hostname, slice)
	}
}

func endpointHealthStatus(svc *model.Service, e v1.Endpoint) model.HealthStatus {
	if e.Conditions.Ready == nil || *e.Conditions.Ready {
		return model.Healthy
	}

	if features.PersistentSessionLabel != "" &&
		svc != nil &&
		svc.Attributes.Labels[features.PersistentSessionLabel] != "" &&
		(e.Conditions.Serving == nil || *e.Conditions.Serving) &&
		(e.Conditions.Terminating == nil || *e.Conditions.Terminating) {
		return model.Draining
	}

	return model.UnHealthy
}

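// exampleEndpointHealthStatus is an illustrative, unused sketch of the condition
// handling above: an endpoint that is not Ready maps to UnHealthy unless the
// service carries the persistent-session label and the endpoint is still serving,
// in which case it is reported as Draining. The condition values here are made up.
func exampleEndpointHealthStatus() model.HealthStatus {
	ready, serving, terminating := false, true, true
	e := v1.Endpoint{Conditions: v1.EndpointConditions{
		Ready:       &ready,
		Serving:     &serving,
		Terminating: &terminating,
	}}
	// With a nil service there is no persistent-session label, so this returns model.UnHealthy.
	return endpointHealthStatus(nil, e)
}
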
func (esc *endpointSliceController) updateEndpointCacheForSlice(hostName host.Name, slice *v1.EndpointSlice) {
	var endpoints []*model.IstioEndpoint
	if slice.AddressType == v1.AddressTypeFQDN {
		// TODO(https://github.com/istio/istio/issues/34995) support FQDN endpointslice
		return
	}
	svc := esc.c.GetService(hostName)
	discoverabilityPolicy := esc.c.exports.EndpointDiscoverabilityPolicy(svc)

	for _, e := range slice.Endpoints {
		// Draining tracking is only enabled if the persistent sessions feature is enabled.
		// If we start using draining for other features, this can be adjusted.
		healthStatus := endpointHealthStatus(svc, e)
		for _, a := range e.Addresses {
			pod, expectedPod := getPod(esc.c, a, &metav1.ObjectMeta{Name: slice.Name, Namespace: slice.Namespace}, e.TargetRef, hostName)
			if pod == nil && expectedPod {
				continue
			}
			builder := NewEndpointBuilder(esc.c, pod)
			// EDS and ServiceEntry use name for service port - ADS will need to map to numbers.
			for _, port := range slice.Ports {
				var portNum int32
				if port.Port != nil {
					portNum = *port.Port
				}
				var portName string
				if port.Name != nil {
					portName = *port.Name
				}

				istioEndpoint := builder.buildIstioEndpoint(a, portNum, portName, discoverabilityPolicy, healthStatus)
				endpoints = append(endpoints, istioEndpoint)
			}
		}
	}
	esc.endpointCache.Update(hostName, slice.Name, endpoints)
}

func (esc *endpointSliceController) buildIstioEndpointsWithService(name, namespace string, hostName host.Name, updateCache bool) []*model.IstioEndpoint {
	esLabelSelector := endpointSliceSelectorForService(name)
	slices := esc.slices.List(namespace, esLabelSelector)
	if len(slices) == 0 {
		log.Debugf("endpoint slices of (%s, %s) not found", name, namespace)
		return nil
	}

	if updateCache {
		// A cache update was requested. Rebuild the endpoints for these slices.
		for _, slice := range slices {
			esc.updateEndpointCacheForSlice(hostName, slice)
		}
	}

	return esc.endpointCache.Get(hostName)
}

func getServiceNamespacedName(slice *v1.EndpointSlice) types.NamespacedName {
	return types.NamespacedName{
		Namespace: slice.GetNamespace(),
		Name:      serviceNameForEndpointSlice(slice.GetLabels()),
	}
}

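// For example (illustrative values): a slice named "reviews-abc123" in namespace
// "default" carrying the label "kubernetes.io/service-name: reviews" yields
// types.NamespacedName{Namespace: "default", Name: "reviews"}.
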
// endpointKey uniquely identifies an endpoint by IP and port name.
// This is used for deduping endpoints across slices.
type endpointKey struct {
	ip   string
	port string
}

type endpointSliceCache struct {
	mu                         sync.RWMutex
	endpointsByServiceAndSlice map[host.Name]map[string][]*model.IstioEndpoint
}

func newEndpointSliceCache() *endpointSliceCache {
	out := &endpointSliceCache{
		endpointsByServiceAndSlice: make(map[host.Name]map[string][]*model.IstioEndpoint),
	}
	return out
}

func (e *endpointSliceCache) Update(hostname host.Name, slice string, endpoints []*model.IstioEndpoint) {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.update(hostname, slice, endpoints)
}

func (e *endpointSliceCache) update(hostname host.Name, slice string, endpoints []*model.IstioEndpoint) {
	if len(endpoints) == 0 {
		delete(e.endpointsByServiceAndSlice[hostname], slice)
	}
	if _, f := e.endpointsByServiceAndSlice[hostname]; !f {
		e.endpointsByServiceAndSlice[hostname] = make(map[string][]*model.IstioEndpoint)
	}
	// We will always overwrite. A conflict here means an endpoint is transitioning
	// from one slice to another; see
	// https://github.com/kubernetes/website/blob/master/content/en/docs/concepts/services-networking/endpoint-slices.md#duplicate-endpoints
	// In this case, we can always assume an update is fresh, although older slices
	// we have not gotten updates for may be stale; therefore we always take the new
	// update.
	e.endpointsByServiceAndSlice[hostname][slice] = endpoints
}

func (e *endpointSliceCache) Delete(hostname host.Name, slice string) {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.delete(hostname, slice)
}

func (e *endpointSliceCache) delete(hostname host.Name, slice string) {
	delete(e.endpointsByServiceAndSlice[hostname], slice)
	if len(e.endpointsByServiceAndSlice[hostname]) == 0 {
		delete(e.endpointsByServiceAndSlice, hostname)
	}
}

func (e *endpointSliceCache) Get(hostname host.Name) []*model.IstioEndpoint {
	e.mu.RLock()
	defer e.mu.RUnlock()
	return e.get(hostname)
}

func (e *endpointSliceCache) get(hostname host.Name) []*model.IstioEndpoint {
	var endpoints []*model.IstioEndpoint
	found := sets.New[endpointKey]()
	for _, eps := range e.endpointsByServiceAndSlice[hostname] {
		for _, ep := range eps {
			key := endpointKey{ep.Address, ep.ServicePortName}
			if found.InsertContains(key) {
				// This is a duplicate. Update() already handles conflict resolution, so we don't
				// need to pick the "right" one here.
				continue
			}
			endpoints = append(endpoints, ep)
		}
	}
	return endpoints
}

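// exampleEndpointSliceCacheDedup is an illustrative, unused sketch of the dedup
// behavior above: the same address/port pair reported by two slices is returned
// only once from Get. The hostname, slice names, and address are made up, and the
// IstioEndpoint fields set here are only the two that feed endpointKey.
func exampleEndpointSliceCacheDedup() int {
	cache := newEndpointSliceCache()
	ep := &model.IstioEndpoint{Address: "10.0.0.1", ServicePortName: "http"}
	cache.Update("reviews.default.svc.cluster.local", "slice-a", []*model.IstioEndpoint{ep})
	cache.Update("reviews.default.svc.cluster.local", "slice-b", []*model.IstioEndpoint{ep})
	// Both slices report the same endpoint, so Get collapses them to one.
	return len(cache.Get("reviews.default.svc.cluster.local"))
}
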
func (e *endpointSliceCache) Has(hostname host.Name) bool {
	e.mu.RLock()
	defer e.mu.RUnlock()
	return e.has(hostname)
}

func (e *endpointSliceCache) has(hostname host.Name) bool {
	_, found := e.endpointsByServiceAndSlice[hostname]
	return found
}

func endpointSliceSelectorForService(name string) klabels.Selector {
	return klabels.Set(map[string]string{
		v1beta1.LabelServiceName: name,
	}).AsSelectorPreValidated().Add(*endpointSliceRequirement)
}

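// For example (illustrative; assumes the standard label constants), the selector for
// "reviews" requires the slice's kubernetes.io/service-name label to equal "reviews"
// while still excluding MCS-managed slices, roughly:
//
//	endpointSliceSelectorForService("reviews").String()
//	// "kubernetes.io/service-name=reviews,!multicluster.kubernetes.io/service-name"
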
func (esc *endpointSliceController) pushEDS(hostnames []host.Name, namespace string) {
	shard := model.ShardKeyFromRegistry(esc.c)
	// Even though we just read from the cache, we need the full lock to ensure pushEDS
	// runs sequentially when `EnableK8SServiceSelectWorkloadEntries` is enabled. Otherwise,
	// EDS updates could go out of order with workload entry updates, causing incorrect
	// endpoints. For regular endpoint updates, pushEDS is already serialized
	// because the events are queued.
	esc.endpointCache.mu.Lock()
	defer esc.endpointCache.mu.Unlock()
	for _, hostname := range hostnames {
		endpoints := esc.endpointCache.get(hostname)
		if features.EnableK8SServiceSelectWorkloadEntries {
			svc := esc.c.GetService(hostname)
			if svc != nil {
				fep := esc.c.collectWorkloadInstanceEndpoints(svc)
				endpoints = append(endpoints, fep...)
			} else {
				log.Debugf("Handle EDS endpoint: skip collecting workload entry endpoints, service %s has not been populated",
					hostname)
			}
		}

		esc.c.opts.XDSUpdater.EDSUpdate(shard, string(hostname), namespace, endpoints)
	}
}

// getPod fetches a pod by name or IP address.
// A pod may be missing (nil) for two reasons:
//   - It is an endpoint without an associated Pod. In this case, expectPod will be false.
//   - It is an endpoint with an associated Pod that is not found. In this case, expectPod will be true.
//     This may happen due to eventual consistency, out-of-order events, etc. In this case, the caller
//     should not proceed with the endpoint, or inaccurate information would be sent, which may have
//     impacts on correctness and security.
//
// Note: this is only used by the endpointslice controller.
func getPod(c *Controller, ip string, ep *metav1.ObjectMeta, targetRef *corev1.ObjectReference, host host.Name) (*corev1.Pod, bool) {
	var expectPod bool
	pod := c.getPod(ip, ep.Namespace, targetRef)
	if targetRef != nil && targetRef.Kind == "Pod" {
		expectPod = true
		if pod == nil {
			c.registerEndpointResync(ep, ip, host)
		}
	}

	return pod, expectPod
}

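// Illustrative caller contract for getPod (a sketch mirroring its use in
// updateEndpointCacheForSlice above):
//
//	pod, expected := getPod(c, addr, meta, targetRef, hostName)
//	if pod == nil && expected {
//		// The pod event has not arrived yet; skip this endpoint. It will be
//		// reprocessed when the pod arrives (see registerEndpointResync below).
//	}
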
func (c *Controller) registerEndpointResync(ep *metav1.ObjectMeta, ip string, host host.Name) {
	// This means the endpoint event arrived before the pod event.
	// This might happen because PodCache is eventually consistent.
	log.Debugf("Endpoint without pod %s %s.%s", ip, ep.Name, ep.Namespace)
	endpointsWithNoPods.Increment()
	if c.opts.Metrics != nil {
		c.opts.Metrics.AddMetric(model.EndpointNoPod, string(host), "", ip)
	}
	// Tell pod cache we want to queue the endpoint event when this pod arrives.
	c.pods.queueEndpointEventOnPodArrival(config.NamespacedName(ep), ip)
}

// getPod fetches a pod by name or IP address.
// A pod may be missing (nil) for two reasons:
// * It is an endpoint without an associated Pod.
// * It is an endpoint with an associated Pod that is not found.
func (c *Controller) getPod(ip string, namespace string, targetRef *corev1.ObjectReference) *corev1.Pod {
	if targetRef != nil && targetRef.Kind == "Pod" {
		key := types.NamespacedName{Name: targetRef.Name, Namespace: targetRef.Namespace}
		pod := c.pods.getPodByKey(key)
		return pod
	}
	// This means the endpoint is manually controlled.
	// We will want to look up a pod to find metadata like service account, labels, etc. But for hostNetwork, we just get a raw IP,
	// and the IP may be shared by many pods. The best we can do is guess.
	pods := c.pods.getPodsByIP(ip)
	for _, p := range pods {
		if p.Namespace == namespace {
			// Might not be right, but best we can do.
			return p
		}
	}
	return nil
}