github.com/cilium/cilium@v1.16.2/pkg/k8s/endpoints.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package k8s

import (
	"fmt"
	"net"
	"net/netip"
	"sort"
	"strconv"
	"strings"

	corev1 "k8s.io/api/core/v1"

	cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
	slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
	slim_discovery_v1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/discovery/v1"
	slim_discovery_v1beta1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/discovery/v1beta1"
	slim_metav1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1"
	"github.com/cilium/cilium/pkg/k8s/types"
	"github.com/cilium/cilium/pkg/loadbalancer"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/option"
	serviceStore "github.com/cilium/cilium/pkg/service/store"
)

// Endpoints is an abstraction for the Kubernetes endpoints object. Endpoints
// consists of a set of backend IPs in combination with a set of ports and
// protocols. The name of the backend ports must match the names of the
// frontend ports of the corresponding service.
//
// The Endpoints object is parsed from either an EndpointSlice (preferred) or
// an Endpoints Kubernetes object, depending on the Kubernetes version.
//
// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +deepequal-gen=true
// +deepequal-gen:private-method=true
type Endpoints struct {
	types.UnserializableObject
	slim_metav1.ObjectMeta

	EndpointSliceID

	// Backends is a map containing all backend IPs and ports. The key to
	// the map is the backend address. The value defines the list of ports
	// for that backend, plus an additional optional node name.
	Backends map[cmtypes.AddrCluster]*Backend
}
    52  
    53  // DeepEqual returns true if both endpoints are deep equal.
    54  func (e *Endpoints) DeepEqual(o *Endpoints) bool {
    55  	switch {
    56  	case (e == nil) != (o == nil):
    57  		return false
    58  	case (e == nil) && (o == nil):
    59  		return true
    60  	}
    61  	return e.deepEqual(o)
    62  }
    63  
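// DeepCopyInto is a deepcopy function, copying the receiver, writing into out.
// in must be non-nil.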
func (in *Endpoints) DeepCopyInto(out *Endpoints) {
	*out = *in
	if in.Backends != nil {
		in, out := &in.Backends, &out.Backends
		*out = make(map[cmtypes.AddrCluster]*Backend, len(*in))
		for key, val := range *in {
			var outVal *Backend
			if val == nil {
				(*out)[key] = nil
			} else {
				in, out := &val, &outVal
				*out = new(Backend)
				(*in).DeepCopyInto(*out)
			}
			(*out)[key] = outVal
		}
	}
}

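// DeepCopy is a deepcopy function, copying the receiver, creating a new
// Endpoints.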
func (in *Endpoints) DeepCopy() *Endpoints {
	if in == nil {
		return nil
	}
	out := new(Endpoints)
	in.DeepCopyInto(out)
	return out
}

// Backend contains all ports, terminating state, and the node name of a given backend.
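// NodeName and Zone carry the backend's topology, Hostname the endpoint's
// hostname, HintsForZones any zone hints taken from the EndpointSlice, and
// Terminating whether the endpoint is being terminated.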
//
// +k8s:deepcopy-gen=true
// +deepequal-gen=true
type Backend struct {
	Ports         serviceStore.PortConfiguration
	NodeName      string
	Hostname      string
	Terminating   bool
	HintsForZones []string
	Preferred     bool
	Zone          string
}

// String returns the string representation of an endpoints resource, with
// backends and ports sorted.
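// For example: "10.0.0.1:80/TCP,10.0.0.2:80/TCP[zone-a]"; the zone, when set,
// is appended in brackets.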
func (e *Endpoints) String() string {
	if e == nil {
		return ""
	}

	backends := []string{}
	for addrCluster, be := range e.Backends {
		for _, port := range be.Ports {
			if be.Zone != "" {
				backends = append(backends, fmt.Sprintf("%s/%s[%s]", net.JoinHostPort(addrCluster.Addr().String(), strconv.Itoa(int(port.Port))), port.Protocol, be.Zone))
			} else {
				backends = append(backends, fmt.Sprintf("%s/%s", net.JoinHostPort(addrCluster.Addr().String(), strconv.Itoa(int(port.Port))), port.Protocol))
			}
		}
	}

	sort.Strings(backends)

	return strings.Join(backends, ",")
}

// newEndpoints returns a new Endpoints
func newEndpoints() *Endpoints {
	return &Endpoints{
		Backends: map[cmtypes.AddrCluster]*Backend{},
	}
}

// Prefixes returns the endpoint's backends as a slice of netip.Prefix.
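// Each backend address is returned as a single-address prefix, e.g. 10.0.0.1
// becomes 10.0.0.1/32.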
func (e *Endpoints) Prefixes() []netip.Prefix {
	prefixes := make([]netip.Prefix, 0, len(e.Backends))
	for addrCluster := range e.Backends {
		addr := addrCluster.Addr()
		prefixes = append(prefixes, netip.PrefixFrom(addr, addr.BitLen()))
	}
	return prefixes
}

// ParseEndpointsID parses a Kubernetes Endpoints resource and returns its EndpointSliceID.
func ParseEndpointsID(ep *slim_corev1.Endpoints) EndpointSliceID {
	return EndpointSliceID{
		ServiceID: ServiceID{
			Name:      ep.ObjectMeta.Name,
			Namespace: ep.ObjectMeta.Namespace,
		},
		EndpointSliceName: ep.ObjectMeta.Name,
	}
}

// ParseEndpoints parses a Kubernetes Endpoints resource.
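// Each subset address becomes a backend keyed by its parsed address, and the
// subset's ports are merged into that backend's port configuration.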
func ParseEndpoints(ep *slim_corev1.Endpoints) *Endpoints {
	endpoints := newEndpoints()
	endpoints.ObjectMeta = ep.ObjectMeta

	for _, sub := range ep.Subsets {
		for _, addr := range sub.Addresses {
			addrCluster, err := cmtypes.ParseAddrCluster(addr.IP)
			if err != nil {
				continue
			}

			backend, ok := endpoints.Backends[addrCluster]
			if !ok {
				backend = &Backend{Ports: serviceStore.PortConfiguration{}}
				endpoints.Backends[addrCluster] = backend
			}

			if addr.NodeName != nil {
				backend.NodeName = *addr.NodeName
			}
			backend.Hostname = addr.Hostname

			for _, port := range sub.Ports {
				lbPort := loadbalancer.NewL4Addr(loadbalancer.L4Type(port.Protocol), uint16(port.Port))
				backend.Ports[port.Name] = lbPort
			}
		}
	}

	endpoints.EndpointSliceID = ParseEndpointsID(ep)
	return endpoints
}

type endpointSlice interface {
	GetNamespace() string
	GetName() string
	GetLabels() map[string]string
}

// ParseEndpointSliceID parses a Kubernetes endpoint slice and returns an
// EndpointSliceID.
func ParseEndpointSliceID(es endpointSlice) EndpointSliceID {
	return EndpointSliceID{
		ServiceID: ServiceID{
			Name:      es.GetLabels()[slim_discovery_v1.LabelServiceName],
			Namespace: es.GetNamespace(),
		},
		EndpointSliceName: es.GetName(),
	}
}

// ParseEndpointSliceV1Beta1 parses a Kubernetes EndpointSlice v1beta1 resource.
// It reads the ready and terminating state of endpoints in the EndpointSlice to
// return an EndpointSlice ID and a filtered list of Endpoints for service load-balancing.
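// Endpoints that are not Ready are skipped unless EnableK8sTerminatingEndpoint
// is set and the endpoint is Terminating, in which case the backend is kept and
// marked as Terminating.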
func ParseEndpointSliceV1Beta1(ep *slim_discovery_v1beta1.EndpointSlice) *Endpoints {
	endpoints := newEndpoints()
	endpoints.ObjectMeta = ep.ObjectMeta
	endpoints.EndpointSliceID = ParseEndpointSliceID(ep)

	// Validate AddressType before parsing. Currently, we only support IPv4 and IPv6.
	if ep.AddressType != slim_discovery_v1beta1.AddressTypeIPv4 &&
		ep.AddressType != slim_discovery_v1beta1.AddressTypeIPv6 {
		return endpoints
	}

	for _, sub := range ep.Endpoints {
		skipEndpoint := false
		// ready indicates that this endpoint is prepared to receive traffic,
		// according to whatever system is managing the endpoint. A nil value
		// indicates an unknown state. In most cases consumers should interpret this
		// unknown state as ready.
		// More info: vendor/k8s.io/api/discovery/v1beta1/types.go
		if sub.Conditions.Ready != nil && !*sub.Conditions.Ready {
			skipEndpoint = true
			if option.Config.EnableK8sTerminatingEndpoint {
				// Terminating indicates that the endpoint is getting terminated. A
				// nil value indicates an unknown state. Ready is never true when
				// an endpoint is terminating. Propagate the terminating endpoint
				// state so that we can gracefully remove those endpoints.
				// More details: vendor/k8s.io/api/discovery/v1/types.go
				if sub.Conditions.Terminating != nil && *sub.Conditions.Terminating {
					skipEndpoint = false
				}
			}
		}
		if skipEndpoint {
			continue
		}
		for _, addr := range sub.Addresses {
			addrCluster, err := cmtypes.ParseAddrCluster(addr)
			if err != nil {
				continue
			}

			backend, ok := endpoints.Backends[addrCluster]
			if !ok {
				backend = &Backend{Ports: serviceStore.PortConfiguration{}}
				endpoints.Backends[addrCluster] = backend
				if nodeName, ok := sub.Topology[corev1.LabelHostname]; ok {
					backend.NodeName = nodeName
				}
				if sub.Hostname != nil {
					backend.Hostname = *sub.Hostname
				}
				if option.Config.EnableK8sTerminatingEndpoint {
					if sub.Conditions.Terminating != nil && *sub.Conditions.Terminating {
						backend.Terminating = true
						metrics.TerminatingEndpointsEvents.Inc()
					}
				}
				if zoneName, ok := sub.Topology[corev1.LabelTopologyZone]; ok {
					backend.Zone = zoneName
				}
			}

			for _, port := range ep.Ports {
				name, lbPort := parseEndpointPortV1Beta1(port)
				if lbPort != nil {
					backend.Ports[name] = lbPort
				}
			}
		}
	}
	return endpoints
}

// parseEndpointPortV1Beta1 returns the port name and the port parsed as an
// L4Addr from the given port.
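// Unknown protocols and nil ports yield ("", nil), which callers skip.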
func parseEndpointPortV1Beta1(port slim_discovery_v1beta1.EndpointPort) (string, *loadbalancer.L4Addr) {
	proto := loadbalancer.TCP
	if port.Protocol != nil {
		switch *port.Protocol {
		case slim_corev1.ProtocolTCP:
			proto = loadbalancer.TCP
		case slim_corev1.ProtocolUDP:
			proto = loadbalancer.UDP
		case slim_corev1.ProtocolSCTP:
			proto = loadbalancer.SCTP
		default:
			return "", nil
		}
	}
	if port.Port == nil {
		return "", nil
	}
	var name string
	if port.Name != nil {
		name = *port.Name
	}
	lbPort := loadbalancer.NewL4Addr(proto, uint16(*port.Port))
	return name, lbPort
}

// ParseEndpointSliceV1 parses a Kubernetes EndpointSlice resource.
// It reads the ready and terminating state of endpoints in the EndpointSlice to
// return an EndpointSlice ID and a filtered list of Endpoints for service load-balancing.
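//
// With EnableK8sTerminatingEndpoint enabled, the endpoint conditions roughly
// map to backends as follows (a sketch of the filtering below; a nil Ready
// counts as ready and a nil Serving defers to Ready):
//
//	Ready=true                                   -> kept
//	Ready=false, Serving=true, Terminating=true  -> kept, marked Terminating
//	Ready=false, Serving=false                   -> skipped
//
// Without the option, only Ready endpoints are kept.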
func ParseEndpointSliceV1(ep *slim_discovery_v1.EndpointSlice) *Endpoints {
	endpoints := newEndpoints()
	endpoints.ObjectMeta = ep.ObjectMeta
	endpoints.EndpointSliceID = ParseEndpointSliceID(ep)

	// Validate AddressType before parsing. Currently, we only support IPv4 and IPv6.
	if ep.AddressType != slim_discovery_v1.AddressTypeIPv4 &&
		ep.AddressType != slim_discovery_v1.AddressTypeIPv6 {
		return endpoints
	}

	log.Debugf("Processing %d endpoints for EndpointSlice %s", len(ep.Endpoints), ep.Name)
	for _, sub := range ep.Endpoints {
		// ready indicates that this endpoint is prepared to receive traffic,
		// according to whatever system is managing the endpoint. A nil value
		// indicates an unknown state. In most cases consumers should interpret this
		// unknown state as ready.
		// More info: vendor/k8s.io/api/discovery/v1/types.go
		isReady := sub.Conditions.Ready == nil || *sub.Conditions.Ready
		// serving is identical to ready except that it is set regardless of the
		// terminating state of endpoints. This condition should be set to true for
		// a ready endpoint that is terminating. If nil, consumers should defer to
		// the ready condition.
		// More info: vendor/k8s.io/api/discovery/v1/types.go
		isServing := (sub.Conditions.Serving == nil && isReady) || (sub.Conditions.Serving != nil && *sub.Conditions.Serving)
		// Terminating indicates that the endpoint is getting terminated. A
		// nil value indicates an unknown state. Ready is never true when
		// an endpoint is terminating. Propagate the terminating endpoint
		// state so that we can gracefully remove those endpoints.
		// More info: vendor/k8s.io/api/discovery/v1/types.go
		isTerminating := sub.Conditions.Terminating != nil && *sub.Conditions.Terminating

		// If the endpoint is not Ready and EnableK8sTerminatingEndpoint is set,
		// allow endpoints that are Serving and Terminating.
		if !isReady {
			if !option.Config.EnableK8sTerminatingEndpoint {
				log.Debugf("discarding Endpoint on EndpointSlice %s: not Ready and EnableK8sTerminatingEndpoint %v", ep.Name, option.Config.EnableK8sTerminatingEndpoint)
				continue
			}
			// Filter out endpoints that are not Serving since those cannot receive traffic.
			if !isServing {
				log.Debugf("discarding Endpoint on EndpointSlice %s: not Serving and EnableK8sTerminatingEndpoint %v", ep.Name, option.Config.EnableK8sTerminatingEndpoint)
				continue
			}
		}

		for _, addr := range sub.Addresses {
			addrCluster, err := cmtypes.ParseAddrCluster(addr)
			if err != nil {
				log.WithError(err).Infof("Unable to parse address %s for EndpointSlice %s", addr, ep.Name)
				continue
			}

			backend, ok := endpoints.Backends[addrCluster]
			if !ok {
				backend = &Backend{Ports: serviceStore.PortConfiguration{}}
				endpoints.Backends[addrCluster] = backend
				if sub.NodeName != nil {
					backend.NodeName = *sub.NodeName
				} else {
					if nodeName, ok := sub.DeprecatedTopology[corev1.LabelHostname]; ok {
						backend.NodeName = nodeName
					}
				}
				if sub.Hostname != nil {
					backend.Hostname = *sub.Hostname
				}
				if sub.Zone != nil {
					backend.Zone = *sub.Zone
				} else if zoneName, ok := sub.DeprecatedTopology[corev1.LabelTopologyZone]; ok {
					backend.Zone = zoneName
				}
				// If the endpoint is not ready, check whether it is serving and terminating.
				if !isReady && option.Config.EnableK8sTerminatingEndpoint &&
					isServing && isTerminating {
					log.Debugf("Endpoint address %s on EndpointSlice %s is Terminating", addr, ep.Name)
					backend.Terminating = true
					metrics.TerminatingEndpointsEvents.Inc()
				}
			}

			for _, port := range ep.Ports {
				name, lbPort := parseEndpointPortV1(port)
				if lbPort != nil {
					backend.Ports[name] = lbPort
				}
			}
			if sub.Hints != nil && (*sub.Hints).ForZones != nil {
				hints := (*sub.Hints).ForZones
				backend.HintsForZones = make([]string, len(hints))
				for i, hint := range hints {
					backend.HintsForZones[i] = hint.Name
				}
			}
		}
	}

	log.Debugf("EndpointSlice %s has %d backends", ep.Name, len(endpoints.Backends))
	return endpoints
}

// parseEndpointPortV1 returns the port name and the port parsed as an L4Addr
// from the given port.
func parseEndpointPortV1(port slim_discovery_v1.EndpointPort) (string, *loadbalancer.L4Addr) {
	proto := loadbalancer.TCP
	if port.Protocol != nil {
		switch *port.Protocol {
		case slim_corev1.ProtocolTCP:
			proto = loadbalancer.TCP
		case slim_corev1.ProtocolUDP:
			proto = loadbalancer.UDP
		case slim_corev1.ProtocolSCTP:
			proto = loadbalancer.SCTP
		default:
			return "", nil
		}
	}
	if port.Port == nil {
		return "", nil
	}
	var name string
	if port.Name != nil {
		name = *port.Name
	}
	lbPort := loadbalancer.NewL4Addr(proto, uint16(*port.Port))
	return name, lbPort
}

// EndpointSlices is the collection of all endpoint slices of a service.
// The map key is the name of the endpoint slice or the name of the legacy
// v1.Endpoints resource. The endpoints stored here are not namespaced since
// this structure is only used as a value of another map that is already
// namespaced (see ServiceCache.endpoints).
//
// +deepequal-gen=true
type EndpointSlices struct {
	epSlices map[string]*Endpoints
}

// newEndpointsSlices returns a new EndpointSlices
func newEndpointsSlices() *EndpointSlices {
	return &EndpointSlices{
		epSlices: map[string]*Endpoints{},
	}
}

// GetEndpoints returns a read-only, single *Endpoints structure with the
// backends of all endpoint slices joined.
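// Backends that appear in more than one slice are merged, with their port maps
// combined (see the kubectl example in the function body).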
func (es *EndpointSlices) GetEndpoints() *Endpoints {
	if es == nil || len(es.epSlices) == 0 {
		return nil
	}
	allEps := newEndpoints()
	for _, eps := range es.epSlices {
		for backend, ep := range eps.Backends {
			// EndpointSlices may have duplicate addresses on different slices.
			// kubectl get endpointslices -n endpointslicemirroring-4896
			// NAME                             ADDRESSTYPE   PORTS   ENDPOINTS     AGE
			// example-custom-endpoints-f6z84   IPv4          9090    10.244.1.49   28s
			// example-custom-endpoints-g6r6v   IPv4          8090    10.244.1.49   28s
			b, ok := allEps.Backends[backend]
			if !ok {
				allEps.Backends[backend] = ep.DeepCopy()
			} else {
				clone := b.DeepCopy()
				for k, v := range ep.Ports {
					clone.Ports[k] = v
				}
				allEps.Backends[backend] = clone
			}
		}
	}
	return allEps
}

// Upsert maps 'esName' to 'e'.
// - 'esName': Name of the endpoint slice
// - 'e': Endpoints to store in the map
func (es *EndpointSlices) Upsert(esName string, e *Endpoints) {
	if es == nil {
		panic("BUG: EndpointSlices is nil")
	}
	es.epSlices[esName] = e
}

// Delete deletes the endpoint slice from the internal map. Returns true if
// no more endpoints remain in the map.
func (es *EndpointSlices) Delete(esName string) bool {
	if es == nil || len(es.epSlices) == 0 {
		return true
	}
	delete(es.epSlices, esName)
	return len(es.epSlices) == 0
}

// externalEndpoints is the collection of external endpoints in all remote
// clusters. The map key is the name of the remote cluster.
type externalEndpoints struct {
	endpoints map[string]*Endpoints
}

// newExternalEndpoints returns a new externalEndpoints
func newExternalEndpoints() externalEndpoints {
	return externalEndpoints{
		endpoints: map[string]*Endpoints{},
	}
}