github.com/cilium/cilium@v1.16.2/pkg/k8s/endpoints.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package k8s 5 6 import ( 7 "fmt" 8 "net" 9 "net/netip" 10 "sort" 11 "strconv" 12 "strings" 13 14 corev1 "k8s.io/api/core/v1" 15 16 cmtypes "github.com/cilium/cilium/pkg/clustermesh/types" 17 slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1" 18 slim_discovery_v1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/discovery/v1" 19 slim_discovery_v1beta1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/discovery/v1beta1" 20 slim_metav1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1" 21 "github.com/cilium/cilium/pkg/k8s/types" 22 "github.com/cilium/cilium/pkg/loadbalancer" 23 "github.com/cilium/cilium/pkg/metrics" 24 "github.com/cilium/cilium/pkg/option" 25 serviceStore "github.com/cilium/cilium/pkg/service/store" 26 ) 27 28 // Endpoints is an abstraction for the Kubernetes endpoints object. Endpoints 29 // consists of a set of backend IPs in combination with a set of ports and 30 // protocols. The name of the backend ports must match the names of the 31 // frontend ports of the corresponding service. 32 // 33 // The Endpoints object is parsed from either an EndpointSlice (preferred) or Endpoint 34 // Kubernetes objects depending on the Kubernetes version. 35 // 36 // +k8s:deepcopy-gen=true 37 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object 38 // +deepequal-gen=true 39 // +deepequal-gen:private-method=true 40 type Endpoints struct { 41 types.UnserializableObject 42 slim_metav1.ObjectMeta 43 44 EndpointSliceID 45 46 // Backends is a map containing all backend IPs and ports. The key to 47 // the map is the backend IP in string form. The value defines the list 48 // of ports for that backend IP, plus an additional optional node name. 49 // Backends map[cmtypes.AddrCluster]*Backend 50 Backends map[cmtypes.AddrCluster]*Backend 51 } 52 53 // DeepEqual returns true if both endpoints are deep equal. 54 func (e *Endpoints) DeepEqual(o *Endpoints) bool { 55 switch { 56 case (e == nil) != (o == nil): 57 return false 58 case (e == nil) && (o == nil): 59 return true 60 } 61 return e.deepEqual(o) 62 } 63 64 func (in *Endpoints) DeepCopyInto(out *Endpoints) { 65 *out = *in 66 if in.Backends != nil { 67 in, out := &in.Backends, &out.Backends 68 *out = make(map[cmtypes.AddrCluster]*Backend, len(*in)) 69 for key, val := range *in { 70 var outVal *Backend 71 if val == nil { 72 (*out)[key] = nil 73 } else { 74 in, out := &val, &outVal 75 *out = new(Backend) 76 (*in).DeepCopyInto(*out) 77 } 78 (*out)[key] = outVal 79 } 80 } 81 } 82 83 func (in *Endpoints) DeepCopy() *Endpoints { 84 if in == nil { 85 return nil 86 } 87 out := new(Endpoints) 88 in.DeepCopyInto(out) 89 return out 90 } 91 92 // Backend contains all ports, terminating state, and the node name of a given backend 93 // 94 // +k8s:deepcopy-gen=true 95 // +deepequal-gen=true 96 type Backend struct { 97 Ports serviceStore.PortConfiguration 98 NodeName string 99 Hostname string 100 Terminating bool 101 HintsForZones []string 102 Preferred bool 103 Zone string 104 } 105 106 // String returns the string representation of an endpoints resource, with 107 // backends and ports sorted. 108 func (e *Endpoints) String() string { 109 if e == nil { 110 return "" 111 } 112 113 backends := []string{} 114 for addrCluster, be := range e.Backends { 115 for _, port := range be.Ports { 116 if be.Zone != "" { 117 backends = append(backends, fmt.Sprintf("%s/%s[%s]", net.JoinHostPort(addrCluster.Addr().String(), strconv.Itoa(int(port.Port))), port.Protocol, be.Zone)) 118 } else { 119 backends = append(backends, fmt.Sprintf("%s/%s", net.JoinHostPort(addrCluster.Addr().String(), strconv.Itoa(int(port.Port))), port.Protocol)) 120 } 121 } 122 } 123 124 sort.Strings(backends) 125 126 return strings.Join(backends, ",") 127 } 128 129 // newEndpoints returns a new Endpoints 130 func newEndpoints() *Endpoints { 131 return &Endpoints{ 132 Backends: map[cmtypes.AddrCluster]*Backend{}, 133 } 134 } 135 136 // Prefixes returns the endpoint's backends as a slice of netip.Prefix. 137 func (e *Endpoints) Prefixes() []netip.Prefix { 138 prefixes := make([]netip.Prefix, 0, len(e.Backends)) 139 for addrCluster := range e.Backends { 140 addr := addrCluster.Addr() 141 prefixes = append(prefixes, netip.PrefixFrom(addr, addr.BitLen())) 142 } 143 return prefixes 144 } 145 146 // ParseEndpointsID parses a Kubernetes endpoints and returns the EndpointSliceID 147 func ParseEndpointsID(ep *slim_corev1.Endpoints) EndpointSliceID { 148 return EndpointSliceID{ 149 ServiceID: ServiceID{ 150 Name: ep.ObjectMeta.Name, 151 Namespace: ep.ObjectMeta.Namespace, 152 }, 153 EndpointSliceName: ep.ObjectMeta.Name, 154 } 155 } 156 157 // ParseEndpoints parses a Kubernetes Endpoints resource 158 func ParseEndpoints(ep *slim_corev1.Endpoints) *Endpoints { 159 endpoints := newEndpoints() 160 endpoints.ObjectMeta = ep.ObjectMeta 161 162 for _, sub := range ep.Subsets { 163 for _, addr := range sub.Addresses { 164 addrCluster, err := cmtypes.ParseAddrCluster(addr.IP) 165 if err != nil { 166 continue 167 } 168 169 backend, ok := endpoints.Backends[addrCluster] 170 if !ok { 171 backend = &Backend{Ports: serviceStore.PortConfiguration{}} 172 endpoints.Backends[addrCluster] = backend 173 } 174 175 if addr.NodeName != nil { 176 backend.NodeName = *addr.NodeName 177 } 178 backend.Hostname = addr.Hostname 179 180 for _, port := range sub.Ports { 181 lbPort := loadbalancer.NewL4Addr(loadbalancer.L4Type(port.Protocol), uint16(port.Port)) 182 backend.Ports[port.Name] = lbPort 183 } 184 } 185 } 186 187 endpoints.EndpointSliceID = ParseEndpointsID(ep) 188 return endpoints 189 } 190 191 type endpointSlice interface { 192 GetNamespace() string 193 GetName() string 194 GetLabels() map[string]string 195 } 196 197 // ParseEndpointSliceID parses a Kubernetes endpoints slice and returns a 198 // EndpointSliceID 199 func ParseEndpointSliceID(es endpointSlice) EndpointSliceID { 200 return EndpointSliceID{ 201 ServiceID: ServiceID{ 202 Name: es.GetLabels()[slim_discovery_v1.LabelServiceName], 203 Namespace: es.GetNamespace(), 204 }, 205 EndpointSliceName: es.GetName(), 206 } 207 } 208 209 // ParseEndpointSliceV1Beta1 parses a Kubernetes EndpointsSlice v1beta1 resource 210 // It reads ready and terminating state of endpoints in the EndpointSlice to 211 // return an EndpointSlice ID and a filtered list of Endpoints for service load-balancing. 212 func ParseEndpointSliceV1Beta1(ep *slim_discovery_v1beta1.EndpointSlice) *Endpoints { 213 endpoints := newEndpoints() 214 endpoints.ObjectMeta = ep.ObjectMeta 215 endpoints.EndpointSliceID = ParseEndpointSliceID(ep) 216 217 // Validate AddressType before parsing. Currently, we only support IPv4 and IPv6. 218 if ep.AddressType != slim_discovery_v1beta1.AddressTypeIPv4 && 219 ep.AddressType != slim_discovery_v1beta1.AddressTypeIPv6 { 220 return endpoints 221 } 222 223 for _, sub := range ep.Endpoints { 224 skipEndpoint := false 225 // ready indicates that this endpoint is prepared to receive traffic, 226 // according to whatever system is managing the endpoint. A nil value 227 // indicates an unknown state. In most cases consumers should interpret this 228 // unknown state as ready. 229 // More info: vendor/k8s.io/api/discovery/v1beta1/types.go 230 if sub.Conditions.Ready != nil && !*sub.Conditions.Ready { 231 skipEndpoint = true 232 if option.Config.EnableK8sTerminatingEndpoint { 233 // Terminating indicates that the endpoint is getting terminated. A 234 // nil values indicates an unknown state. Ready is never true when 235 // an endpoint is terminating. Propagate the terminating endpoint 236 // state so that we can gracefully remove those endpoints. 237 // More details : vendor/k8s.io/api/discovery/v1/types.go 238 if sub.Conditions.Terminating != nil && *sub.Conditions.Terminating { 239 skipEndpoint = false 240 } 241 } 242 } 243 if skipEndpoint { 244 continue 245 } 246 for _, addr := range sub.Addresses { 247 addrCluster, err := cmtypes.ParseAddrCluster(addr) 248 if err != nil { 249 continue 250 } 251 252 backend, ok := endpoints.Backends[addrCluster] 253 if !ok { 254 backend = &Backend{Ports: serviceStore.PortConfiguration{}} 255 endpoints.Backends[addrCluster] = backend 256 if nodeName, ok := sub.Topology[corev1.LabelHostname]; ok { 257 backend.NodeName = nodeName 258 } 259 if sub.Hostname != nil { 260 backend.Hostname = *sub.Hostname 261 } 262 if option.Config.EnableK8sTerminatingEndpoint { 263 if sub.Conditions.Terminating != nil && *sub.Conditions.Terminating { 264 backend.Terminating = true 265 metrics.TerminatingEndpointsEvents.Inc() 266 } 267 } 268 if zoneName, ok := sub.Topology[corev1.LabelTopologyZone]; ok { 269 backend.Zone = zoneName 270 } 271 } 272 273 for _, port := range ep.Ports { 274 name, lbPort := parseEndpointPortV1Beta1(port) 275 if lbPort != nil { 276 backend.Ports[name] = lbPort 277 } 278 } 279 } 280 } 281 return endpoints 282 } 283 284 // parseEndpointPortV1Beta1 returns the port name and the port parsed as a 285 // L4Addr from the given port. 286 func parseEndpointPortV1Beta1(port slim_discovery_v1beta1.EndpointPort) (string, *loadbalancer.L4Addr) { 287 proto := loadbalancer.TCP 288 if port.Protocol != nil { 289 switch *port.Protocol { 290 case slim_corev1.ProtocolTCP: 291 proto = loadbalancer.TCP 292 case slim_corev1.ProtocolUDP: 293 proto = loadbalancer.UDP 294 case slim_corev1.ProtocolSCTP: 295 proto = loadbalancer.SCTP 296 default: 297 return "", nil 298 } 299 } 300 if port.Port == nil { 301 return "", nil 302 } 303 var name string 304 if port.Name != nil { 305 name = *port.Name 306 } 307 lbPort := loadbalancer.NewL4Addr(proto, uint16(*port.Port)) 308 return name, lbPort 309 } 310 311 // ParseEndpointSliceV1 parses a Kubernetes EndpointSlice resource. 312 // It reads ready and terminating state of endpoints in the EndpointSlice to 313 // return an EndpointSlice ID and a filtered list of Endpoints for service load-balancing. 314 func ParseEndpointSliceV1(ep *slim_discovery_v1.EndpointSlice) *Endpoints { 315 endpoints := newEndpoints() 316 endpoints.ObjectMeta = ep.ObjectMeta 317 endpoints.EndpointSliceID = ParseEndpointSliceID(ep) 318 319 // Validate AddressType before parsing. Currently, we only support IPv4 and IPv6. 320 if ep.AddressType != slim_discovery_v1.AddressTypeIPv4 && 321 ep.AddressType != slim_discovery_v1.AddressTypeIPv6 { 322 return endpoints 323 } 324 325 log.Debugf("Processing %d endpoints for EndpointSlice %s", len(ep.Endpoints), ep.Name) 326 for _, sub := range ep.Endpoints { 327 // ready indicates that this endpoint is prepared to receive traffic, 328 // according to whatever system is managing the endpoint. A nil value 329 // indicates an unknown state. In most cases consumers should interpret this 330 // unknown state as ready. 331 // More info: vendor/k8s.io/api/discovery/v1/types.go 332 isReady := sub.Conditions.Ready == nil || *sub.Conditions.Ready 333 // serving is identical to ready except that it is set regardless of the 334 // terminating state of endpoints. This condition should be set to true for 335 // a ready endpoint that is terminating. If nil, consumers should defer to 336 // the ready condition. 337 // More info: vendor/k8s.io/api/discovery/v1/types.go 338 isServing := (sub.Conditions.Serving == nil && isReady) || (sub.Conditions.Serving != nil && *sub.Conditions.Serving) 339 // Terminating indicates that the endpoint is getting terminated. A 340 // nil values indicates an unknown state. Ready is never true when 341 // an endpoint is terminating. Propagate the terminating endpoint 342 // state so that we can gracefully remove those endpoints. 343 // More info: vendor/k8s.io/api/discovery/v1/types.go 344 isTerminating := sub.Conditions.Terminating != nil && *sub.Conditions.Terminating 345 346 // if is not Ready and EnableK8sTerminatingEndpoint is set 347 // allow endpoints that are Serving and Terminating 348 if !isReady { 349 if !option.Config.EnableK8sTerminatingEndpoint { 350 log.Debugf("discarding Endpoint on EndpointSlice %s: not Ready and EnableK8sTerminatingEndpoint %v", ep.Name, option.Config.EnableK8sTerminatingEndpoint) 351 continue 352 } 353 // filter not Serving endpoints since those can not receive traffic 354 if !isServing { 355 log.Debugf("discarding Endpoint on EndpointSlice %s: not Serving and EnableK8sTerminatingEndpoint %v", ep.Name, option.Config.EnableK8sTerminatingEndpoint) 356 continue 357 } 358 } 359 360 for _, addr := range sub.Addresses { 361 addrCluster, err := cmtypes.ParseAddrCluster(addr) 362 if err != nil { 363 log.WithError(err).Infof("Unable to parse address %s for EndpointSlices %s", addr, ep.Name) 364 continue 365 } 366 367 backend, ok := endpoints.Backends[addrCluster] 368 if !ok { 369 backend = &Backend{Ports: serviceStore.PortConfiguration{}} 370 endpoints.Backends[addrCluster] = backend 371 if sub.NodeName != nil { 372 backend.NodeName = *sub.NodeName 373 } else { 374 if nodeName, ok := sub.DeprecatedTopology[corev1.LabelHostname]; ok { 375 backend.NodeName = nodeName 376 } 377 } 378 if sub.Hostname != nil { 379 backend.Hostname = *sub.Hostname 380 } 381 if sub.Zone != nil { 382 backend.Zone = *sub.Zone 383 } else if zoneName, ok := sub.DeprecatedTopology[corev1.LabelTopologyZone]; ok { 384 backend.Zone = zoneName 385 } 386 // If is not ready check if is serving and terminating 387 if !isReady && option.Config.EnableK8sTerminatingEndpoint && 388 isServing && isTerminating { 389 log.Debugf("Endpoint address %s on EndpointSlice %s is Terminating", addr, ep.Name) 390 backend.Terminating = true 391 metrics.TerminatingEndpointsEvents.Inc() 392 } 393 } 394 395 for _, port := range ep.Ports { 396 name, lbPort := parseEndpointPortV1(port) 397 if lbPort != nil { 398 backend.Ports[name] = lbPort 399 } 400 } 401 if sub.Hints != nil && (*sub.Hints).ForZones != nil { 402 hints := (*sub.Hints).ForZones 403 backend.HintsForZones = make([]string, len(hints)) 404 for i, hint := range hints { 405 backend.HintsForZones[i] = hint.Name 406 } 407 } 408 } 409 } 410 411 log.Debugf("EndpointSlice %s has %d backends", ep.Name, len(endpoints.Backends)) 412 return endpoints 413 } 414 415 // parseEndpointPortV1 returns the port name and the port parsed as a L4Addr from 416 // the given port. 417 func parseEndpointPortV1(port slim_discovery_v1.EndpointPort) (string, *loadbalancer.L4Addr) { 418 proto := loadbalancer.TCP 419 if port.Protocol != nil { 420 switch *port.Protocol { 421 case slim_corev1.ProtocolTCP: 422 proto = loadbalancer.TCP 423 case slim_corev1.ProtocolUDP: 424 proto = loadbalancer.UDP 425 case slim_corev1.ProtocolSCTP: 426 proto = loadbalancer.SCTP 427 default: 428 return "", nil 429 } 430 } 431 if port.Port == nil { 432 return "", nil 433 } 434 var name string 435 if port.Name != nil { 436 name = *port.Name 437 } 438 lbPort := loadbalancer.NewL4Addr(proto, uint16(*port.Port)) 439 return name, lbPort 440 } 441 442 // EndpointSlices is the collection of all endpoint slices of a service. 443 // The map key is the name of the endpoint slice or the name of the legacy 444 // v1.Endpoint. The endpoints stored here are not namespaced since this 445 // structure is only used as a value of another map that is already namespaced. 446 // (see ServiceCache.endpoints). 447 // 448 // +deepequal-gen=true 449 type EndpointSlices struct { 450 epSlices map[string]*Endpoints 451 } 452 453 // newEndpointsSlices returns a new EndpointSlices 454 func newEndpointsSlices() *EndpointSlices { 455 return &EndpointSlices{ 456 epSlices: map[string]*Endpoints{}, 457 } 458 } 459 460 // GetEndpoints returns a read only a single *Endpoints structure with all 461 // Endpoints' backends joined. 462 func (es *EndpointSlices) GetEndpoints() *Endpoints { 463 if es == nil || len(es.epSlices) == 0 { 464 return nil 465 } 466 allEps := newEndpoints() 467 for _, eps := range es.epSlices { 468 for backend, ep := range eps.Backends { 469 // EndpointSlices may have duplicate addresses on different slices. 470 // kubectl get endpointslices -n endpointslicemirroring-4896 471 // NAME ADDRESSTYPE PORTS ENDPOINTS AGE 472 // example-custom-endpoints-f6z84 IPv4 9090 10.244.1.49 28s 473 // example-custom-endpoints-g6r6v IPv4 8090 10.244.1.49 28s 474 b, ok := allEps.Backends[backend] 475 if !ok { 476 allEps.Backends[backend] = ep.DeepCopy() 477 } else { 478 clone := b.DeepCopy() 479 for k, v := range ep.Ports { 480 clone.Ports[k] = v 481 } 482 allEps.Backends[backend] = clone 483 } 484 } 485 } 486 return allEps 487 } 488 489 // Upsert maps the 'esname' to 'e'. 490 // - 'esName': Name of the Endpoint Slice 491 // - 'e': Endpoints to store in the map 492 func (es *EndpointSlices) Upsert(esName string, e *Endpoints) { 493 if es == nil { 494 panic("BUG: EndpointSlices is nil") 495 } 496 es.epSlices[esName] = e 497 } 498 499 // Delete deletes the endpoint slice in the internal map. Returns true if there 500 // are not any more endpoints available in the map. 501 func (es *EndpointSlices) Delete(esName string) bool { 502 if es == nil || len(es.epSlices) == 0 { 503 return true 504 } 505 delete(es.epSlices, esName) 506 return len(es.epSlices) == 0 507 } 508 509 // externalEndpoints is the collection of external endpoints in all remote 510 // clusters. The map key is the name of the remote cluster. 511 type externalEndpoints struct { 512 endpoints map[string]*Endpoints 513 } 514 515 // newExternalEndpoints returns a new ExternalEndpoints 516 func newExternalEndpoints() externalEndpoints { 517 return externalEndpoints{ 518 endpoints: map[string]*Endpoints{}, 519 } 520 }