github.com/cilium/cilium@v1.16.2/pkg/redirectpolicy/manager.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package redirectpolicy

import (
	"context"
	"errors"
	"fmt"
	"net"
	"net/netip"
	"sync"

	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"
	"k8s.io/apimachinery/pkg/util/sets"

	agentK8s "github.com/cilium/cilium/daemon/k8s"
	cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
	"github.com/cilium/cilium/pkg/endpoint"
	"github.com/cilium/cilium/pkg/endpointmanager"
	"github.com/cilium/cilium/pkg/k8s"
	"github.com/cilium/cilium/pkg/k8s/resource"
	slimcorev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
	k8sUtils "github.com/cilium/cilium/pkg/k8s/utils"
	lb "github.com/cilium/cilium/pkg/loadbalancer"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/maps/lbmap"
	"github.com/cilium/cilium/pkg/netns"
	nodeTypes "github.com/cilium/cilium/pkg/node/types"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/service"
	serviceStore "github.com/cilium/cilium/pkg/service/store"
)

var (
	log                 = logging.DefaultLogger.WithField(logfields.LogSubsys, "redirectpolicy")
	localRedirectSvcStr = "-local-redirect"
)

type svcManager interface {
	DeleteService(frontend lb.L3n4Addr) (bool, error)
	UpsertService(*lb.SVC) (bool, lb.ID, error)
	TerminateUDPConnectionsToBackend(l3n4Addr *lb.L3n4Addr)
}

type svcCache interface {
	EnsureService(svcID k8s.ServiceID, swg *lock.StoppableWaitGroup) bool
	GetServiceAddrsWithType(svcID k8s.ServiceID, svcType lb.SVCType) (map[lb.FEPortName][]*lb.L3n4Addr, int)
	GetServiceFrontendIP(svcID k8s.ServiceID, svcType lb.SVCType) net.IP
}

type endpointManager interface {
	GetEndpointNetnsCookieByIP(ip netip.Addr) (uint64, error)
	Subscribe(s endpointmanager.Subscriber)
}

// podID is pod name and namespace
type podID = k8s.ServiceID

// Manager manages configurations related to Local Redirect Policies
// that enable redirecting traffic from the specified frontend to a set of node-local
// backend pods selected based on the backend configuration. To do that, it keeps
// track of add/delete events for resources like LRP, Pod and Service.
// For every local redirect policy configuration, it creates a
// new lb.SVCTypeLocalRedirect service with a frontend that has at least one node-local backend.
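// In broad strokes, AddRedirectPolicy resolves the policy frontends (either the
// configured addresses or the selected service's clusterIP frontends), collects the
// node-local pods selected as backends via getLocalPodsForPolicy, and upserts the
// redirect service through the service manager; subsequent pod, service and endpoint
// events keep that state in sync.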
type Manager struct {
	// Service handler to manage service entries corresponding to redirect policies
	svcManager svcManager

	svcCache svcCache

	localPods agentK8s.LocalPodResource

	epManager endpointManager

	skipLBMap lbmap.SkipLBMap

	// Mutex to protect against concurrent access to the maps
	mutex lock.Mutex

	// Stores mapping of all the current redirect policy frontends to their
	// respective policies
	// Frontends are namespace agnostic
	policyFrontendsByHash map[string]policyID
	// Stores mapping of redirect policy serviceID to the corresponding policyID for
	// easy lookup in policyConfigs
	policyServices map[k8s.ServiceID]policyID
	// Stores mapping of pods to redirect policies that select the pods
	policyPods map[podID][]policyID
	// Stores redirect policy configs indexed by policyID
	policyConfigs map[policyID]*LRPConfig
	// Stores mapping of pod endpoints to redirect policies that select the pods
	policyEndpoints map[podID]sets.Set[policyID]

	noNetnsCookieSupport bool
}

func NewRedirectPolicyManager(svc svcManager, svcCache *k8s.ServiceCache, lpr agentK8s.LocalPodResource, epM endpointManager) *Manager {
	return &Manager{
		svcManager:            svc,
		svcCache:              svcCache,
		epManager:             epM,
		localPods:             lpr,
		policyFrontendsByHash: make(map[string]policyID),
		policyServices:        make(map[k8s.ServiceID]policyID),
		policyPods:            make(map[podID][]policyID),
		policyConfigs:         make(map[policyID]*LRPConfig),
		policyEndpoints:       make(map[podID]sets.Set[policyID]),
	}
}

// Event handlers

// AddRedirectPolicy parses the given local redirect policy config, and updates
// internal state with the config fields.
func (rpm *Manager) AddRedirectPolicy(config LRPConfig) (bool, error) {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()

	if config.skipRedirectFromBackend {
		rpm.noNetnsCookieSupport = sync.OnceValue[bool](func() bool {
			if _, err := netns.GetNetNSCookie(); errors.Is(err, unix.ENOPROTOOPT) {
				return true
			}
			rpm.epManager.Subscribe(rpm)
			if rpm.skipLBMap == nil {
				var err error
				rpm.skipLBMap, err = lbmap.NewSkipLBMap()
				if err != nil {
					log.WithError(err).Warn("failed to init cilium_skip_lb maps: " +
						"policies with skipRedirectFromBackend flag set not supported")
				}
			}

			return false
		})()
		if rpm.noNetnsCookieSupport {
			err := fmt.Errorf("policy with skipRedirectFromBackend flag set not applied: " +
				"SO_NETNS_COOKIE not supported, needs kernel version >= 5.8")
			log.WithFields(logrus.Fields{
				logfields.LRPType:      config.lrpType,
				logfields.K8sNamespace: config.id.Namespace,
				logfields.LRPName:      config.id.Name,
			}).Error(err)
			return false, err
		}
		if rpm.skipLBMap == nil {
			err := fmt.Errorf("policy with skipRedirectFromBackend flag set not applied: " +
				"requires cilium_skip_lb maps")
			log.WithFields(logrus.Fields{
				logfields.LRPType:      config.lrpType,
				logfields.K8sNamespace: config.id.Namespace,
				logfields.LRPName:      config.id.Name,
			}).Error(err)
			return false, err
		}
	}

	_, ok := rpm.policyConfigs[config.id]
	if ok {
		// TODO Existing policy update
		log.Warn("Local redirect policy updates are not handled")
		return true, nil
	}

	err := rpm.isValidConfig(config)
	if err != nil {
		return false, err
	}

	// New redirect policy
	rpm.storePolicyConfig(config)

	switch config.lrpType {
	case lrpConfigTypeAddr:
		log.WithFields(logrus.Fields{
			logfields.LRPType:                  config.lrpType,
			logfields.K8sNamespace:             config.id.Namespace,
			logfields.LRPName:                  config.id.Name,
			logfields.LRPFrontends:             config.frontendMappings,
			logfields.LRPLocalEndpointSelector: config.backendSelector,
			logfields.LRPBackendPorts:          config.backendPorts,
			logfields.LRPFrontendType:          config.frontendType,
		}).Debug("Add local redirect policy")
		pods, err := rpm.getLocalPodsForPolicy(&config)
		if err != nil {
			return false, err
		}
		if len(pods) == 0 {
			return true, nil
		}
		rpm.processConfig(&config, pods...)

	case lrpConfigTypeSvc:
		log.WithFields(logrus.Fields{
			logfields.LRPType:                  config.lrpType,
			logfields.K8sNamespace:             config.id.Namespace,
			logfields.LRPName:                  config.id.Name,
			logfields.K8sSvcID:                 config.serviceID,
			logfields.LRPFrontends:             config.frontendMappings,
			logfields.LRPLocalEndpointSelector: config.backendSelector,
			logfields.LRPBackendPorts:          config.backendPorts,
			logfields.LRPFrontendType:          config.frontendType,
		}).Debug("Add local redirect policy")

		err := rpm.getAndUpsertPolicySvcConfig(&config)
		if err != nil {
			return false, err
		}
	}

	return true, nil
}

// DeleteRedirectPolicy deletes the internal state associated with the given policy.
func (rpm *Manager) DeleteRedirectPolicy(config LRPConfig) error {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()

	storedConfig := rpm.policyConfigs[config.id]
	if storedConfig == nil {
		return fmt.Errorf("local redirect policy to be deleted not found")
	}
	log.WithFields(logrus.Fields{"policyID": config.id}).
		Debug("Delete local redirect policy")

	switch storedConfig.lrpType {
	case lrpConfigTypeSvc:
		rpm.deletePolicyService(storedConfig)
	case lrpConfigTypeAddr:
		for _, feM := range storedConfig.frontendMappings {
			rpm.deletePolicyFrontend(storedConfig, feM.feAddr)
		}
	}

	for p, pp := range rpm.policyPods {
		var newPolicyList []policyID
		for _, policy := range pp {
			if policy != storedConfig.id {
				newPolicyList = append(newPolicyList, policy)
			}
		}
		if len(newPolicyList) > 0 {
			rpm.policyPods[p] = newPolicyList
		} else {
			delete(rpm.policyPods, p)
		}
	}
	rpm.deletePolicyConfig(storedConfig)
	return nil
}

// OnAddService handles Kubernetes service (clusterIP type) add events, and
// updates the internal state for the policy config associated with the service.
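// If the added service is selected by a stored service-type policy in the same
// namespace, its clusterIP frontends are resolved via getAndUpsertPolicySvcConfig so
// that the redirect service can be created once node-local backends are available.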
func (rpm *Manager) OnAddService(svcID k8s.ServiceID) {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()
	if len(rpm.policyConfigs) == 0 {
		return
	}

	// Check if this service is selected by any of the current policies.
	if id, ok := rpm.policyServices[svcID]; ok {
		// TODO Add unit test to assert lrpConfigType among other things.
		config := rpm.policyConfigs[id]
		if !config.checkNamespace(svcID.Namespace) {
			return
		}
		rpm.getAndUpsertPolicySvcConfig(config)
	}
}

// OnDeleteService handles Kubernetes service deletes, and deletes the internal state
// for the policy config that might be associated with the service.
func (rpm *Manager) OnDeleteService(svcID k8s.ServiceID) {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()
	if len(rpm.policyConfigs) == 0 {
		return
	}

	rpm.deleteService(svcID)
}

func (rpm *Manager) OnAddPod(pod *slimcorev1.Pod) {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()

	if len(rpm.policyConfigs) == 0 {
		return
	}
	// If the pod already exists in the internal cache, ignore all the subsequent
	// events since they'll be handled in the OnUpdatePod callback.
	// GH issue #13136
	// TODO add unit test
	id := k8s.ServiceID{
		Name:      pod.GetName(),
		Namespace: pod.GetNamespace(),
	}
	if _, ok := rpm.policyPods[id]; ok {
		return
	}
	rpm.OnUpdatePodLocked(pod, false, true)
}

func (rpm *Manager) OnUpdatePodLocked(pod *slimcorev1.Pod, removeOld bool, upsertNew bool) {
	if len(rpm.policyConfigs) == 0 {
		return
	}

	id := podID{
		Name:      pod.GetName(),
		Namespace: pod.GetNamespace(),
	}

	if removeOld {
		// Check if the pod was previously selected by any of the policies.
		if policies, ok := rpm.policyPods[id]; ok {
			for _, policy := range policies {
				config := rpm.policyConfigs[policy]
				rpm.deletePolicyBackends(config, id)
			}
		}
	}

	if !upsertNew {
		return
	}
	var podData *podMetadata
	pendingPolicies := sets.New[policyID]()
	// Check if any of the current redirect policies select this pod.
	for _, config := range rpm.policyConfigs {
		if config.checkNamespace(pod.GetNamespace()) && config.policyConfigSelectsPod(pod) {
			if podData = rpm.getPodMetadata(pod); podData == nil {
				return
			}
			if !config.skipRedirectFromBackend {
				rpm.processConfig(config, podData)
				continue
			}
		}
	}
	if podData == nil {
		return
	}
	// Process redirect policies that need additional pod metadata.
	for _, config := range rpm.policyConfigs {
		if !config.skipRedirectFromBackend {
			continue
		}
		if podData.netnsCookie != 0 {
			rpm.processConfig(config, podData)
			continue
		}
		addr, _ := netip.ParseAddr(podData.ips[0])
		cookie, err := rpm.epManager.GetEndpointNetnsCookieByIP(addr)
		if err != nil {
			log.WithError(err).WithFields(logrus.Fields{
				"addr": addr,
			}).Debug("Track pod for endpoint metadata")
			// Netns cookie not available yet.
			// Track the pod for this policy in order to retrieve metadata via endpoint events.
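			// The pending policy IDs are flushed into policyEndpoints below;
			// EndpointCreated then applies the policy once the endpoint's netns
			// cookie is known.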
			pendingPolicies.Insert(config.id)
			continue
		}
		log.WithFields(logrus.Fields{
			logfields.K8sPodName:  pod.Name,
			logfields.NetnsCookie: cookie,
		}).Debug("Pod endpoint netNsCookie")
		podData.netnsCookie = cookie
	}
	if len(pendingPolicies) > 0 {
		rpm.policyEndpoints[id] = pendingPolicies
	}
}

func (rpm *Manager) OnUpdatePod(pod *slimcorev1.Pod, needsReassign bool, ready bool) {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()
	// TODO add unit test to validate that we get callbacks only for relevant events
	rpm.OnUpdatePodLocked(pod, needsReassign || !ready, ready)
}

func (rpm *Manager) OnDeletePod(pod *slimcorev1.Pod) {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()
	if len(rpm.policyConfigs) == 0 {
		return
	}
	id := k8s.ServiceID{
		Name:      pod.GetName(),
		Namespace: pod.GetNamespace(),
	}

	if policies, ok := rpm.policyPods[id]; ok {
		for _, policy := range policies {
			config := rpm.policyConfigs[policy]
			rpm.deletePolicyBackends(config, id)
		}
		delete(rpm.policyPods, id)
	}
	delete(rpm.policyEndpoints, id)
}

func (rpm *Manager) EndpointCreated(ep *endpoint.Endpoint) {
	podID := k8s.ServiceID{
		Name:      ep.GetK8sPodName(),
		Namespace: ep.GetK8sNamespace(),
	}
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()
	if policyIDs, found := rpm.policyEndpoints[podID]; found {
		for _, id := range policyIDs.UnsortedList() {
			config := rpm.policyConfigs[id]
			// Track policies that need additional pod metadata for applying the policies.
			if !config.skipRedirectFromBackend {
				continue
			}
			podStore, _ := rpm.localPods.Store(context.TODO())
			pod, exists, err := podStore.GetByKey(resource.Key{Name: podID.Name, Namespace: podID.Namespace})
			if err != nil || !exists {
				return
			}
			if k8sUtils.GetLatestPodReadiness(pod.Status) != slimcorev1.ConditionTrue {
				return
			}
			podData := rpm.getPodMetadata(pod)
			if podData == nil {
				// This is a sanity check in case pod data isn't available yet.
				return
			}
			podData.netnsCookie = ep.NetNsCookie
			log.WithFields(logrus.Fields{
				"podID":               podID,
				logfields.NetnsCookie: ep.NetNsCookie,
			}).Debug("Endpoint event metadata")
			rpm.processConfig(config, podData)
		}
	}
}

func (rpm *Manager) EndpointDeleted(ep *endpoint.Endpoint, conf endpoint.DeleteConfig) {
	// No-op as clean-up is done in corresponding pod events.
}

func (rpm *Manager) EndpointRestored(ep *endpoint.Endpoint) {
	rpm.EndpointCreated(ep)
}

// podMetadata stores relevant metadata associated with a pod that's updated during pod
// add/update events
type podMetadata struct {
	labels map[string]string
	// id is the pod's name and namespace
	id podID
	// ips are the pod's unique IPs
	ips []string
	// namedPorts stores pod port and protocol indexed by the port name
	namedPorts serviceStore.PortConfiguration
	// netnsCookie is the network namespace cookie
	netnsCookie uint64
}

// Note: The following functions need to be called with the redirect policy manager lock held.

// getAndUpsertPolicySvcConfig gets service frontends for the given config service
// and upserts the service frontends.
func (rpm *Manager) getAndUpsertPolicySvcConfig(config *LRPConfig) error {
	switch config.frontendType {
	case svcFrontendAll:
		// Get all the service frontends.
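		// One frontend mapping is created per (clusterIP, port) combination reported
		// by the service cache, so a multi-port, multi-family service yields up to
		// len(addrsByPort)*feIPsCount mappings.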
		addrsByPort, feIPsCount := rpm.svcCache.GetServiceAddrsWithType(*config.serviceID,
			lb.SVCTypeClusterIP)
		config.frontendMappings = make([]*feMapping, 0, len(addrsByPort)*feIPsCount)
		for p, addrs := range addrsByPort {
			for _, addr := range addrs {
				feM := &feMapping{
					feAddr: addr,
					fePort: string(p),
				}
				config.frontendMappings = append(config.frontendMappings, feM)
			}
			rpm.updateConfigSvcFrontend(config, addrs...)
		}

	case svcFrontendSinglePort:
		// Get service frontend with the clusterIP and the policy config (unnamed) port.
		ip := rpm.svcCache.GetServiceFrontendIP(*config.serviceID, lb.SVCTypeClusterIP)
		if ip == nil {
			// The LRP will be applied when the selected service is added later.
			return nil
		}
		addrCluster := cmtypes.MustAddrClusterFromIP(ip)
		config.frontendMappings[0].feAddr.AddrCluster = addrCluster
		rpm.updateConfigSvcFrontend(config, config.frontendMappings[0].feAddr)

	case svcFrontendNamedPorts:
		// Get service frontends with the clusterIP and the policy config named ports.
		ports := make([]string, len(config.frontendMappings))
		for i, mapping := range config.frontendMappings {
			ports[i] = mapping.fePort
		}
		ip := rpm.svcCache.GetServiceFrontendIP(*config.serviceID, lb.SVCTypeClusterIP)
		if ip == nil {
			// The LRP will be applied when the selected service is added later.
			return nil
		}
		addrCluster := cmtypes.MustAddrClusterFromIP(ip)
		for _, feM := range config.frontendMappings {
			feM.feAddr.AddrCluster = addrCluster
			rpm.updateConfigSvcFrontend(config, feM.feAddr)
		}
	}

	pods, err := rpm.getLocalPodsForPolicy(config)
	if err != nil {
		return err
	}
	if len(pods) > 0 {
		rpm.processConfig(config, pods...)
	}
	return nil
}

// storePolicyConfig stores various state for the given policy config.
func (rpm *Manager) storePolicyConfig(config LRPConfig) {
	rpm.policyConfigs[config.id] = &config

	switch config.lrpType {
	case lrpConfigTypeAddr:
		for _, feM := range config.frontendMappings {
			rpm.policyFrontendsByHash[feM.feAddr.Hash()] = config.id
		}
	case lrpConfigTypeSvc:
		rpm.policyServices[*config.serviceID] = config.id
	}
}

// deletePolicyConfig cleans up stored state for the given policy config.
func (rpm *Manager) deletePolicyConfig(config *LRPConfig) {
	switch config.lrpType {
	case lrpConfigTypeAddr:
		for _, feM := range config.frontendMappings {
			delete(rpm.policyFrontendsByHash, feM.feAddr.Hash())
		}
	case lrpConfigTypeSvc:
		delete(rpm.policyServices, *config.serviceID)
	}
	delete(rpm.policyConfigs, config.id)
}

func (rpm *Manager) updateConfigSvcFrontend(config *LRPConfig, frontends ...*frontend) {
	for _, f := range frontends {
		rpm.policyFrontendsByHash[f.Hash()] = config.id
	}
	rpm.policyConfigs[config.id] = config
}

func (rpm *Manager) deletePolicyBackends(config *LRPConfig, podID podID) {
	l3nL4Addrs := sets.New[*lb.L3n4Addr]()

	for _, fe := range config.frontendMappings {
		newBes := make([]backend, 0, len(fe.podBackends))
		for _, be := range fe.podBackends {
			// Remove the pod from the frontend's backends slice, keeping the
			// order unchanged.
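			// For a matching backend, also drop any skip_lb entry keyed by the pod's
			// netns cookie and remember the address so lingering UDP connections to
			// it can be terminated once all frontends have been processed.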
			if be.podID != podID {
				newBes = append(newBes, be)
				continue
			}
			if config.skipRedirectFromBackend {
				if be.AddrCluster.Is4() {
					rpm.skipLBMap.DeleteLB4ByNetnsCookie(be.podNetnsCookie)
				} else {
					rpm.skipLBMap.DeleteLB6ByNetnsCookie(be.podNetnsCookie)
				}
			}
			l3nL4Addrs.Insert(&be.L3n4Addr)
		}
		fe.podBackends = newBes
		rpm.notifyPolicyBackendDelete(config, fe)
	}
	for _, addr := range l3nL4Addrs.UnsortedList() {
		rpm.svcManager.TerminateUDPConnectionsToBackend(addr)
	}
}

// Deletes service entry for the specified frontend.
func (rpm *Manager) deletePolicyFrontend(config *LRPConfig, frontend *frontend) {
	found, err := rpm.svcManager.DeleteService(*frontend)
	delete(rpm.policyFrontendsByHash, frontend.Hash())
	if !found || err != nil {
		log.WithError(err).Debugf("Local redirect service for policy %v not deleted",
			config.id)
	}
	if config.skipRedirectFromBackend {
		// Delete skip_lb map entries.
		addr := frontend.AddrCluster
		if addr.Is4() {
			rpm.skipLBMap.DeleteLB4ByAddrPort(addr.AsNetIP(), frontend.Port)
		} else {
			rpm.skipLBMap.DeleteLB6ByAddrPort(addr.AsNetIP(), frontend.Port)
		}
	}
}

// Updates service manager with the new set of backends now configured in 'config'.
func (rpm *Manager) notifyPolicyBackendDelete(config *LRPConfig, frontendMapping *feMapping) {
	if len(frontendMapping.podBackends) > 0 {
		rpm.upsertService(config, frontendMapping)
	} else {
		// No backends so remove the service entry.
		found, err := rpm.svcManager.DeleteService(*frontendMapping.feAddr)
		if !found || err != nil {
			log.WithError(err).Errorf("Local redirect service for policy (%v)"+
				" with frontend (%v) not deleted", config.id, frontendMapping.feAddr)
		}
		if config.lrpType == lrpConfigTypeSvc {
			if restored := rpm.svcCache.EnsureService(*config.serviceID, lock.NewStoppableWaitGroup()); restored {
				log.WithFields(logrus.Fields{
					logfields.K8sSvcID: *config.serviceID,
				}).Info("Restored service")
			}
		}
	}
}

// deletePolicyService deletes internal state associated with the specified service.
func (rpm *Manager) deletePolicyService(config *LRPConfig) {
	for _, m := range config.frontendMappings {
		rpm.deletePolicyFrontend(config, m.feAddr)
	}
	switch config.frontendType {
	case svcFrontendAll:
		config.frontendMappings = nil
	case svcFrontendSinglePort:
		fallthrough
	case svcFrontendNamedPorts:
		for _, feM := range config.frontendMappings {
			feM.feAddr.AddrCluster = cmtypes.AddrCluster{}
		}
	}
	// Restores the svc backends if there's still such a k8s svc.
	swg := lock.NewStoppableWaitGroup()
	svcID := *config.serviceID
	if restored := rpm.svcCache.EnsureService(svcID, swg); restored {
		log.WithFields(logrus.Fields{
			logfields.K8sSvcID: svcID,
		}).Debug("Restored service")
	}
}

func (rpm *Manager) deleteService(svcID k8s.ServiceID) {
	var (
		rp policyID
		ok bool
	)
	if rp, ok = rpm.policyServices[svcID]; !ok {
		return
	}
	// Get the policy config that selects this service.
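	// Delete the redirect frontends but keep the policy config itself; clearing the
	// resolved clusterIP below lets OnAddService re-populate the frontends if the
	// service is re-created.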
	config := rpm.policyConfigs[rp]
	for _, m := range config.frontendMappings {
		rpm.deletePolicyFrontend(config, m.feAddr)
	}
	switch config.frontendType {
	case svcFrontendAll:
		config.frontendMappings = nil
	case svcFrontendSinglePort:
		fallthrough
	case svcFrontendNamedPorts:
		for _, feM := range config.frontendMappings {
			feM.feAddr.AddrCluster = cmtypes.AddrCluster{}
		}
	}
}

func (rpm *Manager) plumbSkipLBEntries(mapping *feMapping) error {
	if rpm.skipLBMap == nil {
		// We have early checks for the maps, so this is just a sanity check.
		return fmt.Errorf("failed to plumb skip LB entries")
	}
	for _, pb := range mapping.podBackends {
		if pb.podNetnsCookie == 0 {
			return fmt.Errorf("no valid pod netns cookie")
		}
		addr := mapping.feAddr
		if addr.AddrCluster.Is4() {
			if err := rpm.skipLBMap.AddLB4(pb.podNetnsCookie, addr.AddrCluster.AsNetIP(), addr.Port); err != nil {
				return fmt.Errorf("failed to add entry to skip_lb4 map: %w", err)
			}
		} else {
			if err := rpm.skipLBMap.AddLB6(pb.podNetnsCookie, addr.AddrCluster.AsNetIP(), addr.Port); err != nil {
				return fmt.Errorf("failed to add entry to skip_lb6 map: %w", err)
			}
		}
	}

	return nil
}

func (rpm *Manager) upsertPolicyMapping(config *LRPConfig, feMapping *feMapping) {
	if config.skipRedirectFromBackend {
		if err := rpm.plumbSkipLBEntries(feMapping); err != nil {
			log.WithError(err).WithFields(logrus.Fields{
				logfields.LRPType:      config.lrpType,
				logfields.K8sNamespace: config.id.Namespace,
				logfields.LRPName:      config.id.Name,
			}).Error("LRP not applied due to error in plumbing skip_lb map")
			return
		}
	}
	rpm.upsertService(config, feMapping)
}

// upsertService upserts a service entry for the given policy config that's ready.
func (rpm *Manager) upsertService(config *LRPConfig, frontendMapping *feMapping) {
	frontendAddr := lb.L3n4AddrID{
		L3n4Addr: *frontendMapping.feAddr,
		ID:       lb.ID(0),
	}
	backendAddrs := make([]*lb.Backend, 0, len(frontendMapping.podBackends))
	for _, be := range frontendMapping.podBackends {
		backendAddrs = append(backendAddrs, &lb.Backend{
			NodeName: nodeTypes.GetName(),
			L3n4Addr: be.L3n4Addr,
		})
	}
	p := &lb.SVC{
		Name: lb.ServiceName{
			Name:      config.id.Name + localRedirectSvcStr,
			Namespace: config.id.Namespace,
		},
		Type:             lb.SVCTypeLocalRedirect,
		Frontend:         frontendAddr,
		Backends:         backendAddrs,
		ExtTrafficPolicy: lb.SVCTrafficPolicyCluster,
		IntTrafficPolicy: lb.SVCTrafficPolicyCluster,
	}

	if _, _, err := rpm.svcManager.UpsertService(p); err != nil {
		if errors.Is(err, service.NewErrLocalRedirectServiceExists(p.Frontend, p.Name)) {
			log.WithError(err).Debug("Error while inserting service in LB map")
		} else {
			log.WithError(err).Error("Error while inserting service in LB map")
		}
	}
}

// Returns a slice of metadata for the local pods selected by the given policy config.
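// Pods that are not Ready or whose IPs are not yet available are skipped. For policies
// with skipRedirectFromBackend, a pod whose netns cookie cannot be resolved yet is
// recorded in policyEndpoints and picked up later by EndpointCreated.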
func (rpm *Manager) getLocalPodsForPolicy(config *LRPConfig) ([]*podMetadata, error) {
	var (
		retPods []*podMetadata
		podData *podMetadata
		err     error
	)

	podStore, err := rpm.localPods.Store(context.TODO())
	if err != nil {
		log.WithError(err).Error("failed to get reference to local pod store")
		return nil, err
	}
	for _, pod := range podStore.List() {
		if !config.checkNamespace(pod.GetNamespace()) {
			continue
		}
		if !config.policyConfigSelectsPod(pod) {
			continue
		}
		if podData = rpm.getPodMetadata(pod); podData == nil {
			continue
		}
		if k8sUtils.GetLatestPodReadiness(pod.Status) != slimcorev1.ConditionTrue {
			continue
		}
		// The policy needs additional pod metadata.
		if config.skipRedirectFromBackend {
			addr, _ := netip.ParseAddr(podData.ips[0])
			cookie, err := rpm.epManager.GetEndpointNetnsCookieByIP(addr)
			if err != nil {
				log.WithError(err).WithFields(logrus.Fields{
					"addr": addr,
				}).Debug("Track pod for endpoint metadata")
				// Netns cookie not available yet.
				// Track the pod for this policy in order to retrieve metadata via endpoint events.
				podID := k8s.ServiceID{
					Name:      pod.GetName(),
					Namespace: pod.GetNamespace(),
				}
				pp, ok := rpm.policyEndpoints[podID]
				if ok {
					if !pp.Has(config.id) {
						pp.Insert(config.id)
					}
				} else {
					rpm.policyEndpoints[podID] = sets.New(config.id)
				}
				continue
			}
			log.WithFields(logrus.Fields{
				logfields.K8sPodName:  pod.Name,
				logfields.NetnsCookie: cookie,
			}).Debug("Pod endpoint netNsCookie")
			podData.netnsCookie = cookie
		}

		retPods = append(retPods, podData)
	}

	return retPods, nil
}

// isValidConfig validates the given policy config for duplicates.
// Note: The config is already sanitized.
func (rpm *Manager) isValidConfig(config LRPConfig) error {
	switch config.lrpType {
	case lrpConfigTypeAddr:
		for _, feM := range config.frontendMappings {
			fe := feM.feAddr
			id, ok := rpm.policyFrontendsByHash[fe.Hash()]
			if ok && config.id.Name != id.Name {
				return fmt.Errorf("CiliumLocalRedirectPolicy for"+
					" frontend %v already exists: %v", fe, config.id.Name)
			}
		}

	case lrpConfigTypeSvc:
		p, ok := rpm.policyServices[*config.serviceID]
		// Only 1 serviceMatcher policy is allowed for a service name within a namespace.
		if ok && config.id.Namespace != "" &&
			config.id.Namespace == rpm.policyConfigs[p].id.Namespace {
			return fmt.Errorf("CiliumLocalRedirectPolicy for"+
				" service %v already exists in namespace %v", config.serviceID,
				config.id.Namespace)
		}
	}

	return nil
}

func (rpm *Manager) processConfig(config *LRPConfig, pods ...*podMetadata) {
	if config.lrpType == lrpConfigTypeSvc && len(config.frontendMappings) == 0 {
		// Frontend information will be available when the selected service is added.
		return
	}
	switch config.frontendType {
	case svcFrontendSinglePort:
		fallthrough
	case addrFrontendSinglePort:
		rpm.processConfigWithSinglePort(config, pods...)
	case svcFrontendNamedPorts:
		fallthrough
	case addrFrontendNamedPorts:
		rpm.processConfigWithNamedPorts(config, pods...)
	case svcFrontendAll:
		if len(config.frontendMappings) > 1 {
			// The retrieved service frontend has multiple ports, in which case
			// Kubernetes mandates that the ports be named.
			rpm.processConfigWithNamedPorts(config, pods...)
		} else {
			// The retrieved service frontend has only 1 port, in which case
			// port names are optional.
			rpm.processConfigWithSinglePort(config, pods...)
		}
	}
}

// processConfigWithSinglePort upserts a policy config frontend with the corresponding
// backends.
// Frontend <ip, port, protocol> is mapped to a backend <ip, port, protocol> entry.
// If a pod has multiple IPs, then there will be multiple backend entries created
// for the pod with common <port, protocol>.
func (rpm *Manager) processConfigWithSinglePort(config *LRPConfig, pods ...*podMetadata) {
	var bes4 []backend
	var bes6 []backend

	// Generate and map pod backends to the policy frontend. The policy config
	// is already sanitized, and has matching backend and frontend port protocol.
	// We currently don't check which backends are updated before upserting a
	// service with the corresponding frontend. This can be optimized when LRPs
	// are scaled up.
	bePort := config.backendPorts[0]
	feM := config.frontendMappings[0]
	for _, pod := range pods {
		for _, ip := range pod.ips {
			beIP := net.ParseIP(ip)
			if beIP == nil {
				continue
			}
			be := backend{
				lb.L3n4Addr{
					AddrCluster: cmtypes.MustParseAddrCluster(ip),
					L4Addr: lb.L4Addr{
						Protocol: bePort.l4Addr.Protocol,
						Port:     bePort.l4Addr.Port,
					},
				}, pod.id, pod.netnsCookie,
			}
			if feM.feAddr.AddrCluster.Is4() && be.AddrCluster.Is4() {
				if option.Config.EnableIPv4 {
					bes4 = append(bes4, be)
				}
			} else if feM.feAddr.AddrCluster.Is6() && be.AddrCluster.Is6() {
				if option.Config.EnableIPv6 {
					bes6 = append(bes6, be)
				}
			}
		}
		if len(bes4) > 0 {
			rpm.updateFrontendMapping(config, feM, pod.id, bes4)
		} else if len(bes6) > 0 {
			rpm.updateFrontendMapping(config, feM, pod.id, bes6)
		}
	}
	rpm.upsertPolicyMapping(config, feM)
}

// processConfigWithNamedPorts upserts policy config frontends to the corresponding
// backends matched by port names.
func (rpm *Manager) processConfigWithNamedPorts(config *LRPConfig, pods ...*podMetadata) {
	// Generate backends for the policy config's backend named ports, and then
	// map the backends to policy frontends based on the named ports.
	// We currently don't check which backends are updated before upserting a
	// service with the corresponding frontend. This can be optimized if LRPs
	// are scaled up.
	upsertFes := make([]*feMapping, 0, len(config.frontendMappings))
	for _, feM := range config.frontendMappings {
		namedPort := feM.fePort
		var (
			bes4   []backend
			bes6   []backend
			bePort *bePortInfo
			ok     bool
		)
		if bePort, ok = config.backendPortsByPortName[namedPort]; !ok {
			// The frontend named port was not found in the backend ports map.
			continue
		}
		if bePort.l4Addr.Protocol != feM.feAddr.Protocol {
			continue
		}
		for _, pod := range pods {
			if _, ok = pod.namedPorts[namedPort]; ok {
				// Generate pod backends.
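				// A backend entry is created per pod IP using the backend named
				// port's protocol and port; only the address family matching the
				// frontend (and enabled in the agent config) is used.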
				for _, ip := range pod.ips {
					beIP := net.ParseIP(ip)
					if beIP == nil {
						continue
					}
					be := backend{
						lb.L3n4Addr{
							AddrCluster: cmtypes.MustParseAddrCluster(ip),
							L4Addr: lb.L4Addr{
								Protocol: bePort.l4Addr.Protocol,
								Port:     bePort.l4Addr.Port,
							},
						},
						pod.id, pod.netnsCookie,
					}
					if feM.feAddr.AddrCluster.Is4() && be.AddrCluster.Is4() {
						if option.Config.EnableIPv4 {
							bes4 = append(bes4, be)
						}
					} else if feM.feAddr.AddrCluster.Is6() && be.AddrCluster.Is6() {
						if option.Config.EnableIPv6 {
							bes6 = append(bes6, be)
						}
					}
				}
			}
			if len(bes4) > 0 {
				rpm.updateFrontendMapping(config, feM, pod.id, bes4)
			} else if len(bes6) > 0 {
				rpm.updateFrontendMapping(config, feM, pod.id, bes6)
			}
		}
		if len(bes4) > 0 || len(bes6) > 0 {
			upsertFes = append(upsertFes, feM)
		}
	}
	for i := range upsertFes {
		rpm.upsertPolicyMapping(config, upsertFes[i])
	}
}

// updateFrontendMapping updates policy config internal state and updates
// the policy frontend mapped backends.
func (rpm *Manager) updateFrontendMapping(config *LRPConfig, frontendMapping *feMapping, podID podID, backends []backend) {
	newFePods := make([]backend, 0, len(frontendMapping.podBackends)+len(backends))
	updatePodBes := true
	// Update the frontend mapped backends slice, keeping the order unchanged.
	for _, be := range frontendMapping.podBackends {
		if be.podID == podID {
			if updatePodBes {
				updatePodBes = false
				// Get the updated backends for the given pod.
				newFePods = append(newFePods, backends...)
			}
		} else {
			// Collect the unchanged backends for other pods.
			newFePods = append(newFePods, be)
		}
	}
	if updatePodBes {
		// New backend pod for the frontend
		newFePods = append(newFePods, backends...)
	}
	frontendMapping.podBackends = newFePods

	if podPolicies, ok := rpm.policyPods[podID]; ok {
		newPodPolicy := true
		for _, poID := range podPolicies {
			// Existing pod policy update
			if poID == config.id {
				newPodPolicy = false
				break
			}
		}
		if newPodPolicy {
			// Pod selected by a new policy
			rpm.policyPods[podID] = append(rpm.policyPods[podID], config.id)
		}
	} else {
		// Pod selected by a policy for the first time
		pp := []policyID{config.id}
		rpm.policyPods[podID] = pp
	}
}

func (rpm *Manager) getPodMetadata(pod *slimcorev1.Pod) *podMetadata {
	podIPs := k8sUtils.ValidIPs(pod.Status)
	if len(podIPs) == 0 {
		// IPs not available yet.
		return nil
	}

	namedPorts := make(serviceStore.PortConfiguration)
	for _, container := range pod.Spec.Containers {
		for _, port := range container.Ports {
			if port.Name == "" {
				continue
			}
			namedPorts[port.Name] = lb.NewL4Addr(lb.L4Type(port.Protocol),
				uint16(port.ContainerPort))
		}
	}

	return &podMetadata{
		ips:        podIPs,
		labels:     pod.GetLabels(),
		namedPorts: namedPorts,
		id: k8s.ServiceID{
			Name:      pod.GetName(),
			Namespace: pod.GetNamespace(),
		},
	}
}

func (rpm *Manager) GetLRPs() []*LRPConfig {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()

	lrps := make([]*LRPConfig, 0, len(rpm.policyConfigs))
	for _, lrp := range rpm.policyConfigs {
		lrps = append(lrps, lrp)
	}

	return lrps
}
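
// Manager is registered with the endpoint manager via epManager.Subscribe in
// AddRedirectPolicy, so it must keep satisfying endpointmanager.Subscriber; the
// assertion below makes that requirement explicit at compile time.
var _ endpointmanager.Subscriber = (*Manager)(nil)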